# Obtenção e organização dos dados

In [1]:
import pandas as pd

# Raw rulings file (pipe-delimited). Only three columns are kept: the ruling
# identifier used as merge key, the area label and the full original text.
df1 = pd.read_csv('../dados/acordaos-unicos.csv', sep='|').loc[:, ['acordao', 'areas', 'texto']]
df1.head()

Unnamed: 0,acordao,areas,texto
0,297/2016-P,Responsabilidade,TRIBUNAL DE CONTAS DA UNIÃO\tTC 010.084/2015-0...
1,366/2016-P,Finanças Públicas,TRIBUNAL DE CONTAS DA UNIÃO\tTC 005.933/2014-5...
2,944/2016-P,Responsabilidade,TRIBUNAL DE CONTAS DA UNIÃO\tTC 042.038/2012-0...
3,30/2016-P,Direito Processual,TRIBUNAL DE CONTAS DA UNIÃO\tTC 000.742/2014-7...
4,55/2016-P,Pessoal,;-;;Wania Lucia Pasquarelli do NascimentoTCUWa...


In [2]:
# Pre-filtered version of the rulings (pipe-delimited), keyed by the same
# 'acordao' identifier as df1.
df2 = pd.read_csv('../dados/acordaos-unicos-filtrados-6000.csv', sep = '|')[['acordao', 'filtrado_6000']]
# Empty fields are parsed as NaN; a bare astype(str) would turn them into the
# literal text 'nan', which the tokenizer would later treat as a real word.
# Replace them with an empty string before forcing the column to str.
df2['filtrado_6000'] = df2['filtrado_6000'].fillna('').astype(str)
df2.head()

Unnamed: 0,acordao,filtrado_6000
0,297/2016-P,tribunal conta união tc grupo classe plenário ...
1,366/2016-P,tribunal conta união tc grupo classe ii plenár...
2,944/2016-P,tribunal conta união tc grupo classe plenário ...
3,30/2016-P,tribunal conta união tc grupo classe plenário ...
4,55/2016-P,wania lucia pasquarelli nascimentotcuwania luc...


In [3]:
# Join original and filtered texts on the ruling identifier; the inner join
# keeps only rulings present in both files.
df = pd.merge(df1, df2, how='inner', on='acordao')
df.head()

Unnamed: 0,acordao,areas,texto,filtrado_6000
0,297/2016-P,Responsabilidade,TRIBUNAL DE CONTAS DA UNIÃO\tTC 010.084/2015-0...,tribunal conta união tc grupo classe plenário ...
1,366/2016-P,Finanças Públicas,TRIBUNAL DE CONTAS DA UNIÃO\tTC 005.933/2014-5...,tribunal conta união tc grupo classe ii plenár...
2,944/2016-P,Responsabilidade,TRIBUNAL DE CONTAS DA UNIÃO\tTC 042.038/2012-0...,tribunal conta união tc grupo classe plenário ...
3,30/2016-P,Direito Processual,TRIBUNAL DE CONTAS DA UNIÃO\tTC 000.742/2014-7...,tribunal conta união tc grupo classe plenário ...
4,55/2016-P,Pessoal,;-;;Wania Lucia Pasquarelli do NascimentoTCUWa...,wania lucia pasquarelli nascimentotcuwania luc...


In [4]:
# Sanity check: row counts before and after the merge should agree.
tuple(frame.shape for frame in (df1, df2, df))

((9739, 3), (9739, 2), (9739, 4))

In [5]:
from sklearn.preprocessing import LabelBinarizer

# One-hot encode the 'areas' label of every ruling.
# The distinct labels are taken straight from the column instead of going
# through groupby(...).groups, which builds a full group index just to read its
# keys and whose key type for a one-element list of columns is not stable
# across pandas versions. dropna() mirrors groupby's default of skipping
# missing labels; LabelBinarizer sorts the classes itself, so classes_ is
# unaffected by the order returned by unique().
areas = df['areas'].dropna().unique()
lbArea = LabelBinarizer()
lbArea.fit(list(areas))
y = lbArea.transform(df['areas'])
lbArea.classes_, y.shape

(array(['Competência do TCU', 'Contrato Administrativo', 'Convênio',
        'Desestatização', 'Direito Processual', 'Finanças Públicas',
        'Gestão Administrativa', 'Licitação', 'Pessoal',
        'Responsabilidade'], dtype='<U23'), (9739, 10))

# Treinamentos com Cross-validation

### Modelos a serem comparados:

* Teste 1 - Acórdãos únicos originais (coluna `texto` de `df`) com rede convolucional
* Teste 2 - Acórdãos únicos filtrados 6000 (coluna `filtrado_6000` de `df`) com rede convolucional

In [6]:
# Columns of the per-fold score table: one per class, followed by the three
# aggregate entries read from the classification report.
colunas_scores = [*lbArea.classes_, 'accuracy', 'macro avg', 'weighted avg']

# Human-readable names of the alternatives compared, indexed by position.
alternativas = [
    'rede convolucional sobre texto original',
    'rede convolucional sobre texto filtrado',
]

In [7]:
from gensim.models import Word2Vec
from gensim.models import KeyedVectors

# Load the Word2Vec model from disk; its word vectors (modelo.wv) are used
# further down to fill the embedding matrices of the networks.
print('\tCarregamento do modelo de acordaos...')
modelo = Word2Vec.load('../vocabularios/modelo-acordaos.w2v')

	Carregamento do modelo de acordaos...


  'See the migration notes for details: %s' % _MIGRATION_NOTES_URL


In [8]:
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report
from keras.models import Sequential
from keras.layers import Embedding, Conv1D, MaxPooling1D, Dense, GlobalMaxPooling1D, Flatten, GRU
from keras.optimizers import RMSprop
from keras.callbacks import ModelCheckpoint
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import numpy as np


# 10-fold cross-validation of the same 1D convolutional network over two text
# representations of the rulings. For every fold the best epoch (by validation
# accuracy) is restored and its classification report is reduced to one row of
# scores; df_medias collects the mean and standard deviation over the folds.
#
# NOTE(review): the checkpoint is selected on the same validation fold that is
# then used to compute the reported scores, and the tokenizer vocabulary is
# fitted on all documents (validation included). Both make the reported
# numbers somewhat optimistic -- confirm this is acceptable for the comparison.

# One entry per alternative, in the same order as `alternativas`:
# (text column of df, maximum sequence length fed to the network).
configuracoes = [('texto', 40000), ('filtrado_6000', 6000)]

# Embedding width shared by both alternatives. Rows of the embedding matrix are
# copied from modelo.wv, so this must equal the Word2Vec vector size.
dim_vetor = 100
n_classes = len(lbArea.classes_)

# An integer random_state makes every call to split() yield the same folds, so
# both alternatives are evaluated on identical partitions.
kfold = KFold(n_splits=10, random_state=42, shuffle=True)

df_medias = pd.DataFrame()
for alt, (coluna_texto, limite_texto) in enumerate(configuracoes):
    str_alt = str(alt)
    textos = df[coluna_texto]

    print('\n\nAlternativa', alt+1, '-', alternativas[alt])

    print('\tTokenizacao e montagem de sequencias...')
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(textos)
    vocabulario = len(tokenizer.word_index) + 1  # +1: index 0 is reserved for padding

    # Tokenise the whole corpus once; the folds below only select rows from it
    # instead of re-tokenising the same documents ten times.
    sequences = tokenizer.texts_to_sequences(textos)

    print('\tMontagem da matriz de embeddings...')
    # Words missing from the Word2Vec vocabulary keep an all-zero row.
    embedding_matrix = np.zeros((vocabulario, dim_vetor))
    for word, i in tokenizer.word_index.items():
        if word in modelo.wv:
            embedding_matrix[i] = modelo.wv[word]

    rotulos_fold = []
    scores_fold = []
    for fold, (train_index, val_index) in enumerate(kfold.split(df)):
        str_fold = 'Fold ' + str(fold)
        print()
        print()
        print('Alternativa ' + alternativas[alt], '-', str_fold + ':')
        print('\tDefinicao de valores de entrada e saida da rede...')

        # KFold yields positional indices, so rows are selected by position
        # (list indexing / .iloc) rather than by index label (.loc).
        x_train = pad_sequences([sequences[i] for i in train_index], maxlen=limite_texto)
        x_val = pad_sequences([sequences[i] for i in val_index], maxlen=limite_texto)

        y_train = lbArea.transform(df['areas'].iloc[train_index])
        y_val = lbArea.transform(df['areas'].iloc[val_index])

        print('\tTreinamento da rede...')
        model = Sequential()
        model.add(Embedding(vocabulario, dim_vetor, input_length=limite_texto, trainable=True,  weights=[embedding_matrix]))
        model.add(Conv1D(64, 7, activation='relu'))
        model.add(MaxPooling1D(5))
        model.add(Conv1D(32, 7, activation='relu'))
        model.add(GlobalMaxPooling1D())

        model.add(Dense(n_classes, activation='softmax'))
        model.compile(loss='categorical_crossentropy', optimizer=RMSprop(),  metrics=['categorical_accuracy'])

        # Only the best epoch (highest validation accuracy) is kept on disk.
        checkpoint_filename = 'weights' + str_alt + '-' + str(fold) + '.hdf5'
        checkpointer = ModelCheckpoint(filepath=checkpoint_filename, monitor='val_categorical_accuracy', verbose=1, save_best_only=True)
        model.fit(x_train, y_train, epochs=20, batch_size=32, validation_data=(x_val, y_val), verbose=1, shuffle=False, callbacks=[checkpointer])

        print('\n\tAvaliacao do melhor modelo e registro dos scores...')
        model.load_weights(checkpoint_filename)
        # argmax over the softmax output is what Sequential.predict_classes
        # computed; that helper no longer exists in newer Keras releases.
        y_val_pred = np.argmax(model.predict(x_val, verbose=1), axis=-1)
        # Back from one-hot rows to class indices; raises if a row has no
        # active class, i.e. a label unknown to lbArea.
        y_val_i = [list(x).index(1) for x in y_val]
        report = classification_report(y_val_i, y_val_pred, target_names=lbArea.classes_, output_dict = True)

        # 'accuracy' is a plain float in the report; every other entry is a
        # dict from which the F1 score is taken.
        rotulos_fold.append(str_fold)
        scores_fold.append({
            col: report[col] if col == 'accuracy' else report[col]['f1-score']
            for col in colunas_scores
        })

    # Build the per-fold score table in one go instead of growing it cell by cell.
    df_scores = pd.DataFrame(scores_fold, index=rotulos_fold, columns=colunas_scores)
    df_medias[alternativas[alt] + ' mean'] = df_scores.mean()
    df_medias[alternativas[alt] + ' std'] = df_scores.std()
df_medias.T

Using TensorFlow backend.




Alternativa 1 - rede convolucional sobre texto original
	Tokenizacao e montagem de sequencias...
	Montagem da matriz de embeddings...



Alternativa rede convolucional sobre texto original - Fold 0:
	Definicao de valores de entrada e saida da rede...


W0319 12:36:39.805193 139863296390976 deprecation_wrapper.py:119] From /home/leonardo/anaconda3/envs/gpu/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0319 12:36:39.817300 139863296390976 deprecation_wrapper.py:119] From /home/leonardo/anaconda3/envs/gpu/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0319 12:36:39.819158 139863296390976 deprecation_wrapper.py:119] From /home/leonardo/anaconda3/envs/gpu/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0319 12:36:39.829416 139863296390976 deprecation_wrapper.py:119] From /home/leonardo/anaconda3/envs/gpu/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprec

	Treinamento da rede...


W0319 12:36:41.126345 139863296390976 deprecation_wrapper.py:119] From /home/leonardo/anaconda3/envs/gpu/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:3976: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.

W0319 12:36:41.163959 139863296390976 deprecation_wrapper.py:119] From /home/leonardo/anaconda3/envs/gpu/lib/python3.7/site-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W0319 12:36:41.244848 139863296390976 deprecation.py:323] From /home/leonardo/anaconda3/envs/gpu/lib/python3.7/site-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Train on 8765 samples, validate on 974 samples
Epoch 1/20

Epoch 00001: val_categorical_accuracy improved from -inf to 0.57187, saving model to weights0-0.hdf5
Epoch 2/20

Epoch 00002: val_categorical_accuracy improved from 0.57187 to 0.61910, saving model to weights0-0.hdf5
Epoch 3/20

Epoch 00003: val_categorical_accuracy improved from 0.61910 to 0.62936, saving model to weights0-0.hdf5
Epoch 4/20

Epoch 00004: val_categorical_accuracy did not improve from 0.62936
Epoch 5/20

Epoch 00005: val_categorical_accuracy did not improve from 0.62936
Epoch 6/20

Epoch 00006: val_categorical_accuracy improved from 0.62936 to 0.63552, saving model to weights0-0.hdf5
Epoch 7/20

Epoch 00007: val_categorical_accuracy did not improve from 0.63552
Epoch 8/20

Epoch 00008: val_categorical_accuracy did not improve from 0.63552
Epoch 9/20

Epoch 00009: val_categorical_accuracy did not improve from 0.63552
Epoch 10/20

Epoch 00010: val_categorical_accuracy did not improve from 0.63552
Epoch 11/20

Epoc


Epoch 00012: val_categorical_accuracy improved from 0.66324 to 0.66632, saving model to weights0-1.hdf5
Epoch 13/20

Epoch 00013: val_categorical_accuracy did not improve from 0.66632
Epoch 14/20

Epoch 00014: val_categorical_accuracy improved from 0.66632 to 0.66838, saving model to weights0-1.hdf5
Epoch 15/20

Epoch 00015: val_categorical_accuracy did not improve from 0.66838
Epoch 16/20

Epoch 00016: val_categorical_accuracy did not improve from 0.66838
Epoch 17/20

Epoch 00017: val_categorical_accuracy improved from 0.66838 to 0.67146, saving model to weights0-1.hdf5
Epoch 18/20

Epoch 00018: val_categorical_accuracy did not improve from 0.67146
Epoch 19/20

Epoch 00019: val_categorical_accuracy did not improve from 0.67146
Epoch 20/20

Epoch 00020: val_categorical_accuracy did not improve from 0.67146

	Avaliacao do melhor modelo and registro dos scores...



Alternativa rede convolucional sobre texto original - Fold 2:
	Definicao de valores de entrada e saida da rede...
	Treinam


Epoch 00002: val_categorical_accuracy improved from 0.53593 to 0.61910, saving model to weights0-3.hdf5
Epoch 3/20

Epoch 00003: val_categorical_accuracy did not improve from 0.61910
Epoch 4/20

Epoch 00004: val_categorical_accuracy did not improve from 0.61910
Epoch 5/20

Epoch 00005: val_categorical_accuracy improved from 0.61910 to 0.64476, saving model to weights0-3.hdf5
Epoch 6/20

Epoch 00006: val_categorical_accuracy did not improve from 0.64476
Epoch 7/20

Epoch 00007: val_categorical_accuracy did not improve from 0.64476
Epoch 8/20

Epoch 00008: val_categorical_accuracy did not improve from 0.64476
Epoch 9/20

Epoch 00009: val_categorical_accuracy did not improve from 0.64476
Epoch 10/20

Epoch 00010: val_categorical_accuracy did not improve from 0.64476
Epoch 11/20

Epoch 00011: val_categorical_accuracy did not improve from 0.64476
Epoch 12/20

Epoch 00012: val_categorical_accuracy improved from 0.64476 to 0.64990, saving model to weights0-3.hdf5
Epoch 13/20

Epoch 00013: va


Epoch 00013: val_categorical_accuracy did not improve from 0.65811
Epoch 14/20

Epoch 00014: val_categorical_accuracy did not improve from 0.65811
Epoch 15/20

Epoch 00015: val_categorical_accuracy did not improve from 0.65811
Epoch 16/20

Epoch 00016: val_categorical_accuracy did not improve from 0.65811
Epoch 17/20

Epoch 00017: val_categorical_accuracy did not improve from 0.65811
Epoch 18/20

Epoch 00018: val_categorical_accuracy did not improve from 0.65811
Epoch 19/20

Epoch 00019: val_categorical_accuracy did not improve from 0.65811
Epoch 20/20

Epoch 00020: val_categorical_accuracy did not improve from 0.65811

	Avaliacao do melhor modelo and registro dos scores...



Alternativa rede convolucional sobre texto original - Fold 5:
	Definicao de valores de entrada e saida da rede...


  'precision', 'predicted', average, warn_for)


	Treinamento da rede...
Train on 8765 samples, validate on 974 samples
Epoch 1/20

Epoch 00001: val_categorical_accuracy improved from -inf to 0.58932, saving model to weights0-5.hdf5
Epoch 2/20

Epoch 00002: val_categorical_accuracy improved from 0.58932 to 0.63655, saving model to weights0-5.hdf5
Epoch 3/20

Epoch 00003: val_categorical_accuracy did not improve from 0.63655
Epoch 4/20

Epoch 00004: val_categorical_accuracy improved from 0.63655 to 0.64066, saving model to weights0-5.hdf5
Epoch 5/20

Epoch 00005: val_categorical_accuracy did not improve from 0.64066
Epoch 6/20

Epoch 00006: val_categorical_accuracy did not improve from 0.64066
Epoch 7/20

Epoch 00007: val_categorical_accuracy did not improve from 0.64066
Epoch 8/20

Epoch 00008: val_categorical_accuracy improved from 0.64066 to 0.64682, saving model to weights0-5.hdf5
Epoch 9/20

Epoch 00009: val_categorical_accuracy did not improve from 0.64682
Epoch 10/20

Epoch 00010: val_categorical_accuracy did not improve from 0

Epoch 12/20

Epoch 00012: val_categorical_accuracy improved from 0.66222 to 0.67043, saving model to weights0-6.hdf5
Epoch 13/20

Epoch 00013: val_categorical_accuracy did not improve from 0.67043
Epoch 14/20

Epoch 00014: val_categorical_accuracy did not improve from 0.67043
Epoch 15/20

Epoch 00015: val_categorical_accuracy did not improve from 0.67043
Epoch 16/20

Epoch 00016: val_categorical_accuracy did not improve from 0.67043
Epoch 17/20

Epoch 00017: val_categorical_accuracy did not improve from 0.67043
Epoch 18/20

Epoch 00018: val_categorical_accuracy did not improve from 0.67043
Epoch 19/20

Epoch 00019: val_categorical_accuracy did not improve from 0.67043
Epoch 20/20

Epoch 00020: val_categorical_accuracy did not improve from 0.67043

	Avaliacao do melhor modelo and registro dos scores...



Alternativa rede convolucional sobre texto original - Fold 7:
	Definicao de valores de entrada e saida da rede...
	Treinamento da rede...
Train on 8765 samples, validate on 974 samples

  'precision', 'predicted', average, warn_for)


	Treinamento da rede...
Train on 8765 samples, validate on 974 samples
Epoch 1/20

Epoch 00001: val_categorical_accuracy improved from -inf to 0.56366, saving model to weights0-8.hdf5
Epoch 2/20

Epoch 00002: val_categorical_accuracy improved from 0.56366 to 0.62115, saving model to weights0-8.hdf5
Epoch 3/20

Epoch 00003: val_categorical_accuracy improved from 0.62115 to 0.62834, saving model to weights0-8.hdf5
Epoch 4/20

Epoch 00004: val_categorical_accuracy improved from 0.62834 to 0.63552, saving model to weights0-8.hdf5
Epoch 5/20

Epoch 00005: val_categorical_accuracy did not improve from 0.63552
Epoch 6/20

Epoch 00006: val_categorical_accuracy improved from 0.63552 to 0.64682, saving model to weights0-8.hdf5
Epoch 7/20

Epoch 00007: val_categorical_accuracy did not improve from 0.64682
Epoch 8/20

Epoch 00008: val_categorical_accuracy did not improve from 0.64682
Epoch 9/20

Epoch 00009: val_categorical_accuracy did not improve from 0.64682
Epoch 10/20

Epoch 00010: val_catego

  'precision', 'predicted', average, warn_for)


	Treinamento da rede...
Train on 8766 samples, validate on 973 samples
Epoch 1/20

Epoch 00001: val_categorical_accuracy improved from -inf to 0.58171, saving model to weights0-9.hdf5
Epoch 2/20

Epoch 00002: val_categorical_accuracy improved from 0.58171 to 0.62384, saving model to weights0-9.hdf5
Epoch 3/20

Epoch 00003: val_categorical_accuracy improved from 0.62384 to 0.65468, saving model to weights0-9.hdf5
Epoch 4/20

Epoch 00004: val_categorical_accuracy did not improve from 0.65468
Epoch 5/20

Epoch 00005: val_categorical_accuracy did not improve from 0.65468
Epoch 6/20

Epoch 00006: val_categorical_accuracy improved from 0.65468 to 0.66084, saving model to weights0-9.hdf5
Epoch 7/20

Epoch 00007: val_categorical_accuracy did not improve from 0.66084
Epoch 8/20

Epoch 00008: val_categorical_accuracy did not improve from 0.66084
Epoch 9/20

Epoch 00009: val_categorical_accuracy did not improve from 0.66084
Epoch 10/20

Epoch 00010: val_categorical_accuracy did not improve from 0

  'precision', 'predicted', average, warn_for)


	Montagem da matriz de embeddings...



Alternativa rede convolucional sobre texto filtrado - Fold 0:
	Definicao de valores de entrada e saida da rede...
	Treinamento da rede...
Train on 8765 samples, validate on 974 samples
Epoch 1/20

Epoch 00001: val_categorical_accuracy improved from -inf to 0.56879, saving model to weights1-0.hdf5
Epoch 2/20

Epoch 00002: val_categorical_accuracy improved from 0.56879 to 0.62834, saving model to weights1-0.hdf5
Epoch 3/20

Epoch 00003: val_categorical_accuracy improved from 0.62834 to 0.63039, saving model to weights1-0.hdf5
Epoch 4/20

Epoch 00004: val_categorical_accuracy improved from 0.63039 to 0.64682, saving model to weights1-0.hdf5
Epoch 5/20

Epoch 00005: val_categorical_accuracy did not improve from 0.64682
Epoch 6/20

Epoch 00006: val_categorical_accuracy improved from 0.64682 to 0.65195, saving model to weights1-0.hdf5
Epoch 7/20

Epoch 00007: val_categorical_accuracy did not improve from 0.65195
Epoch 8/20

Epoch 00008: val_categorical


Epoch 00011: val_categorical_accuracy did not improve from 0.67454
Epoch 12/20

Epoch 00012: val_categorical_accuracy did not improve from 0.67454
Epoch 13/20

Epoch 00013: val_categorical_accuracy did not improve from 0.67454
Epoch 14/20

Epoch 00014: val_categorical_accuracy did not improve from 0.67454
Epoch 15/20

Epoch 00015: val_categorical_accuracy did not improve from 0.67454
Epoch 16/20

Epoch 00016: val_categorical_accuracy did not improve from 0.67454
Epoch 17/20

Epoch 00017: val_categorical_accuracy did not improve from 0.67454
Epoch 18/20

Epoch 00018: val_categorical_accuracy did not improve from 0.67454
Epoch 19/20

Epoch 00019: val_categorical_accuracy did not improve from 0.67454
Epoch 20/20

Epoch 00020: val_categorical_accuracy did not improve from 0.67454

	Avaliacao do melhor modelo and registro dos scores...



Alternativa rede convolucional sobre texto filtrado - Fold 2:
	Definicao de valores de entrada e saida da rede...
	Treinamento da rede...
Train on 8765 s

Epoch 2/20

Epoch 00002: val_categorical_accuracy improved from 0.62012 to 0.63860, saving model to weights1-3.hdf5
Epoch 3/20

Epoch 00003: val_categorical_accuracy improved from 0.63860 to 0.64476, saving model to weights1-3.hdf5
Epoch 4/20

Epoch 00004: val_categorical_accuracy improved from 0.64476 to 0.65606, saving model to weights1-3.hdf5
Epoch 5/20

Epoch 00005: val_categorical_accuracy did not improve from 0.65606
Epoch 6/20

Epoch 00006: val_categorical_accuracy improved from 0.65606 to 0.66735, saving model to weights1-3.hdf5
Epoch 7/20

Epoch 00007: val_categorical_accuracy improved from 0.66735 to 0.67556, saving model to weights1-3.hdf5
Epoch 8/20

Epoch 00008: val_categorical_accuracy did not improve from 0.67556
Epoch 9/20

Epoch 00009: val_categorical_accuracy did not improve from 0.67556
Epoch 10/20

Epoch 00010: val_categorical_accuracy did not improve from 0.67556
Epoch 11/20

Epoch 00011: val_categorical_accuracy improved from 0.67556 to 0.68070, saving model to we


Epoch 00013: val_categorical_accuracy improved from 0.64476 to 0.65503, saving model to weights1-4.hdf5
Epoch 14/20

Epoch 00014: val_categorical_accuracy did not improve from 0.65503
Epoch 15/20

Epoch 00015: val_categorical_accuracy did not improve from 0.65503
Epoch 16/20

Epoch 00016: val_categorical_accuracy did not improve from 0.65503
Epoch 17/20

Epoch 00017: val_categorical_accuracy did not improve from 0.65503
Epoch 18/20

Epoch 00018: val_categorical_accuracy did not improve from 0.65503
Epoch 19/20

Epoch 00019: val_categorical_accuracy did not improve from 0.65503
Epoch 20/20

Epoch 00020: val_categorical_accuracy did not improve from 0.65503

	Avaliacao do melhor modelo and registro dos scores...



Alternativa rede convolucional sobre texto filtrado - Fold 5:
	Definicao de valores de entrada e saida da rede...
	Treinamento da rede...
Train on 8765 samples, validate on 974 samples
Epoch 1/20

Epoch 00001: val_categorical_accuracy improved from -inf to 0.55441, saving mod


Epoch 00003: val_categorical_accuracy improved from 0.57598 to 0.59548, saving model to weights1-6.hdf5
Epoch 4/20

Epoch 00004: val_categorical_accuracy improved from 0.59548 to 0.61910, saving model to weights1-6.hdf5
Epoch 5/20

Epoch 00005: val_categorical_accuracy improved from 0.61910 to 0.66735, saving model to weights1-6.hdf5
Epoch 6/20

Epoch 00006: val_categorical_accuracy did not improve from 0.66735
Epoch 7/20

Epoch 00007: val_categorical_accuracy did not improve from 0.66735
Epoch 8/20

Epoch 00008: val_categorical_accuracy did not improve from 0.66735
Epoch 9/20

Epoch 00009: val_categorical_accuracy did not improve from 0.66735
Epoch 10/20

Epoch 00010: val_categorical_accuracy did not improve from 0.66735
Epoch 11/20

Epoch 00011: val_categorical_accuracy did not improve from 0.66735
Epoch 12/20

Epoch 00012: val_categorical_accuracy did not improve from 0.66735
Epoch 13/20

Epoch 00013: val_categorical_accuracy did not improve from 0.66735
Epoch 14/20

Epoch 00014: v

  'precision', 'predicted', average, warn_for)


	Treinamento da rede...
Train on 8765 samples, validate on 974 samples
Epoch 1/20

Epoch 00001: val_categorical_accuracy improved from -inf to 0.52977, saving model to weights1-7.hdf5
Epoch 2/20

Epoch 00002: val_categorical_accuracy improved from 0.52977 to 0.61910, saving model to weights1-7.hdf5
Epoch 3/20

Epoch 00003: val_categorical_accuracy did not improve from 0.61910
Epoch 4/20

Epoch 00004: val_categorical_accuracy improved from 0.61910 to 0.64579, saving model to weights1-7.hdf5
Epoch 5/20

Epoch 00005: val_categorical_accuracy did not improve from 0.64579
Epoch 6/20

Epoch 00006: val_categorical_accuracy improved from 0.64579 to 0.64579, saving model to weights1-7.hdf5
Epoch 7/20

Epoch 00007: val_categorical_accuracy did not improve from 0.64579
Epoch 8/20

Epoch 00008: val_categorical_accuracy did not improve from 0.64579
Epoch 9/20

Epoch 00009: val_categorical_accuracy did not improve from 0.64579
Epoch 10/20

Epoch 00010: val_categorical_accuracy did not improve from 0


Epoch 00012: val_categorical_accuracy did not improve from 0.65708
Epoch 13/20

Epoch 00013: val_categorical_accuracy did not improve from 0.65708
Epoch 14/20

Epoch 00014: val_categorical_accuracy did not improve from 0.65708
Epoch 15/20

Epoch 00015: val_categorical_accuracy did not improve from 0.65708
Epoch 16/20

Epoch 00016: val_categorical_accuracy did not improve from 0.65708
Epoch 17/20

Epoch 00017: val_categorical_accuracy did not improve from 0.65708
Epoch 18/20

Epoch 00018: val_categorical_accuracy did not improve from 0.65708
Epoch 19/20

Epoch 00019: val_categorical_accuracy did not improve from 0.65708
Epoch 20/20

Epoch 00020: val_categorical_accuracy did not improve from 0.65708

	Avaliacao do melhor modelo and registro dos scores...



Alternativa rede convolucional sobre texto filtrado - Fold 9:
	Definicao de valores de entrada e saida da rede...
	Treinamento da rede...
Train on 8766 samples, validate on 973 samples
Epoch 1/20

Epoch 00001: val_categorical_accurac

Unnamed: 0,Competência do TCU,Contrato Administrativo,Convênio,Desestatização,Direito Processual,Finanças Públicas,Gestão Administrativa,Licitação,Pessoal,Responsabilidade,accuracy,macro avg,weighted avg
rede convolucional sobre texto original mean,0.278679,0.265907,0.174573,0.293693,0.527058,0.2992,0.037799,0.750727,0.929666,0.587236,0.661977,0.414454,0.641547
rede convolucional sobre texto original std,0.106389,0.120407,0.076471,0.094694,0.031975,0.07497,0.065077,0.024781,0.010688,0.023697,0.00973,0.026519,0.014575
rede convolucional sobre texto filtrado mean,0.31936,0.314927,0.228093,0.334805,0.546508,0.384883,0.138394,0.747693,0.930285,0.576888,0.667523,0.452184,0.652739
rede convolucional sobre texto filtrado std,0.105503,0.074974,0.087037,0.156089,0.038256,0.087383,0.126304,0.024785,0.006468,0.049681,0.012543,0.028517,0.013164


In [9]:
# Persist the summary table (one row per alternative/statistic) as CSV.
df_medias.transpose().to_csv('scores_convolucionais.csv', encoding='Latin1')