In [1]:
import pandas as pd

# Load the selected-jurisprudence excerpts; the file is semicolon-delimited.
df = pd.read_csv('../dados/jurisprudencia_selecionada_excertos.CSV', sep = ';')
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,COD,NUM_ENUNCIADO,COD_AREA,DESCR_AREA,COD_TEMA,DESCR_TEMA,COD_SUBTEMA,DESCR_SUBTEMA,COD_DOC_TRAMITAVEL_ENUNCIADO,TEXTO_ENUNCIADO,ACORDAO,TIPO_PROCESSO
0,1400,1236,50,Responsabilidade,488,Solidariedade,261,Benefício previdenciário,54995437,Não comprovada a participação do beneficiário ...,Acórdão 297/2016 - PL,Tomada de Contas Especial
1,1700,1534,46,Finanças Públicas,981,Exportação,983,Petróleo,55025587,A operação ficta de exportação de plataformas ...,Acórdão 366/2016 - PL,Solicitação do Congresso Nacional
2,5700,5314,50,Responsabilidade,203,Multa,1021,Dosimetria,55455370,"No âmbito do TCU, a dosimetria da pena tem com...",Acórdão 944/2016 - PL,Acompanhamento
3,284,40,45,Direito Processual,162,Princípio da independência das instâncias,481,Decisão judicial,54773746,O princípio da independência das instâncias pe...,Acórdão 30/2016 - PL,Tomada de Contas Especial
4,298,54,49,Pessoal,141,Sistema S,142,Nepotismo,54773402,É vedado aos dirigentes das entidades do Siste...,Acórdão 55/2016 - PL,Representação


In [2]:
# Row and column counts of the loaded frame.
df.shape

(13312, 12)

In [3]:
# Distinct DESCR_AREA values, taken from the group keys of a groupby on that column.
areas = df.groupby(['DESCR_AREA']).groups.keys()
areas

dict_keys(['Competência do TCU', 'Contrato Administrativo', 'Convênio', 'Desestatização', 'Direito Processual', 'Finanças Públicas', 'Gestão Administrativa', 'Licitação', 'Pessoal', 'Responsabilidade'])

In [4]:
from sklearn.preprocessing import LabelBinarizer

# Fit the label binarizer on the distinct area names (fit() returns the estimator itself).
lbArea = LabelBinarizer().fit(list(areas))
lbArea.classes_

array(['Competência do TCU', 'Contrato Administrativo', 'Convênio',
       'Desestatização', 'Direito Processual', 'Finanças Públicas',
       'Gestão Administrativa', 'Licitação', 'Pessoal',
       'Responsabilidade'], dtype='<U23')

In [5]:
# Encode every row's area label with the fitted binarizer.
y = lbArea.transform(df['DESCR_AREA'])
y.shape

(13312, 10)

In [6]:
from keras.preprocessing.text import Tokenizer
import numpy as np

# Hyper-parameters shared by the following cells.
limite_texto = 200  # every token sequence is padded/truncated to this length
dim_vetor = 100     # width of each word vector in the embedding matrices

# NOTE(review): the df.head() output of the first cell lists TEXTO_ENUNCIADO and no
# TEXTO_EXCERTO column - confirm the column name against the CSV actually loaded.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['TEXTO_EXCERTO'])

# The vocabulary size is derived from the fitted tokenizer (no cap is applied);
# +1 because Keras word indices start at 1 and index 0 is reserved.
vocabulario = len(tokenizer.word_index) + 1

sequences = tokenizer.texts_to_sequences(df['TEXTO_EXCERTO'])

Using TensorFlow backend.


In [7]:
from keras.preprocessing.sequence import pad_sequences

# Pad/truncate every token sequence to limite_texto entries.
x = pad_sequences(sequences, maxlen=limite_texto)

print('Shape of data tensor:', x.shape)

Shape of data tensor: (13312, 200)


In [8]:
# Display names of the embedding variants, indexed by the integer `alt` used by
# the training loop; slot 0 is a None placeholder so valid variants start at 1.
alternativas = [None, 'sem pré-treino', 'NILC fixo', 'NILC variável', 'Acordãos fixo', 'Acordãos variável']

In [9]:
from gensim.models import Word2Vec
from gensim.models import KeyedVectors

def monta_matriz_embedding(vetores, word_index, num_linhas, dim):
    """Build an embedding matrix aligned with the tokenizer's word indices.

    Args:
        vetores: mapping-like word-vector store supporting ``word in vetores``
            and ``vetores[word]`` (e.g. gensim ``KeyedVectors``).
        word_index: dict mapping each word to its integer index.
        num_linhas: number of rows of the matrix (must exceed the largest index).
        dim: number of columns, i.e. the word-vector width.

    Returns:
        A ``(num_linhas, dim)`` float array; rows of words missing from
        ``vetores`` are left as zeros.

    Also prints how many vocabulary words were found in ``vetores``.
    """
    matriz = np.zeros((num_linhas, dim))
    encontrados = 0
    for palavra, indice in word_index.items():
        if palavra in vetores:
            matriz[indice] = vetores[palavra]
            encontrados += 1
    # Use the real vocabulary size instead of relying on the last loop index,
    # and avoid dividing by zero when the vocabulary is empty.
    total = len(word_index)
    percentual = encontrados * 100. / total if total else 0.
    print('\tVocabulario:', total, ' - encontrados no modelo:', encontrados, '=', percentual)
    return matriz


print('Montando embeddings NILC:')
model_nilc = KeyedVectors.load_word2vec_format('../externos/model.txt')
embedding_matrix_nilc = monta_matriz_embedding(model_nilc, tokenizer.word_index, vocabulario, dim_vetor)

print('Montando embeddings Acordaos:')
# NOTE(review): Word2Vec.load unpickles the file - only load model files from a trusted source.
model_acordaos = Word2Vec.load('../vocabularios/modelo-acordaos.w2v')
embedding_matrix_acordaos = monta_matriz_embedding(model_acordaos.wv, tokenizer.word_index, vocabulario, dim_vetor)

# Pre-trained matrix for each entry of `alternativas` (None where no pre-training applies).
embeddings = [None, None, embedding_matrix_nilc, embedding_matrix_nilc, embedding_matrix_acordaos, embedding_matrix_acordaos]

Montando embeddings NILC:


  'See the migration notes for details: %s' % _MIGRATION_NOTES_URL


	Vocabulario: 15387  - encontrados no modelo: 13758 = 89.41314096315071
Montando embeddings Acordaos:
	Vocabulario: 15387  - encontrados no modelo: 14572 = 94.70332098524729


In [10]:
# Score-table columns: one per area class followed by the aggregate entries
# read from the classification report.
colunas_scores = [*lbArea.classes_, 'accuracy', 'macro avg', 'weighted avg']

In [12]:
def gera_embedding(alt, model, vocabulario, dim_vetor, limite_texto):
    """Append the Embedding layer of experiment variant ``alt`` to ``model``.

    Variants (same positions as the ``alternativas`` list):
        1 - no pre-trained weights, trainable
        2 - NILC vectors, frozen
        3 - NILC vectors, trainable
        4 - Acordaos vectors, frozen
        5 - Acordaos vectors, trainable

    Args:
        alt: integer variant selector (1-5).
        model: Keras model the layer is added to via ``model.add``.
        vocabulario: number of rows of the embedding (vocabulary size).
        dim_vetor: width of each word vector.
        limite_texto: length of the padded input sequences.

    Raises:
        ValueError: if ``alt`` is not one of the variants above (previously the
            function silently added no layer).

    Reads the module-level ``embedding_matrix_nilc`` / ``embedding_matrix_acordaos``
    and requires ``Embedding`` (keras.layers) to be imported before it is called.
    """
    # Resolve the initial weights lazily so variant 1 does not need the matrices.
    if alt == 1:
        pesos = None
    elif alt in (2, 3):
        pesos = embedding_matrix_nilc
    elif alt in (4, 5):
        pesos = embedding_matrix_acordaos
    else:
        raise ValueError('alternativa de embedding desconhecida: %r' % (alt,))

    # Odd variants fine-tune the embedding; even variants keep it frozen.
    opcoes = {'input_length': limite_texto, 'trainable': alt in (1, 3, 5)}
    if pesos is not None:
        opcoes['weights'] = [pesos]
    model.add(Embedding(vocabulario, dim_vetor, **opcoes))

In [14]:
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report
from keras.models import Sequential
from keras.layers import Dense, GRU, Embedding
from keras.optimizers import RMSprop
from keras.callbacks import ModelCheckpoint

print('Realizando treinamentos:')

# KFold yields positional row numbers and is seeded, so the ten partitions are the
# same for every alternative: materialise them once instead of once per alternative.
particoes = list(KFold(n_splits=10, random_state=42, shuffle=True).split(df))

df_medias = pd.DataFrame()
for alt in range(1, 6):
    str_alt = str(alt)
    nomes_folds = []
    registros = []
    for fold, (train_index, val_index) in enumerate(particoes):
        str_fold = 'Fold ' + str(fold)
        print()
        print('Alternativa ' + alternativas[alt], '-', str_fold + ':')

        # x and y were already tokenised/padded/binarised for the whole frame in
        # earlier cells; slicing them by position gives the same per-fold arrays
        # without re-tokenising the corpus on each of the 50 runs.
        x_train, x_val = x[train_index], x[val_index]
        y_train, y_val = y[train_index], y[val_index]

        model = Sequential()
        gera_embedding(alt, model, vocabulario, dim_vetor, limite_texto)
        model.add(GRU(256, dropout=0.2, recurrent_dropout=0.2))
        model.add(Dense(y.shape[1], activation='softmax'))
        model.compile(loss='categorical_crossentropy', optimizer=RMSprop(),  metrics=['categorical_accuracy'])

        # NOTE(review): the best epoch is selected on the same validation fold that
        # is scored below, so the reported figures are optimistic estimates.
        checkpoint_filename = 'weights' + str_alt + '-' + str(fold) + '.hdf5'
        checkpointer = ModelCheckpoint(filepath=checkpoint_filename, monitor='val_categorical_accuracy', verbose=1, save_best_only=True)
        model.fit(x_train, y_train, epochs=20, batch_size=32, validation_data=(x_val, y_val), verbose=1, shuffle=False, callbacks=[checkpointer])

        print('Evaluating best model and registering score:')
        model.load_weights(checkpoint_filename)
        # argmax over the softmax output is what the deprecated predict_classes did.
        y_val_pred = np.argmax(model.predict(x_val, verbose=1), axis=-1)
        # Position of the single 1 in each one-hot row.
        y_val_i = np.argmax(y_val, axis=1)
        report = classification_report(y_val_i, y_val_pred, target_names=lbArea.classes_, output_dict=True)

        # 'accuracy' is a scalar in the report; every other entry is a dict of metrics.
        nomes_folds.append(str_fold)
        registros.append({
            col: report[col] if col == 'accuracy' else report[col]['f1-score']
            for col in colunas_scores
        })

    # One row per fold, built in a single step instead of cell-by-cell enlargement.
    df_scores = pd.DataFrame(registros, index=nomes_folds, columns=colunas_scores)
    df_medias[alternativas[alt] + ' mean'] = df_scores.mean()
    df_medias[alternativas[alt] + ' std'] = df_scores.std()
df_medias.T

Realizando treinamentos:

Alternativa sem pré-treino - Fold 0:
Train on 11980 samples, validate on 1332 samples
Epoch 1/20

Epoch 00001: val_categorical_accuracy improved from -inf to 0.55180, saving model to weights1-0.hdf5
Epoch 2/20

Epoch 00002: val_categorical_accuracy improved from 0.55180 to 0.64640, saving model to weights1-0.hdf5
Epoch 3/20

Epoch 00003: val_categorical_accuracy improved from 0.64640 to 0.77402, saving model to weights1-0.hdf5
Epoch 4/20

Epoch 00004: val_categorical_accuracy improved from 0.77402 to 0.80030, saving model to weights1-0.hdf5
Epoch 5/20

Epoch 00005: val_categorical_accuracy improved from 0.80030 to 0.80706, saving model to weights1-0.hdf5
Epoch 6/20

Epoch 00006: val_categorical_accuracy improved from 0.80706 to 0.83859, saving model to weights1-0.hdf5
Epoch 7/20

Epoch 00007: val_categorical_accuracy did not improve from 0.83859
Epoch 8/20

Epoch 00008: val_categorical_accuracy improved from 0.83859 to 0.84610, saving model to weights1-0.hdf5



Epoch 00011: val_categorical_accuracy did not improve from 0.87012
Epoch 12/20

Epoch 00012: val_categorical_accuracy did not improve from 0.87012
Epoch 13/20

Epoch 00013: val_categorical_accuracy did not improve from 0.87012
Epoch 14/20

Epoch 00014: val_categorical_accuracy did not improve from 0.87012
Epoch 15/20

Epoch 00015: val_categorical_accuracy did not improve from 0.87012
Epoch 16/20

Epoch 00016: val_categorical_accuracy did not improve from 0.87012
Epoch 17/20

Epoch 00017: val_categorical_accuracy did not improve from 0.87012
Epoch 18/20

Epoch 00018: val_categorical_accuracy did not improve from 0.87012
Epoch 19/20

Epoch 00019: val_categorical_accuracy did not improve from 0.87012
Epoch 20/20

Epoch 00020: val_categorical_accuracy did not improve from 0.87012
Evaluating best model and registering score:

Alternativa sem pré-treino - Fold 2:
Train on 11981 samples, validate on 1331 samples
Epoch 1/20

Epoch 00001: val_categorical_accuracy improved from -inf to 0.54545,


Epoch 00001: val_categorical_accuracy improved from -inf to 0.57100, saving model to weights1-3.hdf5
Epoch 2/20

Epoch 00002: val_categorical_accuracy improved from 0.57100 to 0.67092, saving model to weights1-3.hdf5
Epoch 3/20

Epoch 00003: val_categorical_accuracy improved from 0.67092 to 0.77010, saving model to weights1-3.hdf5
Epoch 4/20

Epoch 00004: val_categorical_accuracy improved from 0.77010 to 0.78663, saving model to weights1-3.hdf5
Epoch 5/20

Epoch 00005: val_categorical_accuracy improved from 0.78663 to 0.81518, saving model to weights1-3.hdf5
Epoch 6/20

Epoch 00006: val_categorical_accuracy improved from 0.81518 to 0.83997, saving model to weights1-3.hdf5
Epoch 7/20

Epoch 00007: val_categorical_accuracy did not improve from 0.83997
Epoch 8/20

Epoch 00008: val_categorical_accuracy improved from 0.83997 to 0.84523, saving model to weights1-3.hdf5
Epoch 9/20

Epoch 00009: val_categorical_accuracy improved from 0.84523 to 0.85875, saving model to weights1-3.hdf5
Epoch 1


Epoch 00003: val_categorical_accuracy improved from 0.62810 to 0.73403, saving model to weights1-6.hdf5
Epoch 4/20

Epoch 00004: val_categorical_accuracy improved from 0.73403 to 0.76484, saving model to weights1-6.hdf5
Epoch 5/20

Epoch 00005: val_categorical_accuracy improved from 0.76484 to 0.82119, saving model to weights1-6.hdf5
Epoch 6/20

Epoch 00006: val_categorical_accuracy improved from 0.82119 to 0.82494, saving model to weights1-6.hdf5
Epoch 7/20

Epoch 00007: val_categorical_accuracy improved from 0.82494 to 0.83471, saving model to weights1-6.hdf5
Epoch 8/20

Epoch 00008: val_categorical_accuracy improved from 0.83471 to 0.85049, saving model to weights1-6.hdf5
Epoch 9/20

Epoch 00009: val_categorical_accuracy improved from 0.85049 to 0.85500, saving model to weights1-6.hdf5
Epoch 10/20

Epoch 00010: val_categorical_accuracy improved from 0.85500 to 0.86176, saving model to weights1-6.hdf5
Epoch 11/20

Epoch 00011: val_categorical_accuracy did not improve from 0.86176
Ep


Epoch 00013: val_categorical_accuracy improved from 0.85199 to 0.85274, saving model to weights1-7.hdf5
Epoch 14/20

Epoch 00014: val_categorical_accuracy improved from 0.85274 to 0.85424, saving model to weights1-7.hdf5
Epoch 15/20

Epoch 00015: val_categorical_accuracy did not improve from 0.85424
Epoch 16/20

Epoch 00016: val_categorical_accuracy did not improve from 0.85424
Epoch 17/20

Epoch 00017: val_categorical_accuracy improved from 0.85424 to 0.86852, saving model to weights1-7.hdf5
Epoch 18/20

Epoch 00018: val_categorical_accuracy did not improve from 0.86852
Epoch 19/20

Epoch 00019: val_categorical_accuracy did not improve from 0.86852
Epoch 20/20

Epoch 00020: val_categorical_accuracy did not improve from 0.86852
Evaluating best model and registering score:

Alternativa sem pré-treino - Fold 8:
Train on 11981 samples, validate on 1331 samples
Epoch 1/20

Epoch 00001: val_categorical_accuracy improved from -inf to 0.54545, saving model to weights1-8.hdf5
Epoch 2/20

Epoc


Epoch 00004: val_categorical_accuracy improved from 0.74906 to 0.80766, saving model to weights1-9.hdf5
Epoch 5/20

Epoch 00005: val_categorical_accuracy improved from 0.80766 to 0.82044, saving model to weights1-9.hdf5
Epoch 6/20

Epoch 00006: val_categorical_accuracy improved from 0.82044 to 0.84974, saving model to weights1-9.hdf5
Epoch 7/20

Epoch 00007: val_categorical_accuracy improved from 0.84974 to 0.86476, saving model to weights1-9.hdf5
Epoch 8/20

Epoch 00008: val_categorical_accuracy did not improve from 0.86476
Epoch 9/20

Epoch 00009: val_categorical_accuracy did not improve from 0.86476
Epoch 10/20

Epoch 00010: val_categorical_accuracy did not improve from 0.86476
Epoch 11/20

Epoch 00011: val_categorical_accuracy improved from 0.86476 to 0.87153, saving model to weights1-9.hdf5
Epoch 12/20

Epoch 00012: val_categorical_accuracy did not improve from 0.87153
Epoch 13/20

Epoch 00013: val_categorical_accuracy did not improve from 0.87153
Epoch 14/20

Epoch 00014: val_ca


Epoch 00005: val_categorical_accuracy improved from 0.80841 to 0.83471, saving model to weights2-2.hdf5
Epoch 6/20

Epoch 00006: val_categorical_accuracy improved from 0.83471 to 0.85124, saving model to weights2-2.hdf5
Epoch 7/20

Epoch 00007: val_categorical_accuracy did not improve from 0.85124
Epoch 8/20

Epoch 00008: val_categorical_accuracy did not improve from 0.85124
Epoch 9/20

Epoch 00009: val_categorical_accuracy improved from 0.85124 to 0.86251, saving model to weights2-2.hdf5
Epoch 10/20

Epoch 00010: val_categorical_accuracy improved from 0.86251 to 0.86551, saving model to weights2-2.hdf5
Epoch 11/20

Epoch 00011: val_categorical_accuracy improved from 0.86551 to 0.86627, saving model to weights2-2.hdf5
Epoch 12/20

Epoch 00012: val_categorical_accuracy did not improve from 0.86627
Epoch 13/20

Epoch 00013: val_categorical_accuracy improved from 0.86627 to 0.87153, saving model to weights2-2.hdf5
Epoch 14/20

Epoch 00014: val_categorical_accuracy improved from 0.87153 t


Epoch 00004: val_categorical_accuracy improved from 0.79339 to 0.81743, saving model to weights2-5.hdf5
Epoch 5/20

Epoch 00005: val_categorical_accuracy improved from 0.81743 to 0.82194, saving model to weights2-5.hdf5
Epoch 6/20

Epoch 00006: val_categorical_accuracy improved from 0.82194 to 0.83321, saving model to weights2-5.hdf5
Epoch 7/20

Epoch 00007: val_categorical_accuracy improved from 0.83321 to 0.84523, saving model to weights2-5.hdf5
Epoch 8/20

Epoch 00008: val_categorical_accuracy did not improve from 0.84523
Epoch 9/20

Epoch 00009: val_categorical_accuracy improved from 0.84523 to 0.85575, saving model to weights2-5.hdf5
Epoch 10/20

Epoch 00010: val_categorical_accuracy did not improve from 0.85575
Epoch 11/20

Epoch 00011: val_categorical_accuracy improved from 0.85575 to 0.86251, saving model to weights2-5.hdf5
Epoch 12/20

Epoch 00012: val_categorical_accuracy improved from 0.86251 to 0.86401, saving model to weights2-5.hdf5
Epoch 13/20

Epoch 00013: val_categori


Epoch 00005: val_categorical_accuracy improved from 0.80841 to 0.82194, saving model to weights2-8.hdf5
Epoch 6/20

Epoch 00006: val_categorical_accuracy improved from 0.82194 to 0.82494, saving model to weights2-8.hdf5
Epoch 7/20

Epoch 00007: val_categorical_accuracy improved from 0.82494 to 0.83246, saving model to weights2-8.hdf5
Epoch 8/20

Epoch 00008: val_categorical_accuracy improved from 0.83246 to 0.83396, saving model to weights2-8.hdf5
Epoch 9/20

Epoch 00009: val_categorical_accuracy improved from 0.83396 to 0.84298, saving model to weights2-8.hdf5
Epoch 10/20

Epoch 00010: val_categorical_accuracy improved from 0.84298 to 0.84598, saving model to weights2-8.hdf5
Epoch 11/20

Epoch 00011: val_categorical_accuracy improved from 0.84598 to 0.85500, saving model to weights2-8.hdf5
Epoch 12/20

Epoch 00012: val_categorical_accuracy did not improve from 0.85500
Epoch 13/20

Epoch 00013: val_categorical_accuracy did not improve from 0.85500
Epoch 14/20

Epoch 00014: val_categor


Epoch 00015: val_categorical_accuracy improved from 0.86101 to 0.86401, saving model to weights2-9.hdf5
Epoch 16/20

Epoch 00016: val_categorical_accuracy did not improve from 0.86401
Epoch 17/20

Epoch 00017: val_categorical_accuracy did not improve from 0.86401
Epoch 18/20

Epoch 00018: val_categorical_accuracy improved from 0.86401 to 0.86927, saving model to weights2-9.hdf5
Epoch 19/20

Epoch 00019: val_categorical_accuracy improved from 0.86927 to 0.86927, saving model to weights2-9.hdf5
Epoch 20/20

Epoch 00020: val_categorical_accuracy did not improve from 0.86927
Evaluating best model and registering score:

Alternativa NILC variável - Fold 0:
Train on 11980 samples, validate on 1332 samples
Epoch 1/20

Epoch 00001: val_categorical_accuracy improved from -inf to 0.64414, saving model to weights3-0.hdf5
Epoch 2/20

Epoch 00002: val_categorical_accuracy improved from 0.64414 to 0.77327, saving model to weights3-0.hdf5
Epoch 3/20

Epoch 00003: val_categorical_accuracy improved fr


Epoch 00015: val_categorical_accuracy did not improve from 0.90533
Epoch 16/20

Epoch 00016: val_categorical_accuracy did not improve from 0.90533
Epoch 17/20

Epoch 00017: val_categorical_accuracy did not improve from 0.90533
Epoch 18/20

Epoch 00018: val_categorical_accuracy did not improve from 0.90533
Epoch 19/20

Epoch 00019: val_categorical_accuracy did not improve from 0.90533
Epoch 20/20

Epoch 00020: val_categorical_accuracy did not improve from 0.90533
Evaluating best model and registering score:

Alternativa NILC variável - Fold 3:
Train on 11981 samples, validate on 1331 samples
Epoch 1/20

Epoch 00001: val_categorical_accuracy improved from -inf to 0.66717, saving model to weights3-3.hdf5
Epoch 2/20

Epoch 00002: val_categorical_accuracy improved from 0.66717 to 0.80090, saving model to weights3-3.hdf5
Epoch 3/20

Epoch 00003: val_categorical_accuracy improved from 0.80090 to 0.85575, saving model to weights3-3.hdf5
Epoch 4/20

Epoch 00004: val_categorical_accuracy improv


Epoch 00006: val_categorical_accuracy improved from 0.86326 to 0.87002, saving model to weights3-4.hdf5
Epoch 7/20

Epoch 00007: val_categorical_accuracy improved from 0.87002 to 0.88129, saving model to weights3-4.hdf5
Epoch 8/20

Epoch 00008: val_categorical_accuracy improved from 0.88129 to 0.88355, saving model to weights3-4.hdf5
Epoch 9/20

Epoch 00009: val_categorical_accuracy did not improve from 0.88355
Epoch 10/20

Epoch 00010: val_categorical_accuracy did not improve from 0.88355
Epoch 11/20

Epoch 00011: val_categorical_accuracy did not improve from 0.88355
Epoch 12/20

Epoch 00012: val_categorical_accuracy improved from 0.88355 to 0.88505, saving model to weights3-4.hdf5
Epoch 13/20

Epoch 00013: val_categorical_accuracy improved from 0.88505 to 0.88956, saving model to weights3-4.hdf5
Epoch 14/20

Epoch 00014: val_categorical_accuracy improved from 0.88956 to 0.89031, saving model to weights3-4.hdf5
Epoch 15/20

Epoch 00015: val_categorical_accuracy did not improve from 0


Epoch 00017: val_categorical_accuracy improved from 0.88956 to 0.89031, saving model to weights3-5.hdf5
Epoch 18/20

Epoch 00018: val_categorical_accuracy did not improve from 0.89031
Epoch 19/20

Epoch 00019: val_categorical_accuracy improved from 0.89031 to 0.89031, saving model to weights3-5.hdf5
Epoch 20/20

Epoch 00020: val_categorical_accuracy did not improve from 0.89031
Evaluating best model and registering score:

Alternativa NILC variável - Fold 6:
Train on 11981 samples, validate on 1331 samples
Epoch 1/20

Epoch 00001: val_categorical_accuracy improved from -inf to 0.60105, saving model to weights3-6.hdf5
Epoch 2/20

Epoch 00002: val_categorical_accuracy improved from 0.60105 to 0.76935, saving model to weights3-6.hdf5
Epoch 3/20

Epoch 00003: val_categorical_accuracy improved from 0.76935 to 0.82194, saving model to weights3-6.hdf5
Epoch 4/20

Epoch 00004: val_categorical_accuracy improved from 0.82194 to 0.84748, saving model to weights3-6.hdf5
Epoch 5/20

Epoch 00005: v


Epoch 00007: val_categorical_accuracy did not improve from 0.88129
Epoch 8/20

Epoch 00008: val_categorical_accuracy improved from 0.88129 to 0.88655, saving model to weights3-7.hdf5
Epoch 9/20

Epoch 00009: val_categorical_accuracy did not improve from 0.88655
Epoch 10/20

Epoch 00010: val_categorical_accuracy improved from 0.88655 to 0.88956, saving model to weights3-7.hdf5
Epoch 11/20

Epoch 00011: val_categorical_accuracy did not improve from 0.88956
Epoch 12/20

Epoch 00012: val_categorical_accuracy did not improve from 0.88956
Epoch 13/20

Epoch 00013: val_categorical_accuracy improved from 0.88956 to 0.89181, saving model to weights3-7.hdf5
Epoch 14/20

Epoch 00014: val_categorical_accuracy did not improve from 0.89181
Epoch 15/20

Epoch 00015: val_categorical_accuracy did not improve from 0.89181
Epoch 16/20

Epoch 00016: val_categorical_accuracy improved from 0.89181 to 0.89181, saving model to weights3-7.hdf5
Epoch 17/20

Epoch 00017: val_categorical_accuracy improved from 0


Epoch 00017: val_categorical_accuracy improved from 0.90008 to 0.90909, saving model to weights3-8.hdf5
Epoch 18/20

Epoch 00018: val_categorical_accuracy did not improve from 0.90909
Epoch 19/20

Epoch 00019: val_categorical_accuracy did not improve from 0.90909
Epoch 20/20

Epoch 00020: val_categorical_accuracy did not improve from 0.90909
Evaluating best model and registering score:

Alternativa NILC variável - Fold 9:
Train on 11981 samples, validate on 1331 samples
Epoch 1/20

Epoch 00001: val_categorical_accuracy improved from -inf to 0.63411, saving model to weights3-9.hdf5
Epoch 2/20

Epoch 00002: val_categorical_accuracy improved from 0.63411 to 0.78738, saving model to weights3-9.hdf5
Epoch 3/20

Epoch 00003: val_categorical_accuracy improved from 0.78738 to 0.82419, saving model to weights3-9.hdf5
Epoch 4/20

Epoch 00004: val_categorical_accuracy improved from 0.82419 to 0.84523, saving model to weights3-9.hdf5
Epoch 5/20

Epoch 00005: val_categorical_accuracy improved from


Epoch 00007: val_categorical_accuracy improved from 0.87387 to 0.87538, saving model to weights4-0.hdf5
Epoch 8/20

Epoch 00008: val_categorical_accuracy did not improve from 0.87538
Epoch 9/20

Epoch 00009: val_categorical_accuracy improved from 0.87538 to 0.87688, saving model to weights4-0.hdf5
Epoch 10/20

Epoch 00010: val_categorical_accuracy improved from 0.87688 to 0.88363, saving model to weights4-0.hdf5
Epoch 11/20

Epoch 00011: val_categorical_accuracy did not improve from 0.88363
Epoch 12/20

Epoch 00012: val_categorical_accuracy did not improve from 0.88363
Epoch 13/20

Epoch 00013: val_categorical_accuracy did not improve from 0.88363
Epoch 14/20

Epoch 00014: val_categorical_accuracy improved from 0.88363 to 0.88438, saving model to weights4-0.hdf5
Epoch 15/20

Epoch 00015: val_categorical_accuracy did not improve from 0.88438
Epoch 16/20

Epoch 00016: val_categorical_accuracy did not improve from 0.88438
Epoch 17/20

Epoch 00017: val_categorical_accuracy improved from 0


Epoch 00019: val_categorical_accuracy did not improve from 0.88138
Epoch 20/20

Epoch 00020: val_categorical_accuracy improved from 0.88138 to 0.88664, saving model to weights4-1.hdf5
Evaluating best model and registering score:

Alternativa Acordãos fixo - Fold 2:
Train on 11981 samples, validate on 1331 samples
Epoch 1/20

Epoch 00001: val_categorical_accuracy improved from -inf to 0.78287, saving model to weights4-2.hdf5
Epoch 2/20

Epoch 00002: val_categorical_accuracy improved from 0.78287 to 0.83246, saving model to weights4-2.hdf5
Epoch 3/20

Epoch 00003: val_categorical_accuracy improved from 0.83246 to 0.86176, saving model to weights4-2.hdf5
Epoch 4/20

Epoch 00004: val_categorical_accuracy improved from 0.86176 to 0.87453, saving model to weights4-2.hdf5
Epoch 5/20

Epoch 00005: val_categorical_accuracy improved from 0.87453 to 0.88204, saving model to weights4-2.hdf5
Epoch 6/20

Epoch 00006: val_categorical_accuracy did not improve from 0.88204
Epoch 7/20

Epoch 00007: val


Epoch 00009: val_categorical_accuracy improved from 0.88430 to 0.88881, saving model to weights4-3.hdf5
Epoch 10/20

Epoch 00010: val_categorical_accuracy improved from 0.88881 to 0.88956, saving model to weights4-3.hdf5
Epoch 11/20

Epoch 00011: val_categorical_accuracy improved from 0.88956 to 0.89632, saving model to weights4-3.hdf5
Epoch 12/20

Epoch 00012: val_categorical_accuracy did not improve from 0.89632
Epoch 13/20

Epoch 00013: val_categorical_accuracy did not improve from 0.89632
Epoch 14/20

Epoch 00014: val_categorical_accuracy did not improve from 0.89632
Epoch 15/20

Epoch 00015: val_categorical_accuracy did not improve from 0.89632
Epoch 16/20

Epoch 00016: val_categorical_accuracy did not improve from 0.89632
Epoch 17/20

Epoch 00017: val_categorical_accuracy did not improve from 0.89632
Epoch 18/20

Epoch 00018: val_categorical_accuracy improved from 0.89632 to 0.90233, saving model to weights4-3.hdf5
Epoch 19/20

Epoch 00019: val_categorical_accuracy did not impro


Epoch 00020: val_categorical_accuracy did not improve from 0.88805
Evaluating best model and registering score:

Alternativa Acordãos fixo - Fold 5:
Train on 11981 samples, validate on 1331 samples
Epoch 1/20

Epoch 00001: val_categorical_accuracy improved from -inf to 0.78137, saving model to weights4-5.hdf5
Epoch 2/20

Epoch 00002: val_categorical_accuracy improved from 0.78137 to 0.83997, saving model to weights4-5.hdf5
Epoch 3/20

Epoch 00003: val_categorical_accuracy improved from 0.83997 to 0.84974, saving model to weights4-5.hdf5
Epoch 4/20

Epoch 00004: val_categorical_accuracy improved from 0.84974 to 0.86476, saving model to weights4-5.hdf5
Epoch 5/20

Epoch 00005: val_categorical_accuracy improved from 0.86476 to 0.87002, saving model to weights4-5.hdf5
Epoch 6/20

Epoch 00006: val_categorical_accuracy did not improve from 0.87002
Epoch 7/20

Epoch 00007: val_categorical_accuracy improved from 0.87002 to 0.87754, saving model to weights4-5.hdf5
Epoch 8/20

Epoch 00008: val_

Epoch 2/20

Epoch 00002: val_categorical_accuracy improved from 0.78663 to 0.83471, saving model to weights4-8.hdf5
Epoch 3/20

Epoch 00003: val_categorical_accuracy improved from 0.83471 to 0.85124, saving model to weights4-8.hdf5
Epoch 4/20

Epoch 00004: val_categorical_accuracy improved from 0.85124 to 0.86251, saving model to weights4-8.hdf5
Epoch 5/20

Epoch 00005: val_categorical_accuracy did not improve from 0.86251
Epoch 6/20

Epoch 00006: val_categorical_accuracy improved from 0.86251 to 0.87904, saving model to weights4-8.hdf5
Epoch 7/20

Epoch 00007: val_categorical_accuracy improved from 0.87904 to 0.88129, saving model to weights4-8.hdf5
Epoch 8/20

Epoch 00008: val_categorical_accuracy did not improve from 0.88129
Epoch 9/20

Epoch 00009: val_categorical_accuracy did not improve from 0.88129
Epoch 10/20

Epoch 00010: val_categorical_accuracy improved from 0.88129 to 0.88279, saving model to weights4-8.hdf5
Epoch 11/20

Epoch 00011: val_categorical_accuracy improved from 0


Epoch 00003: val_categorical_accuracy improved from 0.82733 to 0.86036, saving model to weights5-1.hdf5
Epoch 4/20

Epoch 00004: val_categorical_accuracy did not improve from 0.86036
Epoch 5/20

Epoch 00005: val_categorical_accuracy improved from 0.86036 to 0.86862, saving model to weights5-1.hdf5
Epoch 6/20

Epoch 00006: val_categorical_accuracy improved from 0.86862 to 0.86862, saving model to weights5-1.hdf5
Epoch 7/20

Epoch 00007: val_categorical_accuracy did not improve from 0.86862
Epoch 8/20

Epoch 00008: val_categorical_accuracy improved from 0.86862 to 0.87538, saving model to weights5-1.hdf5
Epoch 9/20

Epoch 00009: val_categorical_accuracy did not improve from 0.87538
Epoch 10/20

Epoch 00010: val_categorical_accuracy improved from 0.87538 to 0.88138, saving model to weights5-1.hdf5
Epoch 11/20

Epoch 00011: val_categorical_accuracy did not improve from 0.88138
Epoch 12/20

Epoch 00012: val_categorical_accuracy improved from 0.88138 to 0.88288, saving model to weights5-1.h


Epoch 00004: val_categorical_accuracy improved from 0.86777 to 0.87228, saving model to weights5-4.hdf5
Epoch 5/20

Epoch 00005: val_categorical_accuracy did not improve from 0.87228
Epoch 6/20

Epoch 00006: val_categorical_accuracy improved from 0.87228 to 0.87754, saving model to weights5-4.hdf5
Epoch 7/20

Epoch 00007: val_categorical_accuracy improved from 0.87754 to 0.88430, saving model to weights5-4.hdf5
Epoch 8/20

Epoch 00008: val_categorical_accuracy did not improve from 0.88430
Epoch 9/20

Epoch 00009: val_categorical_accuracy did not improve from 0.88430
Epoch 10/20

Epoch 00010: val_categorical_accuracy improved from 0.88430 to 0.89256, saving model to weights5-4.hdf5
Epoch 11/20

Epoch 00011: val_categorical_accuracy did not improve from 0.89256
Epoch 12/20

Epoch 00012: val_categorical_accuracy improved from 0.89256 to 0.89406, saving model to weights5-4.hdf5
Epoch 13/20

Epoch 00013: val_categorical_accuracy did not improve from 0.89406
Epoch 14/20

Epoch 00014: val_ca


Epoch 00005: val_categorical_accuracy improved from 0.87303 to 0.89181, saving model to weights5-7.hdf5
Epoch 6/20

Epoch 00006: val_categorical_accuracy improved from 0.89181 to 0.89406, saving model to weights5-7.hdf5
Epoch 7/20

Epoch 00007: val_categorical_accuracy did not improve from 0.89406
Epoch 8/20

Epoch 00008: val_categorical_accuracy improved from 0.89406 to 0.89557, saving model to weights5-7.hdf5
Epoch 9/20

Epoch 00009: val_categorical_accuracy did not improve from 0.89557
Epoch 10/20

Epoch 00010: val_categorical_accuracy did not improve from 0.89557
Epoch 11/20

Epoch 00011: val_categorical_accuracy did not improve from 0.89557
Epoch 12/20

Epoch 00012: val_categorical_accuracy did not improve from 0.89557
Epoch 13/20

Epoch 00013: val_categorical_accuracy did not improve from 0.89557
Epoch 14/20

Epoch 00014: val_categorical_accuracy did not improve from 0.89557
Epoch 15/20

Epoch 00015: val_categorical_accuracy did not improve from 0.89557
Epoch 16/20

Epoch 00016:


Epoch 00016: val_categorical_accuracy did not improve from 0.89932
Epoch 17/20

Epoch 00017: val_categorical_accuracy did not improve from 0.89932
Epoch 18/20

Epoch 00018: val_categorical_accuracy did not improve from 0.89932
Epoch 19/20

Epoch 00019: val_categorical_accuracy did not improve from 0.89932
Epoch 20/20

Epoch 00020: val_categorical_accuracy did not improve from 0.89932
Evaluating best model and registering score:

Alternativa Acordãos variável - Fold 9:
Train on 11981 samples, validate on 1331 samples
Epoch 1/20

Epoch 00001: val_categorical_accuracy improved from -inf to 0.77385, saving model to weights5-9.hdf5
Epoch 2/20

Epoch 00002: val_categorical_accuracy improved from 0.77385 to 0.84448, saving model to weights5-9.hdf5
Epoch 3/20

Epoch 00003: val_categorical_accuracy improved from 0.84448 to 0.85800, saving model to weights5-9.hdf5
Epoch 4/20

Epoch 00004: val_categorical_accuracy improved from 0.85800 to 0.87303, saving model to weights5-9.hdf5
Epoch 5/20

Epoc

Unnamed: 0,Competência do TCU,Contrato Administrativo,Convênio,Desestatização,Direito Processual,Finanças Públicas,Gestão Administrativa,Licitação,Pessoal,Responsabilidade,accuracy,macro avg,weighted avg
sem pré-treino mean,0.773216,0.754331,0.718178,0.512194,0.881001,0.735516,0.595069,0.906701,0.95562,0.857467,0.866887,0.768929,0.864933
sem pré-treino std,0.037312,0.028111,0.039672,0.152096,0.015771,0.04658,0.109306,0.013776,0.006849,0.019275,0.004813,0.025638,0.005489
NILC fixo mean,0.834367,0.787535,0.73622,0.680333,0.878572,0.710316,0.667027,0.903933,0.956988,0.863169,0.875752,0.801846,0.874368
NILC fixo std,0.026728,0.019094,0.052377,0.110529,0.015674,0.049104,0.08452,0.012355,0.007489,0.012444,0.00775,0.022676,0.007861
NILC variável mean,0.828141,0.814332,0.774899,0.787684,0.904833,0.77643,0.701227,0.923774,0.970883,0.886135,0.897311,0.836834,0.896757
NILC variável std,0.035531,0.04038,0.019612,0.092204,0.01825,0.044454,0.060801,0.012877,0.004837,0.015796,0.006873,0.01643,0.006941
Acordãos fixo mean,0.836235,0.806861,0.784504,0.713898,0.901487,0.763376,0.675837,0.91932,0.967814,0.883952,0.894157,0.825328,0.892949
Acordãos fixo std,0.04838,0.015918,0.038609,0.109981,0.019782,0.053944,0.075166,0.016142,0.007614,0.012567,0.005749,0.019841,0.005914
Acordãos variável mean,0.835062,0.824652,0.7888,0.767185,0.894421,0.784493,0.709311,0.923726,0.969914,0.888006,0.898213,0.838557,0.897405
Acordãos variável std,0.040405,0.020315,0.036093,0.109382,0.014296,0.058275,0.067457,0.016115,0.007775,0.015679,0.006173,0.020968,0.006234


In [15]:
# Persist the transposed summary (one row per alternative/statistic) as a Latin-1 CSV.
df_medias.T.to_csv('scores_embeddings_gru.csv', encoding = 'Latin1')