In [1]:
import json
import pandas as pd
from pathlib import Path
from pprint import pprint
import datetime
import time
import numpy as np
import matplotlib as plt

import pandas
import researchpy as rp
import seaborn as sns

import statsmodels.api as sm
from statsmodels.formula.api import ols
import statsmodels.stats.multicomp

%matplotlib inline

In [2]:
# Result directories used by earlier experiments (kept for reference):
#   /media/discoD/models/elmo/ner/results_20_epochs_ibm
#   /media/discoD/models/elmo/ner/results_1_epoch_harem_all_combinations
#   /media/discoD/models/elmo/ner/mestrado/results_harem_final/
# Root directory of the run whose results this notebook aggregates.
path_results = Path('/media/discoD/models/elmo/ner/mestrado') / 'results_datalawyer_final_3.1'

In [3]:
# Show every entry directly under the results root, one path per line.
result_folders = list(path_results.iterdir())
for folder in result_folders:
    print(folder)

/media/discoD/models/elmo/ner/mestrado/results_datalawyer_final_3.1/datalawyer-ft_ELMo+GloVe
/media/discoD/models/elmo/ner/mestrado/results_datalawyer_final_3.1/datalawyer-ft_ELMo+Word2Vec-jur


In [4]:
# Walk <results root>/<model folder>/<run folder>/ and load, per run folder,
# the JSON file whose name starts with 'config' and the one whose name starts
# with 'metrics.'. Both dicts are keyed by the run folder's name.
configs = dict()
metrics = dict()
for model_folder in path_results.iterdir():
    # Stray files next to the model folders would make iterdir() raise.
    if not model_folder.is_dir():
        continue
    for embedding_folder in model_folder.iterdir():
        if not embedding_folder.is_dir():
            continue
        key = embedding_folder.name
        has_metrics = False
        for results_file in embedding_folder.iterdir():
            if results_file.name.endswith('.json'):
                if results_file.name.startswith('config'):
                    configs[key] = json.loads(results_file.read_bytes())
                elif results_file.name.startswith('metrics.'):
                    metrics[key] = json.loads(results_file.read_bytes())
                    has_metrics = True
        if not has_metrics:
            print('metrics.json not found for %s' % key)
            # A run without metrics is dropped; pop() tolerates run folders
            # that had no config file either (del would raise KeyError).
            configs.pop(key, None)

print(len(metrics))
print(len(configs))

metrics.json not found for datalawyer-ft_ELMo+Word2Vec-jur_4
8
8


In [5]:
def get_seconds(time_str):
    """Convert a duration string into a whole number of seconds (as a float).

    Accepts 'H:MM:SS' with an optional fractional part, and also the
    'N day(s), H:MM:SS' form that ``str(datetime.timedelta)`` produces for
    durations of 24 hours or more. The hour field is not limited to 0-23.

    Args:
        time_str: duration text, e.g. '3:59:44.932918' or '1 day, 2:03:04'.

    Returns:
        Total seconds as a float. Everything after the first '.' is
        discarded, so fractional seconds are truncated.

    Raises:
        ValueError: if the clock part is not three ':'-separated integers.
    """
    clock = time_str.split('.')[0]
    days = 0
    if ',' in clock:
        # 'N day, H:MM:SS' -> leading integer is the day count.
        day_part, clock = clock.split(',', 1)
        days = int(day_part.split()[0])
    hours, minutes, seconds = (int(part) for part in clock.strip().split(':'))
    return datetime.timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds).total_seconds()
def get_average_epoch_duration(metrics):
    """Return the average number of seconds spent per epoch for one run.

    Reads 'training_duration' and 'training_epochs' from the given metrics
    mapping. The epoch count used as divisor is ``training_epochs + 1``
    (presumably because the stored value is a zero-based index -- confirm
    against the training framework).
    """
    return get_seconds(metrics['training_duration']) / (metrics['training_epochs'] + 1)

In [6]:
# --- Run identification ---------------------------------------------------
TRAINING_ID = 'Training_ID'
MODEL = 'Model'
REPRESENTATION = 'Representation'
EMBEDDING = 'Embedding'
EMBEDDING_TYPE = 'Embedding_Type'
BATCH_SIZE = 'Batch_Size'
ELMO_DROPOUT = 'ELMo_Dropout'
TRAINING_NUMBER = 'Training_Number'
EXECUTION_NUMBER = 'Execution_Number'

# --- Epochs and timing ----------------------------------------------------
BEST_EPOCH = 'Best_Epoch'
TRAINING_EPOCHS = 'Training_Epochs'
TRAINING_DURATION = 'Training_Duration'
TOTAL_DURATION = 'Total_Duration(s)'
AVERAGE_EPOCH_DURATION = 'Average_Epoch_Duration(s)'

# --- Training-set metrics -------------------------------------------------
TRAINING_ACCURACY = 'Training_Accuracy'
TRAINING_ACCURACY_TOP3 = 'Training_Accuracy_Top-3'
TRAINING_PRECISION = 'Training_Precision'
TRAINING_RECALL = 'Training_Recall'
TRAINING_F1_MEASURE = 'Training_F1-Measure'
TRAINING_LOSS = 'Training_Loss'

# --- Validation metrics at the best epoch ---------------------------------
BEST_VALIDATION_ACCURACY = 'Best_Validation_Accuracy'
BEST_VALIDATION_ACCURACY_TOP3 = 'Best_Validation_Accuracy_Top-3'
BEST_VALIDATION_PRECISION = 'Best_Validation_Precision'
BEST_VALIDATION_RECALL = 'Best_Validation_Recall'
BEST_VALIDATION_F1_MEASURE = 'Best_Validation_F1-Measure'
BEST_VALIDATION_LOSS = 'Best_Validation_Loss'

# --- Test-set metrics -----------------------------------------------------
TEST_ACCURACY = 'Test_Accuracy'
TEST_ACCURACY_TOP3 = 'Test_Accuracy_Top-3'
TEST_PRECISION = 'Test_Precision'
TEST_RECALL = 'Test_Recall'
TEST_F1_MEASURE = 'Test_F1_Measure'
TEST_LOSS = 'Test_Loss'

# Column order of the results table. BATCH_SIZE and ELMO_DROPOUT are defined
# above but are not part of this list.
columns = [
    TRAINING_ID,
    MODEL,
    REPRESENTATION,
    EMBEDDING,
    EMBEDDING_TYPE,
    TRAINING_NUMBER,
    EXECUTION_NUMBER,
    BEST_EPOCH,
    TRAINING_EPOCHS,
    TRAINING_DURATION,
    TOTAL_DURATION,
    AVERAGE_EPOCH_DURATION,
    TRAINING_ACCURACY,
    TRAINING_ACCURACY_TOP3,
    TRAINING_PRECISION,
    TRAINING_RECALL,
    TRAINING_F1_MEASURE,
    TRAINING_LOSS,
    BEST_VALIDATION_ACCURACY,
    BEST_VALIDATION_ACCURACY_TOP3,
    BEST_VALIDATION_PRECISION,
    BEST_VALIDATION_RECALL,
    BEST_VALIDATION_F1_MEASURE,
    BEST_VALIDATION_LOSS,
    TEST_ACCURACY,
    TEST_ACCURACY_TOP3,
    TEST_PRECISION,
    TEST_RECALL,
    TEST_F1_MEASURE,
    TEST_LOSS,
]
columns

['Training_ID',
 'Model',
 'Representation',
 'Embedding',
 'Embedding_Type',
 'Training_Number',
 'Execution_Number',
 'Best_Epoch',
 'Training_Epochs',
 'Training_Duration',
 'Total_Duration(s)',
 'Average_Epoch_Duration(s)',
 'Training_Accuracy',
 'Training_Accuracy_Top-3',
 'Training_Precision',
 'Training_Recall',
 'Training_F1-Measure',
 'Training_Loss',
 'Best_Validation_Accuracy',
 'Best_Validation_Accuracy_Top-3',
 'Best_Validation_Precision',
 'Best_Validation_Recall',
 'Best_Validation_F1-Measure',
 'Best_Validation_Loss',
 'Test_Accuracy',
 'Test_Accuracy_Top-3',
 'Test_Precision',
 'Test_Recall',
 'Test_F1_Measure',
 'Test_Loss']

In [7]:
def get_training_data_from_id(training_id, config):
    """Split a run id of the form '<model>_<representation>_<execution>' into fields.

    Args:
        training_id: run folder name, e.g. 'datalawyer-ft_ELMo+GloVe_1'.
        config: accepted for call compatibility; not read by this function.

    Returns:
        A new dict with the MODEL, REPRESENTATION and EXECUTION_NUMBER keys.
        The execution number is kept as a string.

    Raises:
        ValueError: if the id does not consist of exactly three '_'-separated
            parts. Previously this case returned None, which made the caller
            fail later with an unrelated TypeError.
    """
    data = training_id.split('_')
    # Trace of the parsed components, one line per run.
    print(data)
    if len(data) != 3:
        raise ValueError('Check id: %s' % training_id)
    model, representation, execution_number = data
    return {MODEL: model, REPRESENTATION: representation, EXECUTION_NUMBER: execution_number}

In [8]:
# Result columns that are copied unchanged from a run's metrics, mapped to the
# metrics key that holds the value. Insertion order is the fill order.
metric_key_by_column = {
    TRAINING_ACCURACY: 'training_accuracy',
    TRAINING_ACCURACY_TOP3: 'training_accuracy3',
    TRAINING_PRECISION: 'training_precision-overall',
    TRAINING_RECALL: 'training_recall-overall',
    TRAINING_F1_MEASURE: 'training_f1-measure-overall',
    TRAINING_LOSS: 'training_loss',
    BEST_VALIDATION_ACCURACY: 'best_validation_accuracy',
    BEST_VALIDATION_ACCURACY_TOP3: 'best_validation_accuracy3',
    BEST_VALIDATION_PRECISION: 'best_validation_precision-overall',
    BEST_VALIDATION_RECALL: 'best_validation_recall-overall',
    BEST_VALIDATION_F1_MEASURE: 'best_validation_f1-measure-overall',
    BEST_VALIDATION_LOSS: 'best_validation_loss',
    TEST_ACCURACY: 'test_accuracy',
    TEST_ACCURACY_TOP3: 'test_accuracy3',
    TEST_PRECISION: 'test_precision-overall',
    TEST_RECALL: 'test_recall-overall',
    TEST_F1_MEASURE: 'test_f1-measure-overall',
    TEST_LOSS: 'test_loss',
}
# Columns that need a computation or a differently-shaped lookup.
derived_columns = (BEST_EPOCH, TRAINING_EPOCHS, TRAINING_DURATION, TOTAL_DURATION, AVERAGE_EPOCH_DURATION)

# One record (dict) per run that has a config; becomes one table row later.
training_data = []
for training_id, config in configs.items():
    training_metrics = metrics[training_id]
    data = get_training_data_from_id(training_id, config)
    data[TRAINING_ID] = training_id
    if training_metrics:
        data[BEST_EPOCH] = training_metrics['best_epoch']
        data[TRAINING_EPOCHS] = training_metrics['training_epochs'] + 1
        data[TRAINING_DURATION] = training_metrics['training_duration']
        data[TOTAL_DURATION] = get_seconds(training_metrics['training_duration'])
        data[AVERAGE_EPOCH_DURATION] = get_average_epoch_duration(training_metrics)
        data.update({column: training_metrics[metric_key] for column, metric_key in metric_key_by_column.items()})
    else:
        # Empty metrics: keep the row but leave every metric column as None.
        data.update(dict.fromkeys((*derived_columns, *metric_key_by_column)))
    training_data.append(data)

['datalawyer-ft', 'ELMo+GloVe', '1']
['datalawyer-ft', 'ELMo+GloVe', '2']
['datalawyer-ft', 'ELMo+GloVe', '3']
['datalawyer-ft', 'ELMo+GloVe', '0']
['datalawyer-ft', 'ELMo+Word2Vec-jur', '3']
['datalawyer-ft', 'ELMo+Word2Vec-jur', '2']
['datalawyer-ft', 'ELMo+Word2Vec-jur', '1']
['datalawyer-ft', 'ELMo+Word2Vec-jur', '0']


In [9]:
# Allow wide frames to render without column truncation.
pd.options.display.max_columns = 1000
# Build the results table; `columns` fixes both the column set and its order.
training_data_df = pd.DataFrame(data=training_data, columns=columns)
# Optional filter to keep a single execution only:
# training_data_df = training_data_df[(training_data_df[EXECUTION_NUMBER]).astype(int) == 0]
training_data_df

Unnamed: 0,Training_ID,Model,Representation,Embedding,Embedding_Type,Training_Number,Execution_Number,Best_Epoch,Training_Epochs,Training_Duration,Total_Duration(s),Average_Epoch_Duration(s),Training_Accuracy,Training_Accuracy_Top-3,Training_Precision,Training_Recall,Training_F1-Measure,Training_Loss,Best_Validation_Accuracy,Best_Validation_Accuracy_Top-3,Best_Validation_Precision,Best_Validation_Recall,Best_Validation_F1-Measure,Best_Validation_Loss,Test_Accuracy,Test_Accuracy_Top-3,Test_Precision,Test_Recall,Test_F1_Measure,Test_Loss
0,datalawyer-ft_ELMo+GloVe_1,datalawyer-ft,ELMo+GloVe,,,,1,36,50,3:59:44.932918,14384.0,287.68,0.998848,0.998867,0.987895,0.98767,0.987783,2.750891,0.99209,0.992318,0.93049,0.938895,0.934673,51.839145,0.9919,0.992301,0.926752,0.922833,0.924788,62.014531
1,datalawyer-ft_ELMo+GloVe_2,datalawyer-ft,ELMo+GloVe,,,,2,30,50,4:36:02.519238,16562.0,331.24,0.998946,0.998974,0.98859,0.989154,0.988872,2.763186,0.992197,0.992288,0.925771,0.94102,0.933333,51.567391,0.991255,0.991527,0.917504,0.917019,0.917261,61.093881
2,datalawyer-ft_ELMo+GloVe_3,datalawyer-ft,ELMo+GloVe,,,,3,16,41,3:13:14.429147,11594.0,282.780488,0.998748,0.998782,0.986314,0.987327,0.98682,3.446905,0.991968,0.992227,0.93038,0.937301,0.933827,42.90033,0.991054,0.991513,0.919063,0.912262,0.91565,55.933421
3,datalawyer-ft_ELMo+GloVe_0,datalawyer-ft,ELMo+GloVe,,,,0,28,50,3:50:25.519497,13825.0,276.5,0.998855,0.99888,0.988688,0.987898,0.988293,3.17523,0.991847,0.992181,0.931435,0.938363,0.934886,49.735681,0.991728,0.992301,0.927885,0.918076,0.922954,59.344358
4,datalawyer-ft_ELMo+Word2Vec-jur_3,datalawyer-ft,ELMo+Word2Vec-jur,,,,3,46,50,4:18:40.888916,15520.0,310.4,0.99877,0.998785,0.986991,0.987441,0.987216,3.192201,0.992197,0.992486,0.935398,0.930925,0.933156,50.034478,0.991613,0.991943,0.925867,0.917548,0.921688,59.783391
5,datalawyer-ft_ELMo+Word2Vec-jur_2,datalawyer-ft,ELMo+Word2Vec-jur,,,,2,41,50,4:24:18.579010,15858.0,317.16,0.998946,0.998961,0.989498,0.989611,0.989554,2.81705,0.992273,0.992592,0.938205,0.927736,0.932941,54.068517,0.991412,0.991871,0.930533,0.913319,0.921846,64.401873
6,datalawyer-ft_ELMo+Word2Vec-jur_1,datalawyer-ft,ELMo+Word2Vec-jur,,,,1,12,37,2:52:02.613075,10322.0,278.972973,0.998764,0.998779,0.98597,0.986871,0.98642,3.619495,0.991558,0.991771,0.933192,0.935175,0.934183,46.582752,0.991713,0.991943,0.924186,0.914905,0.919522,52.984502
7,datalawyer-ft_ELMo+Word2Vec-jur_0,datalawyer-ft,ELMo+Word2Vec-jur,,,,0,19,44,3:34:16.238528,12856.0,292.181818,0.998848,0.99888,0.989607,0.989268,0.989438,3.157069,0.991847,0.991968,0.928947,0.937832,0.933369,50.223099,0.991642,0.991957,0.924548,0.919662,0.922099,55.785441


In [10]:
# Number of rows recorded for each execution number 0..9, one count per line.
execution_numbers = training_data_df[EXECUTION_NUMBER].astype(int)
for i in range(10):
    print(len(training_data_df[execution_numbers == i]))

2
2
2
2
0
0
0
0
0
0


In [11]:
# Persist the full results table; index=False is the documented way to omit
# the row index from the CSV.
training_data_df.to_csv('training_data_50_epochs_datalawyer_final.csv', index=False)

In [12]:
# Mean total training time across all runs, rendered as a timedelta string.
mean_total_seconds = training_data_df[TOTAL_DURATION].mean()
str(datetime.timedelta(seconds=mean_total_seconds))

'3:51:05.125000'

In [13]:
# describe() statistics that get multiplied by 100 before export.
default_exported_columns = ['mean', 'min', 'max', 'std']
# Display names assigned positionally to the eight describe() columns
# (count, mean, std, min, 25%, 50%, 75%, max).
renamed_columns = ['Contagem', 'F-Score', 'Desvio Padrão', 'Mínimo', '25%', '50%', '75%', 'Máximo']
# Renamed columns written to the CSV, in output order.
exported_columns_names = ['F-Score', 'Mínimo', 'Máximo', 'Desvio Padrão']
# Index-value replacements used when labelling groups.
model_map = {'harem-ft': 'Sim', 'harem': 'Não'}
representation_map = {'ELMo+CNN+Embeddings': 'ELMo+CNN+Vetor', 'ELMo+Embeddings': 'ELMo+Vetor'}
embedding_type_map = {'skip': 'Skip-Gram', 'No': 'Sem Vetor', 'cbow': 'CBoW'}
embedding_map = {'wang2vec': 'Wang2Vec', 'glove': 'GloVe', 'word2vec': 'Word2Vec', 'No': 'Sem Vetor', 'fasttext': 'FastText'}
replacements_map = {**model_map, **representation_map, **embedding_type_map, **embedding_map}

def get_group_csv(index_names, group_name, group_columns, target_value, index_map, dataframe=None, exported_columns=default_exported_columns):
    """Summarise one metric per group, export the summary to CSV and return it.

    Args:
        index_names: name (str) or list of names for the resulting index levels.
        group_name: text inserted into the output file name
            'grupo_<group_name>_50_epochs_mestrado_datalawyer_final.csv'.
        group_columns: column name(s) to group by.
        target_value: name of the column to describe.
        index_map: mapping applied to the index labels via ``rename``.
        dataframe: frame to summarise. When None (the default) the notebook's
            current ``training_data_df`` is looked up at call time, so a frame
            rebuilt after this function was defined is picked up.
        exported_columns: describe() statistics to multiply by 100.

    Returns:
        The describe() table sorted by descending mean, with columns renamed
        to ``renamed_columns``. Only the statistics in ``exported_columns`` are
        scaled by 100; the others (e.g. the quartiles with the defaults) stay
        on the original scale.
    """
    if dataframe is None:
        # Resolved here rather than in the signature: a default argument is
        # evaluated once, at definition time, and would go stale.
        dataframe = training_data_df
    if isinstance(index_names, str):
        index_names = [index_names]
    # Describe only the target column instead of every column of the frame.
    group = dataframe.groupby(group_columns)[target_value].describe().sort_values(by='mean', ascending=False)
    for column in exported_columns:
        group[column] = group[column] * 100
    group.index.names = index_names
    group = group.rename(index=index_map)
    group.columns = renamed_columns
    group.to_csv('grupo_' + group_name + '_50_epochs_mestrado_datalawyer_final.csv', columns=exported_columns_names, float_format = '%.2f%%')
    return group

In [14]:
# Test F1 summary per representation, exported and displayed.
get_group_csv(
    index_names='Representação',
    group_name=REPRESENTATION,
    group_columns=REPRESENTATION,
    target_value=TEST_F1_MEASURE,
    index_map=replacements_map,
)

Unnamed: 0_level_0,Contagem,F-Score,Desvio Padrão,Mínimo,25%,50%,75%,Máximo
Representação,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ELMo+Word2Vec-jur,4.0,92.128866,0.118989,91.952191,0.921147,0.921767,0.921909,92.209857
ELMo+GloVe,4.0,92.016343,0.439586,91.564987,0.916859,0.920108,0.923413,92.478814


In [15]:
# Directory expected to contain one score file per evaluated run.
scores_path = Path('/media/discoD/models/elmo/ner/mestrado') / 'scores_final'

In [16]:
# Collect one score per file, bucketed by the embedding name ('Wang2Vec' or
# 'GloVe') and the scenario ('selective' or 'total') found in the file name.
# Files matching neither embedding, or neither scenario, are ignored.
scores_wang2vec_selective, scores_wang2vec_total, scores_glove_selective, scores_glove_total = [], [], [], []
for score_file in scores_path.iterdir():
    # The score is the last whitespace-separated token on the second line.
    # The context manager closes the handle, which the chained
    # open().readlines() call never did.
    with score_file.open(mode='r', encoding='utf8') as score_handle:
        score = score_handle.readlines()[1].split()[-1]
    if 'Wang2Vec' in score_file.name:
        if 'selective' in score_file.name:
            scores_wang2vec_selective.append(float(score))
        elif 'total' in score_file.name:
            scores_wang2vec_total.append(float(score))
    elif 'GloVe' in score_file.name:
        if 'selective' in score_file.name:
            scores_glove_selective.append(float(score))
        elif 'total' in score_file.name:
            scores_glove_total.append(float(score))
print(len(scores_wang2vec_selective))
print(len(scores_wang2vec_total))
print(len(scores_glove_selective))
print(len(scores_glove_total))

FileNotFoundError: [Errno 2] No such file or directory: '/media/discoD/models/elmo/ner/mestrado/scores_final'

In [None]:
import statistics

In [None]:
# Mean score of each bucket, printed in the order the buckets were declared.
for scores in (scores_wang2vec_selective, scores_wang2vec_total, scores_glove_selective, scores_glove_total):
    print(statistics.mean(scores))