In [1]:
import json
import pandas as pd
from pathlib import Path
from pprint import pprint
import datetime
import time
import numpy as np
import matplotlib as plt

import pandas
import researchpy as rp
import seaborn as sns

import statsmodels.api as sm
from statsmodels.formula.api import ols
import statsmodels.stats.multicomp

%matplotlib inline

In [2]:
# Root folder of the experiment results read by the cells below.
# Previously used result roots, kept for reference:
#path_results = Path('/media/discoD/models/elmo/ner/results_20_epochs_ibm')
#path_results = Path('/media/discoD/models/elmo/ner/results_1_epoch_harem_all_combinations')
# path_results = Path('/media/discoD/models/elmo/ner/mestrado/results_harem_final/')
path_results = Path('/media/discoD/models/elmo/ner/mestrado/results_final/')

In [3]:
# Show every entry found directly under the results root.
for result_folder in path_results.iterdir():
    print(result_folder)

/media/discoD/models/elmo/ner/mestrado/results_final/harem-wikibrwac-ft_ELMo+CNN+Wang2Vec
/media/discoD/models/elmo/ner/mestrado/results_final/harem-wikibrwac-ft_ELMo+GloVe


In [4]:
# Load the parsed JSON of every training run into two dictionaries keyed by the name of
# the run folder (<path_results>/<model folder>/<run folder>):
#   configs[key] <- '.json' file whose name starts with 'config'
#   metrics[key] <- '.json' file whose name starts with 'metrics.'
# Runs without a metrics file are reported and left out of `configs` as well.
configs = dict()
metrics = dict()
for model_folder in path_results.iterdir():
    if not model_folder.is_dir():
        # A stray file next to the model folders cannot be iterated; ignore it.
        continue
    for embedding_folder in model_folder.iterdir():
        if not embedding_folder.is_dir():
            continue
        key = embedding_folder.name
        has_metrics = False
        for results_file in embedding_folder.iterdir():
            if results_file.name.endswith('.json'):
                if results_file.name.startswith('config'):
                    configs[key] = json.loads(results_file.read_bytes())
                elif results_file.name.startswith('metrics.'):
                    metrics[key] = json.loads(results_file.read_bytes())
                    has_metrics = True
        if not has_metrics:
            print('metrics.json not found for %s' % key)
            # pop() rather than del: the folder may hold no config file either, and
            # `del configs[key]` would then abort the whole scan with a KeyError.
            configs.pop(key, None)

print(len(metrics))
print(len(configs))

40
40


In [5]:
def get_seconds(time_str):
    """Convert a duration string into a whole number of seconds.

    Accepted forms are the ones produced by ``str(datetime.timedelta)``:
    ``H:MM:SS``, optionally followed by ``.ffffff`` and optionally preceded by
    ``N day, `` / ``N days, ``. The hour field is not limited to 0-23, so durations
    of a day or longer are supported.

    Args:
        time_str: duration text, e.g. ``'0:35:05.529400'`` or ``'1 day, 3:46:40'``.

    Returns:
        The duration in seconds as a float; the fractional part of the input is discarded.

    Raises:
        ValueError: if the text does not match the forms above or a field is out of range.
    """
    # Everything after the first '.' is the fractional part, which is dropped.
    clock = time_str.split('.')[0]
    days = 0
    if ',' in clock:
        # 'N day, H:MM:SS' / 'N days, H:MM:SS'
        days_text, clock = clock.split(',', 1)
        day_fields = days_text.split()
        if not day_fields:
            raise ValueError('Invalid duration: %r' % time_str)
        days = int(day_fields[0])
    hours, minutes, seconds = (int(field) for field in clock.strip().split(':'))
    # Same minute/second bounds that time.strptime enforces for %M and %S (0-59 / 0-61).
    if min(days, hours, minutes, seconds) < 0 or minutes > 59 or seconds > 61:
        raise ValueError('Invalid duration: %r' % time_str)
    return datetime.timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds).total_seconds()
def get_average_epoch_duration(metrics):
    """Return the average number of seconds spent per training epoch.

    Args:
        metrics: mapping with a 'training_duration' string and a numeric
            'training_epochs' entry.

    Returns:
        The duration in seconds divided by ``metrics['training_epochs'] + 1``
        (the stored value is presumably a zero-based epoch index - TODO confirm).
    """
    elapsed_seconds = get_seconds(metrics['training_duration'])
    return elapsed_seconds / (metrics['training_epochs'] + 1)

In [6]:
# Column names of the results table.

# -- run identification
TRAINING_ID = 'Training_ID'
SCENARIO = 'Scenario'
MODEL = 'Model'
REPRESENTATION = 'Representation'
EMBEDDING = 'Embedding'
EMBEDDING_TYPE = 'Embedding_Type'
BATCH_SIZE = 'Batch_Size'
ELMO_DROPOUT = 'ELMo_Dropout'
TRAINING_NUMBER = 'Training_Number'
EXECUTION_NUMBER = 'Execution_Number'

# -- epochs and timing
BEST_EPOCH = 'Best_Epoch'
TRAINING_EPOCHS = 'Training_Epochs'
TRAINING_DURATION = 'Training_Duration'
TOTAL_DURATION = 'Total_Duration(s)'
AVERAGE_EPOCH_DURATION = 'Average_Epoch_Duration(s)'

# -- metrics on the training set
TRAINING_ACCURACY = 'Training_Accuracy'
TRAINING_ACCURACY_TOP3 = 'Training_Accuracy_Top-3'
TRAINING_PRECISION = 'Training_Precision'
TRAINING_RECALL = 'Training_Recall'
TRAINING_F1_MEASURE = 'Training_F1-Measure'
TRAINING_LOSS = 'Training_Loss'

# -- best metrics on the validation set
BEST_VALIDATION_ACCURACY = 'Best_Validation_Accuracy'
BEST_VALIDATION_ACCURACY_TOP3 = 'Best_Validation_Accuracy_Top-3'
BEST_VALIDATION_PRECISION = 'Best_Validation_Precision'
BEST_VALIDATION_RECALL = 'Best_Validation_Recall'
BEST_VALIDATION_F1_MEASURE = 'Best_Validation_F1-Measure'
BEST_VALIDATION_LOSS = 'Best_Validation_Loss'

# -- metrics on the test set
TEST_ACCURACY = 'Test_Accuracy'
TEST_ACCURACY_TOP3 = 'Test_Accuracy_Top-3'
TEST_PRECISION = 'Test_Precision'
TEST_RECALL = 'Test_Recall'
TEST_F1_MEASURE = 'Test_F1_Measure'
TEST_LOSS = 'Test_Loss'

# Column order of the results table (BATCH_SIZE and ELMO_DROPOUT are not part of it).
columns = [
    TRAINING_ID, MODEL, SCENARIO, REPRESENTATION,
    EMBEDDING, EMBEDDING_TYPE, TRAINING_NUMBER, EXECUTION_NUMBER,
    BEST_EPOCH, TRAINING_EPOCHS,
    TRAINING_DURATION, TOTAL_DURATION, AVERAGE_EPOCH_DURATION,
    TRAINING_ACCURACY, TRAINING_ACCURACY_TOP3,
    TRAINING_PRECISION, TRAINING_RECALL, TRAINING_F1_MEASURE, TRAINING_LOSS,
    BEST_VALIDATION_ACCURACY, BEST_VALIDATION_ACCURACY_TOP3,
    BEST_VALIDATION_PRECISION, BEST_VALIDATION_RECALL, BEST_VALIDATION_F1_MEASURE, BEST_VALIDATION_LOSS,
    TEST_ACCURACY, TEST_ACCURACY_TOP3,
    TEST_PRECISION, TEST_RECALL, TEST_F1_MEASURE, TEST_LOSS,
]
columns

['Training_ID',
 'Model',
 'Scenario',
 'Representation',
 'Embedding',
 'Embedding_Type',
 'Training_Number',
 'Execution_Number',
 'Best_Epoch',
 'Training_Epochs',
 'Training_Duration',
 'Total_Duration(s)',
 'Average_Epoch_Duration(s)',
 'Training_Accuracy',
 'Training_Accuracy_Top-3',
 'Training_Precision',
 'Training_Recall',
 'Training_F1-Measure',
 'Training_Loss',
 'Best_Validation_Accuracy',
 'Best_Validation_Accuracy_Top-3',
 'Best_Validation_Precision',
 'Best_Validation_Recall',
 'Best_Validation_F1-Measure',
 'Best_Validation_Loss',
 'Test_Accuracy',
 'Test_Accuracy_Top-3',
 'Test_Precision',
 'Test_Recall',
 'Test_F1_Measure',
 'Test_Loss']

In [7]:
def get_training_data_from_id(training_id, config):
    """Split a training id into the fields it encodes.

    The id is expected to consist of four '_'-separated parts, e.g.
    'harem-wikibrwac-ft_ELMo+CNN+Wang2Vec_selective_0'. The split parts are always
    printed.

    Args:
        training_id: identifier of the training run.
        config: accepted for the caller's convenience; not used here.

    Returns:
        A dict mapping MODEL, REPRESENTATION, SCENARIO and EXECUTION_NUMBER to the
        four parts (as strings), or None - after printing a 'Check id' message - when
        the id does not split into exactly four parts.
    """
    parts = training_id.split('_')
    print(parts)
    if len(parts) != 4:
        print('Check id: %s' % training_id)
        return None
    model, representation, scenario, execution_number = parts
    return {MODEL: model, REPRESENTATION: representation, SCENARIO: scenario, EXECUTION_NUMBER: execution_number}

In [8]:
# Build one flat record per training run: the fields parsed from the training id plus
# the values read from that run's metrics.

# Result column -> key of the metrics mapping whose value is copied verbatim.
metric_key_by_column = {
    TRAINING_ACCURACY: 'training_accuracy',
    TRAINING_ACCURACY_TOP3: 'training_accuracy3',
    TRAINING_PRECISION: 'training_precision-overall',
    TRAINING_RECALL: 'training_recall-overall',
    TRAINING_F1_MEASURE: 'training_f1-measure-overall',
    TRAINING_LOSS: 'training_loss',
    BEST_VALIDATION_ACCURACY: 'best_validation_accuracy',
    BEST_VALIDATION_ACCURACY_TOP3: 'best_validation_accuracy3',
    BEST_VALIDATION_PRECISION: 'best_validation_precision-overall',
    BEST_VALIDATION_RECALL: 'best_validation_recall-overall',
    BEST_VALIDATION_F1_MEASURE: 'best_validation_f1-measure-overall',
    BEST_VALIDATION_LOSS: 'best_validation_loss',
    TEST_ACCURACY: 'test_accuracy',
    TEST_ACCURACY_TOP3: 'test_accuracy3',
    TEST_PRECISION: 'test_precision-overall',
    TEST_RECALL: 'test_recall-overall',
    TEST_F1_MEASURE: 'test_f1-measure-overall',
    TEST_LOSS: 'test_loss',
}
# Columns that need more than a plain copy (offset, conversion or derived value).
derived_columns = [BEST_EPOCH, TRAINING_EPOCHS, TRAINING_DURATION, TOTAL_DURATION, AVERAGE_EPOCH_DURATION]

training_data = []
for training_id, config in configs.items():
    data = get_training_data_from_id(training_id, config)
    if data is None:
        # The id did not have the expected four parts; skip the run instead of
        # failing on item assignment to None.
        print('Skipping %s: training id could not be parsed' % training_id)
        continue
    training_metrics = metrics[training_id]
    data[TRAINING_ID] = training_id
    if training_metrics:
        data[BEST_EPOCH] = training_metrics['best_epoch']
        data[TRAINING_EPOCHS] = training_metrics['training_epochs'] + 1
        data[TRAINING_DURATION] = training_metrics['training_duration']
        data[TOTAL_DURATION] = get_seconds(training_metrics['training_duration'])
        data[AVERAGE_EPOCH_DURATION] = get_average_epoch_duration(training_metrics)
        for column, metric_key in metric_key_by_column.items():
            data[column] = training_metrics[metric_key]
    else:
        # Empty metrics: keep the run in the table with blank metric columns.
        for column in [*derived_columns, *metric_key_by_column]:
            data[column] = None
    training_data.append(data)

['harem-wikibrwac-ft', 'ELMo+CNN+Wang2Vec', 'selective', '0']
['harem-wikibrwac-ft', 'ELMo+CNN+Wang2Vec', 'selective', '1']
['harem-wikibrwac-ft', 'ELMo+CNN+Wang2Vec', 'selective', '2']
['harem-wikibrwac-ft', 'ELMo+CNN+Wang2Vec', 'selective', '3']
['harem-wikibrwac-ft', 'ELMo+CNN+Wang2Vec', 'selective', '4']
['harem-wikibrwac-ft', 'ELMo+CNN+Wang2Vec', 'selective', '5']
['harem-wikibrwac-ft', 'ELMo+CNN+Wang2Vec', 'selective', '6']
['harem-wikibrwac-ft', 'ELMo+CNN+Wang2Vec', 'selective', '7']
['harem-wikibrwac-ft', 'ELMo+CNN+Wang2Vec', 'selective', '8']
['harem-wikibrwac-ft', 'ELMo+CNN+Wang2Vec', 'selective', '9']
['harem-wikibrwac-ft', 'ELMo+CNN+Wang2Vec', 'total', '0']
['harem-wikibrwac-ft', 'ELMo+CNN+Wang2Vec', 'total', '1']
['harem-wikibrwac-ft', 'ELMo+CNN+Wang2Vec', 'total', '2']
['harem-wikibrwac-ft', 'ELMo+CNN+Wang2Vec', 'total', '3']
['harem-wikibrwac-ft', 'ELMo+CNN+Wang2Vec', 'total', '4']
['harem-wikibrwac-ft', 'ELMo+CNN+Wang2Vec', 'total', '5']
['harem-wikibrwac-ft', 'ELMo+CNN

In [9]:
# Let pandas render up to 1000 columns so the wide results table is not elided.
pd.options.display.max_columns = 1000

# One row per training run, with the column order fixed by `columns`.
training_data_df = pd.DataFrame(data=training_data, columns=columns)
# Optional filter keeping only execution 0 of every configuration:
# training_data_df = training_data_df[(training_data_df[EXECUTION_NUMBER]).astype(int) == 0]
training_data_df

Unnamed: 0,Training_ID,Model,Scenario,Representation,Embedding,Embedding_Type,Training_Number,Execution_Number,Best_Epoch,Training_Epochs,Training_Duration,Total_Duration(s),Average_Epoch_Duration(s),Training_Accuracy,Training_Accuracy_Top-3,Training_Precision,Training_Recall,Training_F1-Measure,Training_Loss,Best_Validation_Accuracy,Best_Validation_Accuracy_Top-3,Best_Validation_Precision,Best_Validation_Recall,Best_Validation_F1-Measure,Best_Validation_Loss,Test_Accuracy,Test_Accuracy_Top-3,Test_Precision,Test_Recall,Test_F1_Measure,Test_Loss
0,harem-wikibrwac-ft_ELMo+CNN+Wang2Vec_selective_0,harem-wikibrwac-ft,selective,ELMo+CNN+Wang2Vec,,,,0,18,43,0:35:05.529400,2105.0,48.953488,0.999706,0.999726,0.995861,0.997561,0.99671,1.638087,0.979632,0.980968,0.844367,0.813356,0.828571,183.27447,0.979632,0.980968,0.844367,0.813356,0.828571,183.282954
1,harem-wikibrwac-ft_ELMo+CNN+Wang2Vec_selective_1,harem-wikibrwac-ft,selective,ELMo+CNN+Wang2Vec,,,,1,39,50,0:40:55.000682,2455.0,49.1,0.999736,0.999736,0.997072,0.996585,0.996828,0.808293,0.980158,0.981629,0.849965,0.813356,0.831257,273.187436,0.980158,0.981629,0.849965,0.813356,0.831257,273.197394
2,harem-wikibrwac-ft_ELMo+CNN+Wang2Vec_selective_2,harem-wikibrwac-ft,selective,ELMo+CNN+Wang2Vec,,,,2,20,45,0:37:07.398736,2227.0,49.488889,0.999777,0.999797,0.997072,0.996829,0.996951,0.986195,0.979617,0.981058,0.853488,0.813022,0.832763,210.706275,0.979617,0.981058,0.853488,0.813022,0.832763,210.703288
3,harem-wikibrwac-ft_ELMo+CNN+Wang2Vec_selective_3,harem-wikibrwac-ft,selective,ELMo+CNN+Wang2Vec,,,,3,31,50,0:41:07.584479,2467.0,49.34,0.999736,0.999756,0.996831,0.997317,0.997074,0.982821,0.980593,0.981959,0.848006,0.823372,0.835507,235.149607,0.980593,0.981959,0.848006,0.823372,0.835507,235.155054
4,harem-wikibrwac-ft_ELMo+CNN+Wang2Vec_selective_4,harem-wikibrwac-ft,selective,ELMo+CNN+Wang2Vec,,,,4,14,39,0:32:05.799800,1925.0,49.358974,0.999706,0.999716,0.997072,0.996829,0.996951,1.306922,0.980563,0.981839,0.845836,0.820701,0.833079,159.297179,0.980578,0.981854,0.84618,0.821035,0.833418,159.295671
5,harem-wikibrwac-ft_ELMo+CNN+Wang2Vec_selective_5,harem-wikibrwac-ft,selective,ELMo+CNN+Wang2Vec,,,,5,14,39,0:32:17.363800,1937.0,49.666667,0.999746,0.999766,0.996828,0.996585,0.996707,1.344295,0.979992,0.981298,0.831773,0.830384,0.831078,163.000181,0.979992,0.981298,0.831773,0.830384,0.831078,162.999026
6,harem-wikibrwac-ft_ELMo+CNN+Wang2Vec_selective_6,harem-wikibrwac-ft,selective,ELMo+CNN+Wang2Vec,,,,6,22,47,0:38:27.556357,2307.0,49.085106,0.999655,0.999675,0.997072,0.996585,0.996828,1.683995,0.979557,0.980908,0.85377,0.809015,0.83079,225.965356,0.979557,0.980908,0.85377,0.809015,0.83079,225.969567
7,harem-wikibrwac-ft_ELMo+CNN+Wang2Vec_selective_7,harem-wikibrwac-ft,selective,ELMo+CNN+Wang2Vec,,,,7,16,41,0:33:34.407302,2014.0,49.121951,0.999675,0.999685,0.996099,0.996341,0.99622,1.348052,0.979662,0.980938,0.853097,0.804674,0.828179,184.388008,0.979662,0.980938,0.853097,0.804674,0.828179,184.388875
8,harem-wikibrwac-ft_ELMo+CNN+Wang2Vec_selective_8,harem-wikibrwac-ft,selective,ELMo+CNN+Wang2Vec,,,,8,41,50,0:41:10.791327,2470.0,49.4,0.999868,0.999868,0.998294,0.99878,0.998537,0.667153,0.980248,0.981568,0.850783,0.816694,0.83339,289.23549,0.980248,0.981568,0.850783,0.816694,0.83339,289.244127
9,harem-wikibrwac-ft_ELMo+CNN+Wang2Vec_selective_9,harem-wikibrwac-ft,selective,ELMo+CNN+Wang2Vec,,,,9,45,50,0:41:02.302884,2462.0,49.24,0.999736,0.999766,0.996829,0.996829,0.996829,1.230305,0.980173,0.981674,0.858209,0.806344,0.831468,298.153052,0.980173,0.981674,0.858209,0.806344,0.831468,298.147738


In [10]:
# Print how many rows exist for each execution number 0..9.
execution_numbers = training_data_df[EXECUTION_NUMBER].astype(int)
for execution in range(10):
    print(int((execution_numbers == execution).sum()))

4
4
4
4
4
4
4
4
4
4


In [11]:
#training_data_df.to_csv('training_data_5_epochs_mestrado_all_reps.csv', index=None)

In [12]:
# Mean total training duration over all runs, rendered as a timedelta string.
mean_total_seconds = training_data_df[TOTAL_DURATION].mean()
str(datetime.timedelta(seconds=mean_total_seconds))

'0:36:37.950000'

In [26]:
# describe() statistics that get_group_csv scales by 100 by default.
default_exported_columns = ['mean', 'min', 'max', 'std']
# Labels assigned, in order, to the eight columns of the describe() output.
renamed_columns = [
    'Contagem', 'F-Score', 'Desvio Padrão', 'Mínimo',
    '25%', '50%', '75%', 'Máximo',
]
# Renamed columns written to the CSV file, in this order.
exported_columns_names = ['F-Score', 'Mínimo', 'Máximo', 'Desvio Padrão']

# Raw index value -> label used in the exported tables.
model_map = {
    'harem-ft': 'Sim',
    'harem': 'Não',
}
representation_map = {
    'ELMo+CNN+Embeddings': 'ELMo+CNN+Vetor',
    'ELMo+Embeddings': 'ELMo+Vetor',
}
embedding_type_map = {
    'skip': 'Skip-Gram',
    'No': 'Sem Vetor',
    'cbow': 'CBoW',
}
embedding_map = {
    'wang2vec': 'Wang2Vec',
    'glove': 'GloVe',
    'word2vec': 'Word2Vec',
    'No': 'Sem Vetor',
    'fasttext': 'FastText',
}
# Union of all label maps; on a duplicate key the later map wins.
replacements_map = {
    raw_value: label
    for label_map in (model_map, representation_map, embedding_type_map, embedding_map)
    for raw_value, label in label_map.items()
}

def get_group_csv(index_names, group_name, group_columns, target_value, index_map, dataframe=None, exported_columns=default_exported_columns):
    """Summarise one metric per group, write the summary to CSV and return it.

    Args:
        index_names: name (str) or list of names given to the index level(s) of the result.
        group_name: text inserted in the CSV file name
            ('grupo_<group_name>_50_epochs_mestrado_harem_final.csv', written to the
            current working directory).
        group_columns: column(s) of `dataframe` to group by.
        target_value: name of the column that is summarised.
        index_map: mapping used to rename index values of the result.
        dataframe: frame to summarise. Defaults to the current `training_data_df`,
            looked up when the function is called, so a frame rebuilt after this
            definition is picked up.
        exported_columns: describe() statistics multiplied by 100 (percentages).
            Statistics not listed here keep their original scale in the returned
            frame - with the default that applies to the 25%/50%/75% columns.

    Returns:
        The describe() table of `target_value` per group, sorted by descending mean,
        with its columns renamed to `renamed_columns`. Only the columns in
        `exported_columns_names` are written to the CSV file.
    """
    if dataframe is None:
        dataframe = training_data_df
    if isinstance(index_names, str):
        index_names = [index_names]
    # Select the target column before describing so only that column is summarised.
    group = dataframe.groupby(group_columns)[target_value].describe().sort_values(by='mean', ascending=False)
    for column in exported_columns:
        group[column] = group[column] * 100
    group.index.names = index_names
    group = group.rename(index=index_map)
    # Positional rename: relies on describe() returning its eight statistics in the
    # order count, mean, std, min, 25%, 50%, 75%, max.
    group.columns = renamed_columns
    group.to_csv('grupo_' + group_name + '_50_epochs_mestrado_harem_final.csv', columns=exported_columns_names, float_format = '%.2f%%')
    return group

In [27]:
get_group_csv(['Representação', 'Cenário'], 'Rep_Cen', [REPRESENTATION, SCENARIO], TEST_F1_MEASURE, replacements_map)

Unnamed: 0_level_0,Unnamed: 1_level_0,Contagem,F-Score,Desvio Padrão,Mínimo,25%,50%,75%,Máximo
Representação,Cenário,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
ELMo+CNN+Wang2Vec,selective,10.0,83.164229,0.22374,82.817869,0.830862,0.831363,0.833233,83.550737
ELMo+GloVe,selective,10.0,82.746833,0.139587,82.561308,0.826505,0.827167,0.828504,82.993427
ELMo+CNN+Wang2Vec,total,10.0,77.808924,0.272999,77.3285,0.776989,0.777948,0.77968,78.190223
ELMo+GloVe,total,10.0,77.34886,0.248246,76.933876,0.773014,0.773828,0.774246,77.704318


In [15]:
# Folder whose entries are read as score files; their names are matched on the
# embedding ('Wang2Vec' / 'GloVe') and the scenario ('selective' / 'total').
scores_path = Path('/media/discoD/models/elmo/ner/mestrado/scores_final')

In [29]:
# Collect one score per score file, grouped by the embedding and the scenario found in
# the file name. The score is the last whitespace-separated token of the file's second line.
scores_wang2vec_selective, scores_wang2vec_total, scores_glove_selective, scores_glove_total = [], [], [], []
scores_by_group = {
    ('Wang2Vec', 'selective'): scores_wang2vec_selective,
    ('Wang2Vec', 'total'): scores_wang2vec_total,
    ('GloVe', 'selective'): scores_glove_selective,
    ('GloVe', 'total'): scores_glove_total,
}
for score_file in scores_path.iterdir():
    if not score_file.is_file():
        continue
    # First match wins, in the same precedence as before: Wang2Vec before GloVe,
    # selective before total.
    embedding = next((name for name in ('Wang2Vec', 'GloVe') if name in score_file.name), None)
    scenario = next((name for name in ('selective', 'total') if name in score_file.name), None)
    if embedding is None or scenario is None:
        # Not a score file of one of the four groups; it is not read at all.
        continue
    # The context manager closes the handle, which the bare open().readlines() never did.
    with score_file.open(mode='r', encoding='utf8') as score_stream:
        score = score_stream.readlines()[1].split()[-1]
    scores_by_group[embedding, scenario].append(float(score))
print(len(scores_wang2vec_selective))
print(len(scores_wang2vec_total))
print(len(scores_glove_selective))
print(len(scores_glove_total))

10
10
10
10


In [30]:
import statistics

In [34]:
# Mean score per group, printed in the order:
# Wang2Vec selective, Wang2Vec total, GloVe selective, GloVe total.
for group_scores in (scores_wang2vec_selective, scores_wang2vec_total, scores_glove_selective, scores_glove_total):
    print(statistics.mean(group_scores))

83.222
78.037
82.886
77.631
