In [1]:
import os
import json
import pandas as pd
import numpy as np
from modules.extract_results import display_results_by_subdistribution, display_results_by_named_entity, \
                                    display_confusion_matrix, extract_results_to_txt_file

In [2]:
def get_results_from_directory(model_directory):
    """Load a model's saved results and return its two "Total" summaries.

    Reads ``models/<model_directory>/results.txt``, parses its content as JSON
    and hands the parsed object to the two summary helpers imported from
    ``modules.extract_results``.

    Args:
        model_directory: Name of the model's sub-directory under ``models``.

    Returns:
        A 2-tuple ``(by_subdistribution_total, by_named_entity_total)``: the
        ``"Total"`` entry of ``display_results_by_subdistribution(...)`` and
        the ``"Total"`` entry of ``display_results_by_named_entity(...)``.

    Raises:
        OSError: If the results file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    results_path = os.path.join('models', model_directory, 'results.txt')
    # Explicit UTF-8: without it the decoding depends on the platform's
    # locale encoding, which makes the notebook behave differently per machine.
    with open(results_path, encoding='utf-8') as file:
        results_json = json.load(file)
    # The file is closed at this point; the helpers only need the parsed object.
    return (
        display_results_by_subdistribution(results_json)["Total"],
        display_results_by_named_entity(results_json)["Total"],
    )

In [3]:
# Model sub-directories (under 'models') whose results are compared below.
model_directories = [
    'model_gaz_loc',
    'model_gaz_loc_variants',
    'model_gaz_loc_k2sitsi',
    'model_gaz_loc_variants_k2sitsi'
]

# Both mappings are keyed by model directory name.
totals_by_subdistribution, totals_by_named_entity = {}, {}

for model_directory in model_directories:
    # One read per model; the returned pair is unpacked straight into the
    # two per-model mappings.
    (
        totals_by_subdistribution[model_directory],
        totals_by_named_entity[model_directory],
    ) = get_results_from_directory(model_directory)

# Total values by model:

In [4]:
# Build the frame with one column per model, then transpose so that each
# model becomes a row and the three metrics become the columns.
display(
    pd.DataFrame(
        totals_by_subdistribution,
        index=["Precision", "Recall", "F1-score"],
    ).transpose()
)

Unnamed: 0,Precision,Recall,F1-score
model_gaz_loc,0.849723,0.787655,0.817513
model_gaz_loc_variants,0.853579,0.790069,0.820597
model_gaz_loc_k2sitsi,0.039756,0.036372,0.037989
model_gaz_loc_variants_k2sitsi,0.039887,0.036372,0.038049


# Total values by named entity:

In [5]:
# One column per model on construction; transposed so each model is a row.
display(
    pd.DataFrame(totals_by_named_entity).transpose()
)

Unnamed: 0,ORG_precision,ORG_recall,ORG_f1score,PER_precision,PER_recall,PER_f1score,MISC_precision,MISC_recall,MISC_f1score,LOC_precision,LOC_recall,LOC_f1score,LOC_ORG_precision,LOC_ORG_recall,LOC_ORG_f1score
model_gaz_loc,0.090323,0.084848,0.0875,0.180827,0.174625,0.177672,0.052632,0.035714,0.042553,0.070588,0.043454,0.053793,0.130713,0.107288,0.117848
model_gaz_loc_variants,0.088525,0.081818,0.085039,0.181366,0.175112,0.178185,0.058824,0.035714,0.044444,0.077064,0.046797,0.058232,0.130511,0.10681,0.117477
model_gaz_loc_k2sitsi,0.006349,0.006061,0.006202,0.008407,0.007984,0.00819,0.0,0.0,0.0,0.00495,0.002786,0.003565,0.005891,0.005018,0.005419
model_gaz_loc_variants_k2sitsi,0.006557,0.006061,0.006299,0.008431,0.007984,0.008201,0.0,0.0,0.0,0.00495,0.002786,0.003565,0.005915,0.005018,0.00543


In [6]:
# Names (not full paths) of every '.json' entry in test/flattened_json_files,
# in the order os.listdir returns them.
files = [
    entry
    for entry in os.listdir(os.path.join('test', 'flattened_json_files'))
    if entry.endswith('.json')
]

In [14]:
# Print one cross-tabulation per model, using the two label sequences
# returned by display_confusion_matrix for the collected test files.
for model in model_directories:
    y_true, y_pred = display_confusion_matrix(model, files)
    print(model)
    # end='\n\n\n' reproduces the table's own newline plus the two blank
    # lines that separate consecutive models in the output.
    print(pd.crosstab(y_true, y_pred), end='\n\n\n')

model_gaz_loc
Predicted  LOC  LOC_ORG  ORG  PER
Actual                           
LOC          5        1    0    2
LOC_ORG      0       23    0    3
ORG          0        0    2    0
PER          0        3    0  211


model_gaz_loc_variants
Predicted  LOC  LOC_ORG  ORG  PER
Actual                           
LOC          5        1    0    2
LOC_ORG      0       22    0    3
ORG          0        0    2    0
PER          0        3    0  212


model_gaz_loc_k2sitsi
Predicted  LOC  LOC_ORG  ORG  PER
Actual                           
LOC          5        1    0    2
LOC_ORG      0       21    0    3
ORG          0        0    2    0
PER          0        2    0  213


model_gaz_loc_variants_k2sitsi
Predicted  LOC  LOC_ORG  ORG  PER
Actual                           
LOC          5        1    0    2
LOC_ORG      0       21    0    3
ORG          0        0    2    0
PER          0        3    0  213


