In [1]:
import os
import json
import pandas as pd
import numpy as np
from modules.results_extraction import results_by_subdistribution, results_by_named_entity, \
                                    confusion_matrix, extract_results

In [2]:
def get_results_from_directory(model_directory):
    """Load one model's stored results and summarise them in two ways.

    Reads ``models/<model_directory>/results.txt``, parses its content as
    JSON and hands the parsed object to both summarisation helpers.

    Args:
        model_directory: Name of the model's sub-directory under ``models``.

    Returns:
        A 2-tuple ``(results_by_subdistribution(parsed),
        results_by_named_entity(parsed))``.
    """
    results_path = os.path.join('models', model_directory, 'results.txt')
    with open(results_path) as results_file:
        parsed = json.load(results_file)
    # The helpers only need the parsed object, so the file can be closed first.
    return results_by_subdistribution(parsed), results_by_named_entity(parsed)

In [3]:
# Model directories (resolved under 'models/') whose results are compared.
model_directories = [
    'model_gaz_loc',
    'model_gaz_loc_variants',
    'model_gaz_loc_k2sitsi',
    'model_gaz_loc_variants_k2sitsi',
]

# Both mappings are keyed by model directory name.
totals_by_subdistribution, totals_by_named_entity = {}, {}

for model_directory in model_directories:
    by_subdistribution, by_named_entity = get_results_from_directory(model_directory)
    # Keep only the 'Total' entry of the sub-distribution summary and the
    # 'results' entry of the named-entity summary.
    totals_by_subdistribution.update({model_directory: by_subdistribution['Total']})
    totals_by_named_entity.update({model_directory: by_named_entity['results']})

# Total values by model:

In [4]:
# After the transpose each model is a row and the three metric labels are columns.
display(
    pd.DataFrame(
        totals_by_subdistribution,
        index=["Precision", "Recall", "F1-score"],
    ).transpose()
)

Unnamed: 0,Precision,Recall,F1-score
model_gaz_loc,0.847546,0.784485,0.814797
model_gaz_loc_variants,0.853579,0.790069,0.820597
model_gaz_loc_k2sitsi,0.039756,0.036372,0.037989
model_gaz_loc_variants_k2sitsi,0.039887,0.036372,0.038049


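<code>_k2sitsi</code> mudelite tulemused on madalad, sest mudelit kontrollides lähevad seal sõnestuse tõttu indeksid paigast ära. (In English: the results of the <code>_k2sitsi</code> models are low because, when the model is evaluated, tokenization causes the indices to shift out of alignment.)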

# Total values by named entity:

In [5]:
# Transposed so that each model is a row and each per-entity metric is a column.
display(
    pd.DataFrame(totals_by_named_entity).transpose()
)

Unnamed: 0,ORG_precision,ORG_recall,ORG_f1score,PER_precision,PER_recall,PER_f1score,MISC_precision,MISC_recall,MISC_f1score,LOC_precision,LOC_recall,LOC_f1score,LOC_ORG_precision,LOC_ORG_recall,LOC_ORG_f1score
model_gaz_loc,0.451613,0.424242,0.4375,0.900292,0.868066,0.883885,0.263158,0.178571,0.212766,0.354545,0.21727,0.26943,0.6623,0.543608,0.597113
model_gaz_loc_variants,0.442623,0.409091,0.425197,0.906832,0.875562,0.890923,0.294118,0.178571,0.222222,0.385321,0.233983,0.291161,0.652555,0.53405,0.587385
model_gaz_loc_k2sitsi,0.031746,0.030303,0.031008,0.042037,0.039918,0.04095,0.0,0.0,0.0,0.024752,0.013928,0.017825,0.029453,0.02509,0.027097
model_gaz_loc_variants_k2sitsi,0.032787,0.030303,0.031496,0.042153,0.039918,0.041005,0.0,0.0,0.0,0.024752,0.013928,0.017825,0.029577,0.02509,0.027149


In [6]:
# Names of the '.json' entries in test/flattened_json_files, in os.listdir order.
files = [
    entry
    for entry in os.listdir(os.path.join('test', 'flattened_json_files'))
    if entry.endswith('.json')
]

In [7]:
# Print one cross-tabulation per model, separated by blank lines.
for model in model_directories:
    y_true, y_pred = confusion_matrix(model, files)
    print(model)
    # end='\n\n\n' emits the table's own newline plus the two blank lines
    # that a separate print('\n') would produce.
    print(pd.crosstab(y_true, y_pred), end='\n\n\n')

model_gaz_loc
Predicted  LOC  LOC_ORG  MISC  ORG   PER
Actual                                  
LOC         78       28     0    2    24
LOC_ORG      9      455     0    2    27
MISC         0        0     5    0     1
ORG          0        3     0   28     2
PER          5        9     0    0  4632


model_gaz_loc_variants
Predicted  LOC  LOC_ORG  ORG  PER
Actual                           
LOC          5        1    0    2
LOC_ORG      0       22    0    3
ORG          0        0    2    0
PER          0        3    0  212


model_gaz_loc_k2sitsi
Predicted  LOC  LOC_ORG  ORG  PER
Actual                           
LOC          5        1    0    2
LOC_ORG      0       21    0    3
ORG          0        0    2    0
PER          0        2    0  213


model_gaz_loc_variants_k2sitsi
Predicted  LOC  LOC_ORG  ORG  PER
Actual                           
LOC          5        1    0    2
LOC_ORG      0       21    0    3
ORG          0        0    2    0
PER          0        3    0  213


