In [1]:
import os
import json
import pandas as pd
from modules.extract_results import display_results_by_subdistribution, display_results_by_named_entity, \
                                    display_confusion_matrix, extract_results_to_txt_file

In [2]:
def format_vertical_headers(df):
    """Style a dataframe so that its column headers are rendered vertically.

    Missing values are replaced with empty strings first, then two table
    styles are attached: a fixed width for every header cell, and a rotated,
    vertically written layout for the column headers.

    :param df: object exposing ``fillna`` and ``.style`` (a pandas DataFrame
        in this notebook).
    :return: the result of ``.style.set_table_styles(...)`` on the filled frame.
    """
    # Applied to every <th> cell: keep header cells narrow.
    narrow_header = {"selector": "th", "props": [('width', '40px')]}
    # Applied to column headings only: vertical writing mode, flipped by 180 degrees.
    rotated_column_header = {
        "selector": "th.col_heading",
        "props": [
            ("writing-mode", "vertical-rl"),
            ('transform', 'rotateZ(180deg)'),
            ('height', '290px'),
            ('vertical-align', 'top'),
        ],
    }
    blank_filled = df.fillna('')
    return blank_filled.style.set_table_styles([narrow_header, rotated_column_header])

In [3]:
# Collect the "Total" column of both result tables for every model directory.
totals_by_subdistribution = {}
totals_by_named_entity = {}
for directory in os.listdir("models"):
    # Skip entries whose name does not start with "model_".
    if not directory.startswith("model_"):
        continue
    try:
        # Explicit encoding keeps JSON parsing independent of the platform's locale default.
        with open(os.path.join('models', directory, 'results.txt'), encoding="utf-8") as file:
            results_json = json.load(file)
    except FileNotFoundError:
        # Only a missing results file is reported here; errors raised while
        # building the tables below are no longer mislabelled as missing results.
        print(f"(!) Mudelil {directory} puuduvad tulemused.")
        continue
    totals_by_subdistribution[directory] = display_results_by_subdistribution(results_json)["Total"]
    totals_by_named_entity[directory] = display_results_by_named_entity(results_json)["Total"]

# Total values by model:

In [4]:
# Precision / recall / F1 totals, one column per model directory.
format_vertical_headers(
    pd.DataFrame(
        totals_by_subdistribution,
        index=["Precision", "Recall", "F1-score"],
    )
)

Unnamed: 0,model_default,model_local_features_without_morph,model_morph_without_lemmas,model_morph_with_lemmas,model_morph_with_lemmas_and_sentences,model_morph_with_lemmas_and_sentences_and_gazzetteer,model_morph_with_lemmas_and_sentences_and_gazzetteer_and_global_features
Precision,0.904527,0.864699,0.86989,0.894995,0.894828,0.896492,0.900805
Recall,0.874598,0.832153,0.845586,0.861079,0.861379,0.86198,0.873782
F1-score,0.889311,0.848114,0.857566,0.877709,0.877785,0.878897,0.887088


# Total values by named entity:

In [5]:
# Per-entity totals, one column per model directory.
format_vertical_headers(
    pd.DataFrame(totals_by_named_entity)
)

Unnamed: 0,model_default,model_local_features_without_morph,model_morph_without_lemmas,model_morph_with_lemmas,model_morph_with_lemmas_and_sentences,model_morph_with_lemmas_and_sentences_and_gazzetteer,model_morph_with_lemmas_and_sentences_and_gazzetteer_and_global_features
ORG_precision,0.779428,0.687798,0.708073,0.748684,0.765515,0.777888,0.772027
ORG_recall,0.744606,0.693405,0.694753,0.712056,0.728182,0.729497,0.739187
ORG_f1score,0.761262,0.689991,0.700954,0.72985,0.746356,0.752775,0.75517
PER_precision,0.935972,0.902675,0.910621,0.927427,0.926616,0.928038,0.933056
PER_recall,0.925524,0.887524,0.899791,0.91309,0.912187,0.913225,0.924506
PER_f1score,0.930714,0.895002,0.905156,0.920183,0.919331,0.920542,0.928745
MISC_precision,0.740069,0.711702,0.719368,0.717757,0.745717,0.719314,0.732389
MISC_recall,0.649946,0.652229,0.624728,0.611506,0.626977,0.603299,0.617099
MISC_f1score,0.688467,0.676465,0.665965,0.658318,0.679096,0.653611,0.667419
LOC_precision,0.609861,0.491394,0.498547,0.580112,0.572535,0.589644,0.593525


# All models:

### model_default

In [6]:
# Load the stored results of model_default and show both result tables.
# Explicit encoding keeps JSON parsing independent of the platform's locale default.
with open(os.path.join('models', 'model_default', 'results.txt'), encoding="utf-8") as file:
    results_json = json.load(file)
# The file is only needed for parsing, so the tables are rendered after it is closed.
display(display_results_by_subdistribution(results_json))
display(pd.DataFrame(display_results_by_named_entity(results_json)))

Alamhulk,1,2,3,4,5,Total
Precision,0.911927,0.915015,0.905627,0.882717,0.911048,0.904527
Recall,0.879646,0.889086,0.880416,0.847297,0.881895,0.874598
F1-score,0.895495,0.901864,0.892844,0.864645,0.896234,0.889311


Unnamed: 0,1,2,3,4,5,Total
ORG_precision,0.777778,0.777778,0.848485,0.764706,0.728395,0.779428
ORG_recall,0.711864,0.742424,0.8,0.712329,0.75641,0.744606
ORG_f1score,0.743363,0.75969,0.823529,0.737589,0.742138,0.761262
PER_precision,0.939442,0.945081,0.939801,0.908825,0.946711,0.935972
PER_recall,0.927235,0.92801,0.928406,0.905157,0.938812,0.925524
PER_f1score,0.933298,0.936468,0.934069,0.906987,0.942745,0.930714
MISC_precision,0.7,0.868421,0.74359,0.756757,0.631579,0.740069
MISC_recall,0.512195,0.647059,0.690476,0.8,0.6,0.649946
MISC_f1score,0.591549,0.741573,0.716049,0.777778,0.615385,0.688467
LOC_precision,0.533981,0.650602,0.641667,0.556391,0.666667,0.609861


### model_local_features_without_morph

In [7]:
# Load the stored results of model_local_features_without_morph and show both result tables.
# Explicit encoding keeps JSON parsing independent of the platform's locale default.
with open(os.path.join('models', 'model_local_features_without_morph', 'results.txt'), encoding="utf-8") as file:
    results_json = json.load(file)
# The file is only needed for parsing, so the tables are rendered after it is closed.
display(display_results_by_subdistribution(results_json))
display(pd.DataFrame(display_results_by_named_entity(results_json)))

Alamhulk,1,2,3,4,5,Total
Precision,0.858887,0.877747,0.870244,0.847991,0.872068,0.864699
Recall,0.840265,0.831304,0.836117,0.811443,0.845263,0.832153
F1-score,0.849474,0.853894,0.85284,0.829315,0.858456,0.848114


Unnamed: 0,1,2,3,4,5,Total
ORG_precision,0.807018,0.661538,0.701493,0.666667,0.602273,0.687798
ORG_recall,0.779661,0.651515,0.671429,0.684932,0.679487,0.693405
ORG_f1score,0.793103,0.656489,0.686131,0.675676,0.638554,0.689991
PER_precision,0.89704,0.910724,0.907134,0.884206,0.914271,0.902675
PER_recall,0.889813,0.875916,0.888279,0.874888,0.908723,0.887524
PER_f1score,0.893412,0.892981,0.897608,0.879522,0.911489,0.895002
MISC_precision,0.567568,0.857143,0.675,0.702703,0.756098,0.711702
MISC_recall,0.512195,0.588235,0.642857,0.742857,0.775,0.652229
MISC_f1score,0.538462,0.697674,0.658537,0.722222,0.765432,0.676465
LOC_precision,0.349593,0.577381,0.591241,0.463576,0.475177,0.491394


### model_morph_without_lemmas

In [8]:
# Load the stored results of model_morph_without_lemmas and show both result tables.
# Explicit encoding keeps JSON parsing independent of the platform's locale default.
with open(os.path.join('models', 'model_morph_without_lemmas', 'results.txt'), encoding="utf-8") as file:
    results_json = json.load(file)
# The file is only needed for parsing, so the tables are rendered after it is closed.
display(display_results_by_subdistribution(results_json))
display(pd.DataFrame(display_results_by_named_entity(results_json)))

Alamhulk,1,2,3,4,5,Total
Precision,0.882516,0.853945,0.873412,0.855577,0.886398,0.86989
Recall,0.847566,0.853,0.865166,0.812187,0.857474,0.845586
F1-score,0.864688,0.853472,0.869269,0.833317,0.871696,0.857566


Unnamed: 0,1,2,3,4,5,Total
ORG_precision,0.754717,0.617647,0.732394,0.708333,0.727273,0.708073
ORG_recall,0.677966,0.636364,0.742857,0.69863,0.717949,0.694753
ORG_f1score,0.714286,0.626866,0.737589,0.703448,0.722581,0.700954
PER_precision,0.919071,0.898814,0.915558,0.891873,0.927788,0.910621
PER_recall,0.894231,0.892932,0.913972,0.878475,0.919343,0.899791
PER_f1score,0.906481,0.895863,0.914765,0.885124,0.923546,0.905156
MISC_precision,0.655172,0.775,0.7,0.666667,0.8,0.719368
MISC_recall,0.463415,0.607843,0.666667,0.685714,0.7,0.624728
MISC_f1score,0.542857,0.681319,0.682927,0.676056,0.746667,0.665965
LOC_precision,0.389381,0.494792,0.562914,0.527132,0.518519,0.498547


### model_morph_with_lemmas

In [9]:
# Load the stored results of model_morph_with_lemmas and show both result tables.
# Explicit encoding keeps JSON parsing independent of the platform's locale default.
with open(os.path.join('models', 'model_morph_with_lemmas', 'results.txt'), encoding="utf-8") as file:
    results_json = json.load(file)
# The file is only needed for parsing, so the tables are rendered after it is closed.
display(display_results_by_subdistribution(results_json))
display(pd.DataFrame(display_results_by_named_entity(results_json)))

Alamhulk,1,2,3,4,5,Total
Precision,0.901499,0.902439,0.895285,0.870316,0.90935,0.894995
Recall,0.864602,0.868275,0.873396,0.832807,0.872211,0.861079
F1-score,0.882665,0.885028,0.884205,0.851149,0.890393,0.877709


Unnamed: 0,1,2,3,4,5,Total
ORG_precision,0.709091,0.71875,0.808824,0.75,0.756757,0.748684
ORG_recall,0.661017,0.69697,0.785714,0.69863,0.717949,0.712056
ORG_f1score,0.684211,0.707692,0.797101,0.723404,0.736842,0.72985
PER_precision,0.932519,0.934683,0.928987,0.899321,0.941626,0.927427
PER_recall,0.912162,0.906545,0.921478,0.891256,0.934008,0.91309
PER_f1score,0.922228,0.920399,0.925217,0.89527,0.937801,0.920183
MISC_precision,0.612903,0.820513,0.735294,0.771429,0.648649,0.717757
MISC_recall,0.463415,0.627451,0.595238,0.771429,0.6,0.611506
MISC_f1score,0.527778,0.711111,0.657895,0.771429,0.623377,0.658318
LOC_precision,0.5,0.628049,0.615385,0.542373,0.614754,0.580112


### model_morph_with_lemmas_and_sentences

In [10]:
# Load the stored results of model_morph_with_lemmas_and_sentences and show both result tables.
# Explicit encoding keeps JSON parsing independent of the platform's locale default.
with open(os.path.join('models', 'model_morph_with_lemmas_and_sentences', 'results.txt'), encoding="utf-8") as file:
    results_json = json.load(file)
# The file is only needed for parsing, so the tables are rendered after it is closed.
display(display_results_by_subdistribution(results_json))
display(pd.DataFrame(display_results_by_named_entity(results_json)))

Alamhulk,1,2,3,4,5,Total
Precision,0.898402,0.90254,0.898229,0.870761,0.908352,0.894828
Recall,0.870575,0.865176,0.871702,0.83485,0.870105,0.861379
F1-score,0.88427,0.883463,0.884767,0.852428,0.888817,0.877785


Unnamed: 0,1,2,3,4,5,Total
ORG_precision,0.767857,0.725806,0.823529,0.753623,0.756757,0.765515
ORG_recall,0.728814,0.681818,0.8,0.712329,0.717949,0.728182
ORG_f1score,0.747826,0.703125,0.811594,0.732394,0.736842,0.746356
PER_precision,0.930688,0.933944,0.930873,0.898531,0.939046,0.926616
PER_recall,0.914241,0.906806,0.917436,0.89148,0.930973,0.912187
PER_f1score,0.922391,0.920175,0.924106,0.894992,0.934992,0.919331
MISC_precision,0.65625,0.864865,0.735294,0.823529,0.648649,0.745717
MISC_recall,0.512195,0.627451,0.595238,0.8,0.6,0.626977
MISC_f1score,0.575342,0.727273,0.657895,0.811594,0.623377,0.679096
LOC_precision,0.463636,0.6,0.626087,0.55,0.622951,0.572535


### model_morph_with_lemmas_and_sentences_and_gazzetteer

In [11]:
# Load the stored results of model_morph_with_lemmas_and_sentences_and_gazzetteer and show both result tables.
# Explicit encoding keeps JSON parsing independent of the platform's locale default.
with open(os.path.join('models', 'model_morph_with_lemmas_and_sentences_and_gazzetteer', 'results.txt'), encoding="utf-8") as file:
    results_json = json.load(file)
# The file is only needed for parsing, so the tables are rendered after it is closed.
display(display_results_by_subdistribution(results_json))
display(pd.DataFrame(display_results_by_named_entity(results_json)))

Alamhulk,1,2,3,4,5,Total
Precision,0.898874,0.907742,0.901941,0.868736,0.910435,0.896492
Recall,0.865265,0.866947,0.877269,0.837265,0.868842,0.86198
F1-score,0.88175,0.886876,0.889434,0.85271,0.889152,0.878897


Unnamed: 0,1,2,3,4,5,Total
ORG_precision,0.714286,0.754098,0.852941,0.768116,0.8,0.777888
ORG_recall,0.677966,0.69697,0.828571,0.726027,0.717949,0.729497
ORG_f1score,0.695652,0.724409,0.84058,0.746479,0.756757,0.752775
PER_precision,0.929121,0.941129,0.935437,0.896622,0.937882,0.928038
PER_recall,0.909563,0.908115,0.924365,0.892601,0.931479,0.913225
PER_f1score,0.919238,0.924327,0.929868,0.894607,0.93467,0.920542
MISC_precision,0.633333,0.837838,0.714286,0.8,0.611111,0.719314
MISC_recall,0.463415,0.607843,0.595238,0.8,0.55,0.603299
MISC_f1score,0.535211,0.704545,0.649351,0.8,0.578947,0.653611
LOC_precision,0.490566,0.614458,0.626087,0.547619,0.669492,0.589644


### model_morph_with_lemmas_and_sentences_and_gazzetteer_and_global_features

In [12]:
# Load the stored results of model_morph_with_lemmas_and_sentences_and_gazzetteer_and_global_features
# and show both result tables.
# Explicit encoding keeps JSON parsing independent of the platform's locale default.
with open(os.path.join('models', 'model_morph_with_lemmas_and_sentences_and_gazzetteer_and_global_features', 'results.txt'), encoding="utf-8") as file:
    results_json = json.load(file)
# The file is only needed for parsing, so the tables are rendered after it is closed.
display(display_results_by_subdistribution(results_json))
display(pd.DataFrame(display_results_by_named_entity(results_json)))

Alamhulk,1,2,3,4,5,Total
Precision,0.908488,0.911664,0.903074,0.873153,0.912486,0.900805
Recall,0.87854,0.884215,0.881869,0.845254,0.884632,0.873782
F1-score,0.893263,0.89773,0.892345,0.858977,0.898343,0.887088


Unnamed: 0,1,2,3,4,5,Total
ORG_precision,0.763636,0.761905,0.848485,0.736111,0.75,0.772027
ORG_recall,0.711864,0.727273,0.8,0.726027,0.730769,0.739187
ORG_f1score,0.736842,0.744186,0.823529,0.731034,0.74026,0.75517
PER_precision,0.936956,0.94489,0.937646,0.900628,0.94516,0.933056
PER_recall,0.923077,0.924607,0.928984,0.900224,0.945638,0.924506
PER_f1score,0.929965,0.934639,0.933295,0.900426,0.945399,0.928745
MISC_precision,0.666667,0.842105,0.714286,0.8,0.638889,0.732389
MISC_recall,0.487805,0.627451,0.595238,0.8,0.575,0.617099
MISC_f1score,0.56338,0.719101,0.649351,0.8,0.605263,0.667419
LOC_precision,0.514019,0.615385,0.652174,0.542636,0.643411,0.593525
