In [1]:
import os
import json
import pandas as pd
from modules.extract_results import display_results_by_subdistribution, display_results_by_named_entity, \
                                    display_confusion_matrix, extract_results_to_txt_file

In [2]:
def format_vertical_headers(df):
    """Display a dataframe with vertical column headers"""
    styles = [dict(selector="th", props=[('width', '40px')]),
              dict(selector="th.col_heading",
                   props=[("writing-mode", "vertical-rl"),
                          ('transform', 'rotateZ(180deg)'), 
                          ('height', '290px'),
                          ('vertical-align', 'top')])]
    return (df.fillna('').style.set_table_styles(styles))

In [3]:
# For every directory under models/ whose name starts with "model_", read its
# results.txt (JSON) and keep the "Total" entry of both result tables, keyed by
# the directory name.
totals_by_subdistribution = {}
totals_by_named_entity = {}
# os.listdir() returns entries in arbitrary order; sorting makes the column
# order of the summary tables identical on every run and machine.
for directory in sorted(os.listdir("models")):
    if not directory.startswith("model_"):
        continue
    try:
        # Only the file access is guarded, so that a FileNotFoundError raised
        # anywhere else is not misreported as "model has no results".
        with open(os.path.join('models', directory, 'results.txt')) as file:
            results_json = json.load(file)
    except FileNotFoundError:
        # Message (Estonian): "Model <directory> has no results."
        print(f"(!) Mudelil {directory} puuduvad tulemused.")
        continue
    totals_by_subdistribution[directory] = display_results_by_subdistribution(results_json)["Total"]
    totals_by_named_entity[directory] = display_results_by_named_entity(results_json)["Total"]

# Total values by model:

In [4]:
# One column per model; rows are restricted to the three listed metrics.
subdistribution_totals_df = pd.DataFrame(
    totals_by_subdistribution,
    index=["Precision", "Recall", "F1-score"],
)
format_vertical_headers(subdistribution_totals_df)

Unnamed: 0,model_default,model_local_features_without_morph,model_morph_without_lemmas,model_morph_with_lemmas,model_morph_with_lemmas_and_sentences,model_morph_with_lemmas_and_sentences_and_gazzetteer,model_morph_with_lemmas_and_sentences_and_gazzetteer_and_global_features
Precision,0.904527,0.864699,0.86989,0.899843,0.900211,0.898827,0.905881
Recall,0.874598,0.832153,0.845586,0.860993,0.86065,0.864727,0.873224
F1-score,0.889311,0.848114,0.857566,0.879989,0.879986,0.881447,0.889253


# Total values by named entity:

In [5]:
# One column per model, built from the totals collected per named entity.
named_entity_totals_df = pd.DataFrame(totals_by_named_entity)
format_vertical_headers(named_entity_totals_df)

Unnamed: 0,model_default,model_local_features_without_morph,model_morph_without_lemmas,model_morph_with_lemmas,model_morph_with_lemmas_and_sentences,model_morph_with_lemmas_and_sentences_and_gazzetteer,model_morph_with_lemmas_and_sentences_and_gazzetteer_and_global_features
ORG_precision,0.779428,0.687798,0.708073,0.764448,0.778933,0.785898,0.777907
ORG_recall,0.744606,0.693405,0.694753,0.717828,0.722055,0.737153,0.738891
ORG_f1score,0.761262,0.689991,0.700954,0.740377,0.749319,0.760706,0.757856
PER_precision,0.935972,0.902675,0.910621,0.931687,0.932308,0.930386,0.936061
PER_recall,0.925524,0.887524,0.899791,0.91343,0.912425,0.915206,0.925076
PER_f1score,0.930714,0.895002,0.905156,0.922458,0.922235,0.922721,0.930529
MISC_precision,0.740069,0.711702,0.719368,0.730591,0.731492,0.749587,0.74329
MISC_recall,0.649946,0.652229,0.624728,0.620904,0.622585,0.640533,0.621258
MISC_f1score,0.688467,0.676465,0.665965,0.668685,0.669542,0.688006,0.674346
LOC_precision,0.609861,0.491394,0.498547,0.595758,0.588593,0.598201,0.614576


# All models:

### model_default

In [6]:
# Parse models/model_default/results.txt as JSON, then show the two result
# tables built from it by the helper functions.
results_path = os.path.join('models', 'model_default', 'results.txt')
with open(results_path) as file:
    results_json = json.load(file)

subdistribution_table = display_results_by_subdistribution(results_json)
display(subdistribution_table)
named_entity_table = pd.DataFrame(display_results_by_named_entity(results_json))
display(named_entity_table)

Alamhulk,1,2,3,4,5,Total
Precision,0.911927,0.915015,0.905627,0.882717,0.911048,0.904527
Recall,0.879646,0.889086,0.880416,0.847297,0.881895,0.874598
F1-score,0.895495,0.901864,0.892844,0.864645,0.896234,0.889311


Unnamed: 0,1,2,3,4,5,Total
ORG_precision,0.777778,0.777778,0.848485,0.764706,0.728395,0.779428
ORG_recall,0.711864,0.742424,0.8,0.712329,0.75641,0.744606
ORG_f1score,0.743363,0.75969,0.823529,0.737589,0.742138,0.761262
PER_precision,0.939442,0.945081,0.939801,0.908825,0.946711,0.935972
PER_recall,0.927235,0.92801,0.928406,0.905157,0.938812,0.925524
PER_f1score,0.933298,0.936468,0.934069,0.906987,0.942745,0.930714
MISC_precision,0.7,0.868421,0.74359,0.756757,0.631579,0.740069
MISC_recall,0.512195,0.647059,0.690476,0.8,0.6,0.649946
MISC_f1score,0.591549,0.741573,0.716049,0.777778,0.615385,0.688467
LOC_precision,0.533981,0.650602,0.641667,0.556391,0.666667,0.609861


### model_local_features_without_morph

In [7]:
# Parse models/model_local_features_without_morph/results.txt as JSON, then
# show the two result tables built from it by the helper functions.
results_path = os.path.join('models', 'model_local_features_without_morph', 'results.txt')
with open(results_path) as file:
    results_json = json.load(file)

subdistribution_table = display_results_by_subdistribution(results_json)
display(subdistribution_table)
named_entity_table = pd.DataFrame(display_results_by_named_entity(results_json))
display(named_entity_table)

Alamhulk,1,2,3,4,5,Total
Precision,0.858887,0.877747,0.870244,0.847991,0.872068,0.864699
Recall,0.840265,0.831304,0.836117,0.811443,0.845263,0.832153
F1-score,0.849474,0.853894,0.85284,0.829315,0.858456,0.848114


Unnamed: 0,1,2,3,4,5,Total
ORG_precision,0.807018,0.661538,0.701493,0.666667,0.602273,0.687798
ORG_recall,0.779661,0.651515,0.671429,0.684932,0.679487,0.693405
ORG_f1score,0.793103,0.656489,0.686131,0.675676,0.638554,0.689991
PER_precision,0.89704,0.910724,0.907134,0.884206,0.914271,0.902675
PER_recall,0.889813,0.875916,0.888279,0.874888,0.908723,0.887524
PER_f1score,0.893412,0.892981,0.897608,0.879522,0.911489,0.895002
MISC_precision,0.567568,0.857143,0.675,0.702703,0.756098,0.711702
MISC_recall,0.512195,0.588235,0.642857,0.742857,0.775,0.652229
MISC_f1score,0.538462,0.697674,0.658537,0.722222,0.765432,0.676465
LOC_precision,0.349593,0.577381,0.591241,0.463576,0.475177,0.491394


### model_morph_without_lemmas

In [8]:
# Parse models/model_morph_without_lemmas/results.txt as JSON, then show the
# two result tables built from it by the helper functions.
results_path = os.path.join('models', 'model_morph_without_lemmas', 'results.txt')
with open(results_path) as file:
    results_json = json.load(file)

subdistribution_table = display_results_by_subdistribution(results_json)
display(subdistribution_table)
named_entity_table = pd.DataFrame(display_results_by_named_entity(results_json))
display(named_entity_table)

Alamhulk,1,2,3,4,5,Total
Precision,0.882516,0.853945,0.873412,0.855577,0.886398,0.86989
Recall,0.847566,0.853,0.865166,0.812187,0.857474,0.845586
F1-score,0.864688,0.853472,0.869269,0.833317,0.871696,0.857566


Unnamed: 0,1,2,3,4,5,Total
ORG_precision,0.754717,0.617647,0.732394,0.708333,0.727273,0.708073
ORG_recall,0.677966,0.636364,0.742857,0.69863,0.717949,0.694753
ORG_f1score,0.714286,0.626866,0.737589,0.703448,0.722581,0.700954
PER_precision,0.919071,0.898814,0.915558,0.891873,0.927788,0.910621
PER_recall,0.894231,0.892932,0.913972,0.878475,0.919343,0.899791
PER_f1score,0.906481,0.895863,0.914765,0.885124,0.923546,0.905156
MISC_precision,0.655172,0.775,0.7,0.666667,0.8,0.719368
MISC_recall,0.463415,0.607843,0.666667,0.685714,0.7,0.624728
MISC_f1score,0.542857,0.681319,0.682927,0.676056,0.746667,0.665965
LOC_precision,0.389381,0.494792,0.562914,0.527132,0.518519,0.498547


### model_morph_with_lemmas

In [9]:
# Parse models/model_morph_with_lemmas/results.txt as JSON, then show the two
# result tables built from it by the helper functions.
results_path = os.path.join('models', 'model_morph_with_lemmas', 'results.txt')
with open(results_path) as file:
    results_json = json.load(file)

subdistribution_table = display_results_by_subdistribution(results_json)
display(subdistribution_table)
named_entity_table = pd.DataFrame(display_results_by_named_entity(results_json))
display(named_entity_table)

Alamhulk,1,2,3,4,5,Total
Precision,0.903478,0.907618,0.90438,0.875318,0.912669,0.899843
Recall,0.867699,0.867833,0.874607,0.832064,0.869053,0.860993
F1-score,0.885227,0.887279,0.889244,0.853143,0.890327,0.879989


Unnamed: 0,1,2,3,4,5,Total
ORG_precision,0.709091,0.754098,0.848485,0.757143,0.753425,0.764448
ORG_recall,0.661017,0.69697,0.8,0.726027,0.705128,0.717828
ORG_f1score,0.684211,0.724409,0.823529,0.741259,0.728477,0.740377
PER_precision,0.935279,0.936469,0.939794,0.904827,0.942066,0.931687
PER_recall,0.91632,0.906806,0.923788,0.891031,0.929204,0.91343
PER_f1score,0.925702,0.921399,0.931722,0.897876,0.935591,0.922458
MISC_precision,0.655172,0.868421,0.684211,0.823529,0.621622,0.730591
MISC_recall,0.463415,0.647059,0.619048,0.8,0.575,0.620904
MISC_f1score,0.542857,0.741573,0.65,0.811594,0.597403,0.668685
LOC_precision,0.490566,0.628049,0.608333,0.565891,0.68595,0.595758


### model_morph_with_lemmas_and_sentences

In [10]:
# Parse models/model_morph_with_lemmas_and_sentences/results.txt as JSON, then
# show the two result tables built from it by the helper functions.
results_path = os.path.join('models', 'model_morph_with_lemmas_and_sentences', 'results.txt')
with open(results_path) as file:
    results_json = json.load(file)

subdistribution_table = display_results_by_subdistribution(results_json)
display(subdistribution_table)
named_entity_table = pd.DataFrame(display_results_by_named_entity(results_json))
display(named_entity_table)

Alamhulk,1,2,3,4,5,Total
Precision,0.903129,0.912363,0.901917,0.879707,0.907169,0.900211
Recall,0.868363,0.86429,0.877027,0.824633,0.876421,0.86065
F1-score,0.885405,0.887676,0.889298,0.85128,0.89153,0.879986


Unnamed: 0,1,2,3,4,5,Total
ORG_precision,0.792453,0.733333,0.850746,0.764706,0.753425,0.778933
ORG_recall,0.711864,0.666667,0.814286,0.712329,0.705128,0.722055
ORG_f1score,0.75,0.698413,0.832117,0.737589,0.728477,0.749319
PER_precision,0.934005,0.942538,0.937317,0.906897,0.940786,0.932308
PER_recall,0.9158,0.906021,0.923788,0.884529,0.931985,0.912425
PER_f1score,0.924813,0.923919,0.930503,0.895573,0.936365,0.922235
MISC_precision,0.678571,0.815789,0.717949,0.823529,0.621622,0.731492
MISC_recall,0.463415,0.607843,0.666667,0.8,0.575,0.622585
MISC_f1score,0.550725,0.696629,0.691358,0.811594,0.597403,0.669542
LOC_precision,0.47619,0.612903,0.616,0.576,0.661871,0.588593


### model_morph_with_lemmas_and_sentences_and_gazzetteer

In [11]:
# Parse models/model_morph_with_lemmas_and_sentences_and_gazzetteer/results.txt
# as JSON, then show the two result tables built from it by the helper functions.
results_path = os.path.join('models', 'model_morph_with_lemmas_and_sentences_and_gazzetteer', 'results.txt')
with open(results_path) as file:
    results_json = json.load(file)

subdistribution_table = display_results_by_subdistribution(results_json)
display(subdistribution_table)
named_entity_table = pd.DataFrame(display_results_by_named_entity(results_json))
display(named_entity_table)

Alamhulk,1,2,3,4,5,Total
Precision,0.904062,0.909363,0.902269,0.879149,0.902838,0.898827
Recall,0.87146,0.875138,0.876059,0.836522,0.870526,0.864727
F1-score,0.887462,0.891922,0.888971,0.857306,0.886388,0.881447


Unnamed: 0,1,2,3,4,5,Total
ORG_precision,0.781818,0.803279,0.830769,0.753623,0.76,0.785898
ORG_recall,0.728814,0.742424,0.771429,0.712329,0.730769,0.737153
ORG_f1score,0.754386,0.771654,0.8,0.732394,0.745098,0.760706
PER_precision,0.932786,0.938308,0.940832,0.90487,0.935131,0.930386
PER_recall,0.91606,0.91178,0.927252,0.89148,0.929456,0.915206
PER_f1score,0.924348,0.924854,0.933992,0.898125,0.932285,0.922721
MISC_precision,0.8,0.868421,0.657895,0.8,0.621622,0.749587
MISC_recall,0.585366,0.647059,0.595238,0.8,0.575,0.640533
MISC_f1score,0.676056,0.741573,0.625,0.8,0.597403,0.688006
LOC_precision,0.481132,0.654545,0.596774,0.596899,0.661654,0.598201


### model_morph_with_lemmas_and_sentences_and_gazzetteer_and_global_features

In [12]:
# Parse results.txt of the model with morphology, lemmas, sentences, gazetteer
# and global features as JSON, then show the two result tables built from it by
# the helper functions.
results_path = os.path.join(
    'models',
    'model_morph_with_lemmas_and_sentences_and_gazzetteer_and_global_features',
    'results.txt',
)
with open(results_path) as file:
    results_json = json.load(file)

subdistribution_table = display_results_by_subdistribution(results_json)
display(subdistribution_table)
named_entity_table = pd.DataFrame(display_results_by_named_entity(results_json))
display(named_entity_table)

Alamhulk,1,2,3,4,5,Total
Precision,0.913734,0.915219,0.905642,0.883514,0.914987,0.905881
Recall,0.878761,0.88665,0.878238,0.848226,0.879158,0.873224
F1-score,0.895906,0.900708,0.891729,0.86551,0.896715,0.889253


Unnamed: 0,1,2,3,4,5,Total
ORG_precision,0.763636,0.777778,0.818182,0.753623,0.776316,0.777907
ORG_recall,0.711864,0.742424,0.771429,0.712329,0.75641,0.738891
ORG_f1score,0.736842,0.75969,0.794118,0.732394,0.766234,0.757856
PER_precision,0.941394,0.944489,0.939235,0.909725,0.945464,0.936061
PER_recall,0.926715,0.92644,0.928118,0.906054,0.938053,0.925076
PER_f1score,0.933997,0.935377,0.933643,0.907886,0.941744,0.930529
MISC_precision,0.666667,0.891892,0.685714,0.823529,0.648649,0.74329
MISC_recall,0.487805,0.647059,0.571429,0.8,0.6,0.621258
MISC_f1score,0.56338,0.75,0.623377,0.811594,0.623377,0.674346
LOC_precision,0.544554,0.658537,0.623932,0.547445,0.698413,0.614576
