In [5]:
import os
import json
import pandas as pd
from modules.extract_results import display_results_by_subdistribution, display_results_by_named_entity, \
                                    display_confusion_matrix, extract_results_to_txt_file

In [38]:
def format_vertical_headers(df):
    """Return a Styler for ``df`` whose column headers are drawn vertically.

    Missing values are replaced with empty strings before styling. Two CSS
    rules are attached: every header cell (``th``) is given a 40px width, and
    column headings (``th.col_heading``) are written vertically, rotated by
    180 degrees, 290px high and top-aligned.
    """
    header_cell_rule = {"selector": "th", "props": [('width', '40px')]}
    column_heading_rule = {
        "selector": "th.col_heading",
        "props": [
            ("writing-mode", "vertical-rl"),
            ('transform', 'rotateZ(180deg)'),
            ('height', '290px'),
            ('vertical-align', 'top'),
        ],
    }
    blank_filled = df.fillna('')
    return blank_filled.style.set_table_styles([header_cell_rule, column_heading_rule])

In [55]:
# Gather the "Total" entry of every model's stored results so that the models
# can be compared side by side in the summary tables.
totals_by_subdistribution = {}
totals_by_named_entity = {}
# NOTE: os.listdir returns entries in arbitrary order, so the column order of
# the summary tables follows whatever order the filesystem reports.
for directory in os.listdir("models"):
    if not directory.startswith("model_"):
        continue
    try:
        with open(os.path.join('models', directory, 'results.txt')) as file:
            results_json = json.load(file)
    except (FileNotFoundError, NotADirectoryError):
        # Either results.txt does not exist for this model, or the entry is a
        # plain file named model_* rather than a directory (POSIX raises
        # NotADirectoryError for that). Report it and move on to the next one.
        print(f"(!) Mudelil {directory} puuduvad tulemused.")
        continue
    # The helpers run outside the try block on purpose: an error raised while
    # building the tables must not be misreported as a missing results file.
    totals_by_subdistribution[directory] = display_results_by_subdistribution(results_json)["Total"]
    totals_by_named_entity[directory] = display_results_by_named_entity(results_json)["Total"]

# Total values by model:

In [56]:
# One column per model, one row per overall metric; the headers are drawn
# vertically because the model names are long.
format_vertical_headers(
    pd.DataFrame(
        totals_by_subdistribution,
        index=["Precision", "Recall", "F1-score"],
    )
)

Unnamed: 0,model_default,model_local_features_without_morph,model_morph_without_lemmas,model_morph_with_lemmas,model_morph_with_lemmas_and_sentences,model_morph_with_lemmas_and_sentences_and_gazzetteer,model_morph_with_lemmas_and_sentences_and_gazzetteer_and_global_features
Precision,0.906989,0.865041,0.869267,0.854686,0.861326,0.852496,0.871473
Recall,0.87735,0.832976,0.826677,0.82971,0.829549,0.839655,0.814719
F1-score,0.891923,0.848706,0.847437,0.842013,0.845139,0.846027,0.842141


# Total values by named entity:

In [57]:
# One column per model; the rows are whatever per-entity metrics the
# collected "Total" entries contain.
format_vertical_headers(
    pd.DataFrame(totals_by_named_entity)
)

Unnamed: 0,model_default,model_local_features_without_morph,model_morph_without_lemmas,model_morph_with_lemmas,model_morph_with_lemmas_and_sentences,model_morph_with_lemmas_and_sentences_and_gazzetteer,model_morph_with_lemmas_and_sentences_and_gazzetteer_and_global_features
ORG_precision,0.785754,0.727925,0.720079,0.666395,0.707656,0.611642,0.710262
ORG_recall,0.736383,0.714776,0.682992,0.678772,0.710613,0.638855,0.693389
ORG_f1score,0.760226,0.721229,0.70102,0.67249,0.708901,0.624936,0.701549
PER_precision,0.938056,0.901294,0.904199,0.903024,0.907744,0.901649,0.903956
PER_recall,0.925798,0.885263,0.880306,0.885789,0.886013,0.892845,0.871246
PER_f1score,0.931886,0.8932,0.892084,0.894295,0.896743,0.897224,0.88728
MISC_precision,0.756482,0.676168,0.663362,0.701533,0.716353,0.756247,0.684265
MISC_recall,0.604258,0.58199,0.566603,0.592448,0.588379,0.660389,0.490312
MISC_f1score,0.67103,0.625068,0.610572,0.641902,0.645889,0.704549,0.56948
LOC_precision,0.594716,0.469372,0.53145,0.459129,0.451868,0.458467,0.530692


# All models:

### model_default

In [43]:
# Read the stored results of model_default, then show the table by
# sub-distribution followed by the table by named-entity type.
results_path = os.path.join('models', 'model_default', 'results.txt')
with open(results_path) as file:
    results_json = json.load(file)
display(display_results_by_subdistribution(results_json))
display(pd.DataFrame(display_results_by_named_entity(results_json)))

Alamhulk,1,2,3,4,5,Total
Precision,0.911927,0.913476,0.911007,0.902855,0.904527,0.906989
Recall,0.879646,0.884364,0.883126,0.872729,0.874598,0.87735
F1-score,0.895495,0.898684,0.89685,0.887537,0.889311,0.891923


Unnamed: 0,1,2,3,4,5,Total
ORG_precision,0.777778,0.777778,0.803279,0.792829,0.777108,0.785754
ORG_recall,0.711864,0.728,0.753846,0.742537,0.745665,0.736383
ORG_f1score,0.743363,0.752066,0.777778,0.766859,0.761062,0.760226
PER_precision,0.939442,0.942244,0.941482,0.93207,0.93504,0.938056
PER_recall,0.927235,0.927621,0.927866,0.92137,0.924899,0.925798
PER_f1score,0.933298,0.934875,0.934624,0.926689,0.929942,0.931886
MISC_precision,0.7,0.794118,0.775701,0.770833,0.741758,0.756482
MISC_recall,0.512195,0.586957,0.619403,0.656805,0.645933,0.604258
MISC_f1score,0.591549,0.675,0.688797,0.709265,0.690537,0.67103
LOC_precision,0.533981,0.605948,0.616967,0.601533,0.615152,0.594716


### model_local_features_without_morph

In [44]:
# Read the stored results of model_local_features_without_morph, then show the
# table by sub-distribution followed by the table by named-entity type.
results_path = os.path.join('models', 'model_local_features_without_morph', 'results.txt')
with open(results_path) as file:
    results_json = json.load(file)
display(display_results_by_subdistribution(results_json))
display(pd.DataFrame(display_results_by_named_entity(results_json)))

Alamhulk,1,2,3,4,5,Total
Precision,0.858887,0.868161,0.868814,0.862795,0.864699,0.865041
Recall,0.840265,0.835786,0.83589,0.828796,0.832153,0.832976
F1-score,0.849474,0.851666,0.852034,0.845454,0.848114,0.848706


Unnamed: 0,1,2,3,4,5,Total
ORG_precision,0.807018,0.729508,0.719577,0.704545,0.678977,0.727925
ORG_recall,0.779661,0.712,0.697436,0.69403,0.690751,0.714776
ORG_f1score,0.793103,0.720648,0.708333,0.699248,0.684814,0.721229
PER_precision,0.89704,0.903751,0.904806,0.898862,0.902013,0.901294
PER_recall,0.889813,0.88289,0.884567,0.881798,0.887246,0.885263
PER_f1score,0.893412,0.893199,0.894572,0.890249,0.894569,0.8932
MISC_precision,0.567568,0.708333,0.696429,0.697987,0.710526,0.676168
MISC_recall,0.512195,0.554348,0.58209,0.615385,0.645933,0.58199
MISC_f1score,0.538462,0.621951,0.634146,0.654088,0.676692,0.625068
LOC_precision,0.349593,0.4811,0.516355,0.502591,0.497222,0.469372


### model_morph_without_lemmas

In [45]:
# Read the stored results of model_morph_without_lemmas, then show the table
# by sub-distribution followed by the table by named-entity type.
results_path = os.path.join('models', 'model_morph_without_lemmas', 'results.txt')
with open(results_path) as file:
    results_json = json.load(file)
display(display_results_by_subdistribution(results_json))
display(pd.DataFrame(display_results_by_named_entity(results_json)))

Alamhulk,1,2,3,4,5,Total
Precision,0.871472,0.879382,0.873745,0.863113,0.86736,0.869267
Recall,0.826549,0.824499,0.832473,0.823891,0.826488,0.826677
F1-score,0.848416,0.851057,0.852609,0.843046,0.846431,0.847437


Unnamed: 0,1,2,3,4,5,Total
ORG_precision,0.781818,0.711864,0.702128,0.70428,0.700306,0.720079
ORG_recall,0.728814,0.672,0.676923,0.675373,0.66185,0.682992
ORG_f1score,0.754386,0.691358,0.689295,0.689524,0.680535,0.70102
PER_precision,0.902778,0.910421,0.907901,0.897538,0.902357,0.904199
PER_recall,0.878378,0.876109,0.884657,0.879233,0.883153,0.880306
PER_f1score,0.890411,0.892935,0.896128,0.888291,0.892652,0.892084
MISC_precision,0.555556,0.714286,0.690265,0.668874,0.687831,0.663362
MISC_recall,0.487805,0.543478,0.58209,0.597633,0.62201,0.566603
MISC_f1score,0.519481,0.617284,0.631579,0.63125,0.653266,0.610572
LOC_precision,0.513514,0.545802,0.546569,0.521352,0.530015,0.53145


### model_morph_with_lemmas

In [46]:
# Read the stored results of model_morph_with_lemmas, then show the table by
# sub-distribution followed by the table by named-entity type.
results_path = os.path.join('models', 'model_morph_with_lemmas', 'results.txt')
with open(results_path) as file:
    results_json = json.load(file)
display(display_results_by_subdistribution(results_json))
display(pd.DataFrame(display_results_by_named_entity(results_json)))

Alamhulk,1,2,3,4,5,Total
Precision,0.864109,0.866877,0.856607,0.849348,0.851479,0.854686
Recall,0.827212,0.820737,0.832928,0.829066,0.832368,0.82971
F1-score,0.845258,0.843176,0.844602,0.839085,0.841815,0.842013


Unnamed: 0,1,2,3,4,5,Total
ORG_precision,0.65,0.68,0.668367,0.66787,0.665738,0.666395
ORG_recall,0.661017,0.68,0.671795,0.690299,0.690751,0.678772
ORG_f1score,0.655462,0.68,0.670077,0.678899,0.678014,0.67249
PER_precision,0.908389,0.90914,0.903441,0.895302,0.898846,0.903024
PER_recall,0.886435,0.875587,0.886723,0.887378,0.892822,0.885789
PER_f1score,0.897277,0.892048,0.895004,0.891323,0.895824,0.894295
MISC_precision,0.647059,0.732394,0.705357,0.697987,0.724868,0.701533
MISC_recall,0.536585,0.565217,0.589552,0.615385,0.655502,0.592448
MISC_f1score,0.586667,0.638037,0.642276,0.654088,0.688442,0.641902
LOC_precision,0.396694,0.443609,0.485057,0.486014,0.484268,0.459129


### model_morph_with_lemmas_and_sentences

In [47]:
# Read the stored results of model_morph_with_lemmas_and_sentences, then show
# the table by sub-distribution followed by the table by named-entity type.
results_path = os.path.join('models', 'model_morph_with_lemmas_and_sentences', 'results.txt')
with open(results_path) as file:
    results_json = json.load(file)
display(display_results_by_subdistribution(results_json))
display(pd.DataFrame(display_results_by_named_entity(results_json)))

Alamhulk,1,2,3,4,5,Total
Precision,0.865644,0.868436,0.865858,0.855932,0.859465,0.861326
Recall,0.836726,0.836339,0.835283,0.827556,0.82387,0.829549
F1-score,0.850939,0.852086,0.850296,0.841505,0.841291,0.845139


Unnamed: 0,1,2,3,4,5,Total
ORG_precision,0.698413,0.698413,0.722513,0.711111,0.707831,0.707656
ORG_recall,0.745763,0.704,0.707692,0.716418,0.679191,0.710613
ORG_f1score,0.721311,0.701195,0.715026,0.713755,0.693215,0.708901
PER_precision,0.912788,0.911627,0.911594,0.900183,0.902527,0.907744
PER_recall,0.886694,0.886541,0.889238,0.883209,0.884381,0.886013
PER_f1score,0.899552,0.898909,0.900277,0.891615,0.893362,0.896743
MISC_precision,0.647059,0.739726,0.742857,0.72028,0.731844,0.716353
MISC_recall,0.536585,0.586957,0.58209,0.609467,0.626794,0.588379
MISC_f1score,0.586667,0.654545,0.65272,0.660256,0.675258,0.645889
LOC_precision,0.350877,0.465704,0.479167,0.475442,0.488152,0.451868


### model_morph_with_lemmas_and_sentences_and_gazzetteer

In [48]:
# Read the stored results of
# model_morph_with_lemmas_and_sentences_and_gazzetteer, then show the table by
# sub-distribution followed by the table by named-entity type.
results_path = os.path.join('models', 'model_morph_with_lemmas_and_sentences_and_gazzetteer', 'results.txt')
with open(results_path) as file:
    results_json = json.load(file)
display(display_results_by_subdistribution(results_json))
display(pd.DataFrame(display_results_by_named_entity(results_json)))

Alamhulk,1,2,3,4,5,Total
Precision,0.855575,0.858973,0.853547,0.846904,0.853236,0.852496
Recall,0.845354,0.845856,0.849787,0.833163,0.835586,0.839655
F1-score,0.850434,0.852364,0.851663,0.839977,0.844319,0.846027


Unnamed: 0,1,2,3,4,5,Total
ORG_precision,0.596774,0.612403,0.606796,0.606383,0.635854,0.611642
ORG_recall,0.627119,0.632,0.641026,0.63806,0.656069,0.638855
ORG_f1score,0.61157,0.622047,0.623441,0.621818,0.645804,0.624936
PER_precision,0.900367,0.905347,0.903447,0.896281,0.902805,0.901649
PER_recall,0.892412,0.894366,0.899389,0.885646,0.892413,0.892845
PER_f1score,0.896372,0.899823,0.901414,0.890932,0.897579,0.897224
MISC_precision,0.794118,0.786667,0.74359,0.719745,0.737113,0.756247
MISC_recall,0.658537,0.641304,0.649254,0.668639,0.684211,0.660389
MISC_f1score,0.72,0.706587,0.693227,0.693252,0.709677,0.704549
LOC_precision,0.417266,0.452145,0.470716,0.471637,0.48057,0.458467


### model_morph_with_lemmas_and_sentences_and_gazzetteer_and_global_features

In [58]:
# Read the stored results of
# model_morph_with_lemmas_and_sentences_and_gazzetteer_and_global_features,
# then show the table by sub-distribution followed by the table by
# named-entity type.
results_path = os.path.join('models', 'model_morph_with_lemmas_and_sentences_and_gazzetteer_and_global_features', 'results.txt')
with open(results_path) as file:
    results_json = json.load(file)
display(display_results_by_subdistribution(results_json))
display(pd.DataFrame(display_results_by_named_entity(results_json)))

Alamhulk,1,2,3,4,5,Total
Precision,0.873034,0.875964,0.875967,0.867759,0.86984,0.871473
Recall,0.835177,0.829147,0.808247,0.808258,0.813956,0.814719
F1-score,0.853686,0.851913,0.840746,0.836952,0.840971,0.842141


Unnamed: 0,1,2,3,4,5,Total
ORG_precision,0.75,0.706349,0.705882,0.701961,0.687117,0.710262
ORG_recall,0.762712,0.712,0.676923,0.66791,0.647399,0.693389
ORG_f1score,0.756303,0.709163,0.691099,0.684512,0.666667,0.701549
PER_precision,0.905369,0.908145,0.905176,0.898606,0.90248,0.903956
PER_recall,0.885135,0.876761,0.859235,0.863969,0.871131,0.871246
PER_f1score,0.895138,0.892177,0.881607,0.880947,0.886529,0.88728
MISC_precision,0.571429,0.724638,0.7125,0.695652,0.717105,0.684265
MISC_recall,0.487805,0.543478,0.425373,0.473373,0.521531,0.490312
MISC_f1score,0.526316,0.621118,0.53271,0.56338,0.603878,0.56948
LOC_precision,0.436364,0.550725,0.566298,0.55332,0.546751,0.530692
