In [1]:
import os
import json
import pandas as pd
from modules.extract_results import display_results_by_subdistribution, display_results_by_named_entity, \
                                    display_confusion_matrix, extract_results_to_txt_file

In [2]:
def format_vertical_headers(df):
    """Return a styled view of ``df`` with vertically written column headers.

    Missing values are replaced with empty strings before styling, and the
    result of ``.style.set_table_styles(...)`` is returned so the notebook
    can render it as the cell output.
    """
    # Every header cell (row and column headers alike) gets a fixed width.
    header_cell_rule = {"selector": "th", "props": [('width', '40px')]}
    # Column headings are written vertically, rotated by 180 degrees and
    # given enough height for long model names.
    column_heading_rule = {
        "selector": "th.col_heading",
        "props": [
            ("writing-mode", "vertical-rl"),
            ('transform', 'rotateZ(180deg)'),
            ('height', '290px'),
            ('vertical-align', 'top'),
        ],
    }
    blank_filled = df.fillna('')
    return blank_filled.style.set_table_styles([header_cell_rule, column_heading_rule])

In [3]:
# Gather the "Total" column of both summary tables for every directory under
# "models" whose name starts with "model_". Models without a results file are
# reported and skipped.
totals_by_subdistribution = {}
totals_by_named_entity = {}
for directory in os.listdir("models"):
    if not directory.startswith("model_"):
        continue  # not a model directory
    results_path = os.path.join('models', directory, 'results.txt')
    try:
        with open(results_path) as results_file:
            results_json = json.load(results_file)
            totals_by_subdistribution[directory] = display_results_by_subdistribution(results_json)["Total"]
            totals_by_named_entity[directory] = display_results_by_named_entity(results_json)["Total"]
    except FileNotFoundError:
        print(f"(!) Mudelil {directory} puuduvad tulemused.")

# Total values by model:

In [4]:
# One column per model; rows are limited to the three aggregate metrics.
format_vertical_headers(
    pd.DataFrame(
        data=totals_by_subdistribution,
        index=["Precision", "Recall", "F1-score"],
    )
)

Unnamed: 0,model_default,model_local_features_without_morph,model_morph_without_lemmas,model_morph_with_lemmas,model_morph_with_lemmas_and_sentences,model_morph_with_lemmas_and_sentences_and_gazzetteer,model_morph_with_lemmas_and_sentences_and_gazzetteer_and_global_features
Precision,0.904527,0.864699,0.86736,0.851479,0.859465,0.853236,0.863889
Recall,0.874598,0.832153,0.826488,0.832368,0.82387,0.835586,0.840829
F1-score,0.889311,0.848114,0.846431,0.841815,0.841291,0.844319,0.852203


# Total values by named entity:

In [5]:
# One column per model; rows are the per-entity metrics (e.g. ORG_precision).
format_vertical_headers(
    pd.DataFrame(data=totals_by_named_entity)
)

Unnamed: 0,model_default,model_local_features_without_morph,model_morph_without_lemmas,model_morph_with_lemmas,model_morph_with_lemmas_and_sentences,model_morph_with_lemmas_and_sentences_and_gazzetteer,model_morph_with_lemmas_and_sentences_and_gazzetteer_and_global_features
ORG_precision,0.779428,0.687798,0.702837,0.666157,0.70863,0.634594,0.682631
ORG_recall,0.744606,0.693405,0.664471,0.689433,0.683545,0.653742,0.684526
ORG_f1score,0.761262,0.689991,0.68291,0.677307,0.693807,0.643695,0.682496
PER_precision,0.935972,0.902675,0.90344,0.899639,0.903773,0.903301,0.908416
PER_recall,0.925524,0.887524,0.884017,0.893153,0.88509,0.893938,0.897962
PER_f1score,0.930714,0.895002,0.893553,0.896222,0.894317,0.898553,0.903106
MISC_precision,0.740069,0.711702,0.691498,0.723416,0.730649,0.740417,0.707716
MISC_recall,0.649946,0.652229,0.62497,0.661393,0.62995,0.689102,0.640314
MISC_f1score,0.688467,0.676465,0.651583,0.687706,0.67364,0.710975,0.66873
LOC_precision,0.609861,0.491394,0.531158,0.479357,0.483046,0.479158,0.526693


# All models:

### model_default

In [6]:
# Load the stored results for model_default, then show the
# per-subdistribution table followed by the per-named-entity table.
results_path = os.path.join('models', 'model_default', 'results.txt')
with open(results_path) as results_file:
    results_json = json.load(results_file)
display(display_results_by_subdistribution(results_json))
display(pd.DataFrame(display_results_by_named_entity(results_json)))

Alamhulk,1,2,3,4,5,Total
Precision,0.911927,0.915015,0.905627,0.882717,0.911048,0.904527
Recall,0.879646,0.889086,0.880416,0.847297,0.881895,0.874598
F1-score,0.895495,0.901864,0.892844,0.864645,0.896234,0.889311


Unnamed: 0,1,2,3,4,5,Total
ORG_precision,0.777778,0.777778,0.848485,0.764706,0.728395,0.779428
ORG_recall,0.711864,0.742424,0.8,0.712329,0.75641,0.744606
ORG_f1score,0.743363,0.75969,0.823529,0.737589,0.742138,0.761262
PER_precision,0.939442,0.945081,0.939801,0.908825,0.946711,0.935972
PER_recall,0.927235,0.92801,0.928406,0.905157,0.938812,0.925524
PER_f1score,0.933298,0.936468,0.934069,0.906987,0.942745,0.930714
MISC_precision,0.7,0.868421,0.74359,0.756757,0.631579,0.740069
MISC_recall,0.512195,0.647059,0.690476,0.8,0.6,0.649946
MISC_f1score,0.591549,0.741573,0.716049,0.777778,0.615385,0.688467
LOC_precision,0.533981,0.650602,0.641667,0.556391,0.666667,0.609861


### model_local_features_without_morph

In [7]:
# Load the stored results for model_local_features_without_morph, then show
# the per-subdistribution table followed by the per-named-entity table.
results_path = os.path.join('models', 'model_local_features_without_morph', 'results.txt')
with open(results_path) as results_file:
    results_json = json.load(results_file)
display(display_results_by_subdistribution(results_json))
display(pd.DataFrame(display_results_by_named_entity(results_json)))

Alamhulk,1,2,3,4,5,Total
Precision,0.858887,0.877747,0.870244,0.847991,0.872068,0.864699
Recall,0.840265,0.831304,0.836117,0.811443,0.845263,0.832153
F1-score,0.849474,0.853894,0.85284,0.829315,0.858456,0.848114


Unnamed: 0,1,2,3,4,5,Total
ORG_precision,0.807018,0.661538,0.701493,0.666667,0.602273,0.687798
ORG_recall,0.779661,0.651515,0.671429,0.684932,0.679487,0.693405
ORG_f1score,0.793103,0.656489,0.686131,0.675676,0.638554,0.689991
PER_precision,0.89704,0.910724,0.907134,0.884206,0.914271,0.902675
PER_recall,0.889813,0.875916,0.888279,0.874888,0.908723,0.887524
PER_f1score,0.893412,0.892981,0.897608,0.879522,0.911489,0.895002
MISC_precision,0.567568,0.857143,0.675,0.702703,0.756098,0.711702
MISC_recall,0.512195,0.588235,0.642857,0.742857,0.775,0.652229
MISC_f1score,0.538462,0.697674,0.658537,0.722222,0.765432,0.676465
LOC_precision,0.349593,0.577381,0.591241,0.463576,0.475177,0.491394


### model_morph_without_lemmas

In [8]:
# Load the stored results for model_morph_without_lemmas, then show the
# per-subdistribution table followed by the per-named-entity table.
results_path = os.path.join('models', 'model_morph_without_lemmas', 'results.txt')
with open(results_path) as results_file:
    results_json = json.load(results_file)
display(display_results_by_subdistribution(results_json))
display(pd.DataFrame(display_results_by_named_entity(results_json)))

Alamhulk,1,2,3,4,5,Total
Precision,0.871472,0.887482,0.862018,0.837272,0.884093,0.86736
Recall,0.826549,0.822449,0.849915,0.802898,0.836632,0.826488
F1-score,0.848416,0.853729,0.855924,0.819725,0.859708,0.846431


Unnamed: 0,1,2,3,4,5,Total
ORG_precision,0.781818,0.650794,0.685714,0.710145,0.685714,0.702837
ORG_recall,0.728814,0.621212,0.685714,0.671233,0.615385,0.664471
ORG_f1score,0.754386,0.635659,0.685714,0.690141,0.648649,0.68291
PER_precision,0.902778,0.918294,0.902537,0.872148,0.921442,0.90344
PER_recall,0.878378,0.873822,0.90358,0.865695,0.898609,0.884017
PER_f1score,0.890411,0.895506,0.903058,0.86891,0.909882,0.893553
MISC_precision,0.555556,0.882353,0.651163,0.605263,0.763158,0.691498
MISC_recall,0.487805,0.588235,0.666667,0.657143,0.725,0.62497
MISC_f1score,0.519481,0.705882,0.658824,0.630137,0.74359,0.651583
LOC_precision,0.513514,0.569536,0.547945,0.454545,0.570248,0.531158


### model_morph_with_lemmas

In [9]:
# Load the stored results for model_morph_with_lemmas, then show the
# per-subdistribution table followed by the per-named-entity table.
results_path = os.path.join('models', 'model_morph_with_lemmas', 'results.txt')
with open(results_path) as results_file:
    results_json = json.load(results_file)
display(display_results_by_subdistribution(results_json))
display(pd.DataFrame(display_results_by_named_entity(results_json)))

Alamhulk,1,2,3,4,5,Total
Precision,0.864109,0.869709,0.835923,0.831825,0.859743,0.851479
Recall,0.827212,0.814257,0.859598,0.819617,0.845263,0.832368
F1-score,0.845258,0.84107,0.847595,0.825676,0.852442,0.841815


Unnamed: 0,1,2,3,4,5,Total
ORG_precision,0.65,0.707692,0.647887,0.666667,0.658537,0.666157
ORG_recall,0.661017,0.69697,0.657143,0.739726,0.692308,0.689433
ORG_f1score,0.655462,0.70229,0.652482,0.701299,0.675,0.677307
PER_precision,0.908389,0.909917,0.891556,0.875663,0.91267,0.899639
PER_recall,0.886435,0.86466,0.911374,0.889013,0.914286,0.893153
PER_f1score,0.897277,0.886711,0.901356,0.882287,0.913477,0.896222
MISC_precision,0.647059,0.810811,0.658537,0.675676,0.825,0.723416
MISC_recall,0.536585,0.588235,0.642857,0.714286,0.825,0.661393
MISC_f1score,0.586667,0.681818,0.650602,0.694444,0.825,0.687706
LOC_precision,0.396694,0.482759,0.550296,0.489051,0.477987,0.479357


### model_morph_with_lemmas_and_sentences

In [10]:
# Load the stored results for model_morph_with_lemmas_and_sentences, then
# show the per-subdistribution table followed by the per-named-entity table.
results_path = os.path.join('models', 'model_morph_with_lemmas_and_sentences', 'results.txt')
with open(results_path) as results_file:
    results_json = json.load(results_file)
display(display_results_by_subdistribution(results_json))
display(pd.DataFrame(display_results_by_named_entity(results_json)))

Alamhulk,1,2,3,4,5,Total
Precision,0.865644,0.871251,0.86025,0.831836,0.873864,0.859465
Recall,0.836726,0.835953,0.83297,0.808657,0.809474,0.82387
F1-score,0.850939,0.853237,0.84639,0.820083,0.840437,0.841291


Unnamed: 0,1,2,3,4,5,Total
ORG_precision,0.698413,0.698413,0.769231,0.683544,0.693548,0.70863
ORG_recall,0.745763,0.666667,0.714286,0.739726,0.551282,0.683545
ORG_f1score,0.721311,0.682171,0.740741,0.710526,0.614286,0.693807
PER_precision,0.912788,0.91046,0.911523,0.872269,0.911826,0.903773
PER_recall,0.886694,0.886387,0.895208,0.868161,0.889001,0.88509
PER_f1score,0.899552,0.898262,0.903292,0.87021,0.900269,0.894317
MISC_precision,0.647059,0.820513,0.75,0.657895,0.777778,0.730649
MISC_recall,0.536585,0.627451,0.571429,0.714286,0.7,0.62995
MISC_f1score,0.586667,0.711111,0.648649,0.684932,0.736842,0.67364
LOC_precision,0.350877,0.546012,0.514019,0.464,0.540323,0.483046


### model_morph_with_lemmas_and_sentences_and_gazzetteer

In [11]:
# Load the stored results for
# model_morph_with_lemmas_and_sentences_and_gazzetteer, then show the
# per-subdistribution table followed by the per-named-entity table.
results_path = os.path.join('models', 'model_morph_with_lemmas_and_sentences_and_gazzetteer', 'results.txt')
with open(results_path) as results_file:
    results_json = json.load(results_file)
display(display_results_by_subdistribution(results_json))
display(pd.DataFrame(display_results_by_named_entity(results_json)))

Alamhulk,1,2,3,4,5,Total
Precision,0.855575,0.862396,0.84208,0.829961,0.878529,0.853236
Recall,0.845354,0.846358,0.858388,0.792495,0.845053,0.835586
F1-score,0.850434,0.854302,0.850156,0.810795,0.861466,0.844319


Unnamed: 0,1,2,3,4,5,Total
ORG_precision,0.596774,0.626866,0.597403,0.605263,0.746667,0.634594
ORG_recall,0.627119,0.636364,0.657143,0.630137,0.717949,0.653742
ORG_f1score,0.61157,0.631579,0.62585,0.61745,0.732026,0.643695
PER_precision,0.900367,0.910396,0.899344,0.877919,0.92848,0.903301
PER_recall,0.892412,0.896335,0.910508,0.851345,0.91909,0.893938
PER_f1score,0.896372,0.903311,0.904892,0.864428,0.923761,0.898553
MISC_precision,0.794118,0.780488,0.666667,0.65,0.810811,0.740417
MISC_recall,0.658537,0.627451,0.666667,0.742857,0.75,0.689102
MISC_f1score,0.72,0.695652,0.666667,0.693333,0.779221,0.710975
LOC_precision,0.417266,0.481707,0.506329,0.474359,0.516129,0.479158


### model_morph_with_lemmas_and_sentences_and_gazzetteer_and_global_features

In [12]:
# Load the stored results for
# model_morph_with_lemmas_and_sentences_and_gazzetteer_and_global_features,
# then show the per-subdistribution table followed by the per-named-entity
# table.
results_path = os.path.join('models', 'model_morph_with_lemmas_and_sentences_and_gazzetteer_and_global_features', 'results.txt')
with open(results_path) as results_file:
    results_json = json.load(results_file)
display(display_results_by_subdistribution(results_json))
display(pd.DataFrame(display_results_by_named_entity(results_json)))

Alamhulk,1,2,3,4,5,Total
Precision,0.87157,0.877816,0.851533,0.830795,0.894713,0.863889
Recall,0.836283,0.854107,0.860809,0.830795,0.826526,0.840829
F1-score,0.853562,0.865799,0.856145,0.830795,0.859269,0.852203


Unnamed: 0,1,2,3,4,5,Total
ORG_precision,0.606557,0.734375,0.65,0.722222,0.7,0.682631
ORG_recall,0.627119,0.712121,0.742857,0.712329,0.628205,0.684526
ORG_f1score,0.616667,0.723077,0.693333,0.717241,0.662162,0.682496
PER_precision,0.912652,0.915512,0.909639,0.875613,0.928665,0.908416
PER_recall,0.89605,0.899215,0.915416,0.88052,0.898609,0.897962
PER_f1score,0.904275,0.90729,0.912518,0.87806,0.91339,0.903106
MISC_precision,0.666667,0.794872,0.710526,0.609756,0.756757,0.707716
MISC_recall,0.536585,0.607843,0.642857,0.714286,0.7,0.640314
MISC_f1score,0.594595,0.688889,0.675,0.657895,0.727273,0.66873
LOC_precision,0.434783,0.584795,0.555556,0.475,0.583333,0.526693
