In [1]:
import os
import json
import pandas as pd
import numpy as np
from modules.extract_results import display_results_by_subdistribution, display_results_by_named_entity, \
                                    display_confusion_matrix, extract_results_to_txt_file

### Folders for models:

The models in the `models` variable use these taggers:
1. `model_default` uses NerMorphFeatureTagger, NerLocalFeatureTagger, NerSentenceFeatureTagger, NerGazetteerFeatureTagger and NerGlobalContextFeatureTagger.
2. `model_local_features_without_morph` uses NerEmptyFeatureTagger and NerLocalFeatureWithoutMorphTagger.
3. `model_morph_without_lemmas` uses NerEmptyFeatureTagger, NerLocalFeatureWithoutMorphTagger and NerMorphNoLemmasFeatureTagger.
4. `model_morph_with_lemmas` uses NerEmptyFeatureTagger, NerLocalFeatureWithoutMorphTagger and NerBasicMorphFeatureTagger.
5. `model_morph_with_lemmas_and_sentences` uses NerEmptyFeatureTagger, NerLocalFeatureWithoutMorphTagger, NerBasicMorphFeatureTagger and NerSentenceFeatureTagger.
6. `model_morph_with_lemmas_and_sentences_and_gazetteer` uses NerEmptyFeatureTagger, NerLocalFeatureWithoutMorphTagger, NerBasicMorphFeatureTagger, NerSentenceFeatureTagger and NerGazetteerFeatureTagger.
7. `model_morph_with_lemmas_and_sentences_and_gazetteer_and_global_features` uses NerEmptyFeatureTagger, NerLocalFeatureWithoutMorphTagger, NerBasicMorphFeatureTagger, NerSentenceFeatureTagger, NerGazetteerFeatureTagger and NerGlobalContextFeatureTagger.

In [9]:
def get_results_from_directory(model_directory):
    """Load the "Total" results of one model.

    Reads ``models/<model_directory>/results.txt``, parses it as JSON and
    passes the parsed object to ``display_results_by_subdistribution`` and
    ``display_results_by_named_entity``.

    Args:
        model_directory: Path of the model folder, relative to ``models``.

    Returns:
        A 2-tuple: the ``"Total"`` entry of the by-subdistribution results,
        followed by the ``"Total"`` entry of the by-named-entity results.
    """
    results_path = os.path.join('models', model_directory, 'results.txt')
    with open(results_path) as results_file:
        parsed_results = json.load(results_file)
        subdistribution_total = display_results_by_subdistribution(parsed_results)["Total"]
        named_entity_total = display_results_by_named_entity(parsed_results)["Total"]
        return subdistribution_total, named_entity_total

In [32]:
# Start from empty accumulators, so re-running this cell discards earlier entries.
totals_by_subdistribution, totals_by_named_entity = {}, {}

# Folder of the model to inspect, relative to the `models` directory.
model = os.path.join(
    'model_morph_with_lemmas_and_sentences_and_gazetteer_and_global_features',
    'model_gazetteer_both_lowercase_added_loc_and_variants',
)

# The helper returns the pair (totals by subdistribution, totals by named entity).
by_subdistribution, by_named_entity = get_results_from_directory(model)
totals_by_subdistribution.update({model: by_subdistribution})
totals_by_named_entity.update({model: by_named_entity})

# Total values by model:

In [33]:
# One column per model; rows are labelled Precision / Recall / F1-score.
display(
    pd.DataFrame(
        data=totals_by_subdistribution,
        index=["Precision", "Recall", "F1-score"],
    )
)

Unnamed: 0,model_morph_with_lemmas_and_sentences_and_gazetteer_and_global_features\model_gazetteer_both_lowercase_added_loc_and_variants
Precision,0.905278
Recall,0.874469
F1-score,0.889607


# Total values by named entity:

In [34]:
# One column per model; the row labels come from the per-entity totals themselves.
display(
    pd.DataFrame(data=totals_by_named_entity)
)

Unnamed: 0,model_morph_with_lemmas_and_sentences_and_gazetteer_and_global_features\model_gazetteer_both_lowercase_added_loc_and_variants
LOC_ORG_f1score,0.703074
LOC_ORG_precision,0.74778
LOC_ORG_recall,0.66657
LOC_f1score,0.505602
LOC_precision,0.588445
LOC_recall,0.44458
MISC_f1score,0.680123
MISC_precision,0.740479
MISC_recall,0.633984
ORG_f1score,0.760931


# Compare models:
Set `old` and `new` below to two model folders that each contain a `settings.py` file. Positive values are better.

In [6]:
# Both models live in the same feature-set folder; only the run folder differs.
# `old` is the baseline run and `new` is the run compared against it.
old, new = (
    os.path.join(
        'model_morph_with_lemmas_and_sentences_and_gazetteer_and_global_features',
        run_folder,
    )
    for run_folder in (
        'model_gazetteer_both_lowercase_added_loc',
        'model_gazetteer_both_lowercase_added_loc_and_variants',
    )
)

In [39]:
# Collect the totals of both models under comparison, then show them side by side.
# Loading stays best-effort: a model whose results cannot be loaded is skipped so
# the other one is still displayed, but the failure is reported rather than
# silently swallowed.
for model in [old, new]:
    try:
        by_subdistribution, by_named_entity = get_results_from_directory(model)
    except Exception as error:
        # `Exception` (not a bare `except:`) lets KeyboardInterrupt and SystemExit
        # propagate, so the cell can still be interrupted.
        print("Skipping {!r}: {}: {}".format(model, type(error).__name__, error))
        continue
    totals_by_subdistribution[model] = by_subdistribution
    totals_by_named_entity[model] = by_named_entity
display(pd.DataFrame(totals_by_subdistribution, index=["Precision", "Recall", "F1-score"]))
display(pd.DataFrame(totals_by_named_entity))

Unnamed: 0,model_morph_with_lemmas_and_sentences_and_gazetteer_and_global_features\model_gazetteer_both_lowercase_added_loc_and_variants,model_morph_with_lemmas_and_sentences_and_gazetteer_and_global_features\model_gazetteer_both_lowercase_added_loc
Precision,0.905278,0.90581
Recall,0.874469,0.874555
F1-score,0.889607,0.889908


Unnamed: 0,model_morph_with_lemmas_and_sentences_and_gazetteer_and_global_features\model_gazetteer_both_lowercase_added_loc_and_variants,model_morph_with_lemmas_and_sentences_and_gazetteer_and_global_features\model_gazetteer_both_lowercase_added_loc
ORG_precision,0.780813,0.782481
ORG_recall,0.74239,0.744954
ORG_f1score,0.760931,0.763151
PER_precision,0.936931,0.936784
PER_recall,0.925397,0.925153
PER_f1score,0.931123,0.930925
MISC_precision,0.740479,0.736318
MISC_recall,0.633984,0.635776
MISC_f1score,0.680123,0.679614
LOC_precision,0.588445,0.598926
