In [1]:
import os
import json
import pandas as pd
import numpy as np
from modules.extract_results import display_results_by_subdistribution, display_results_by_named_entity, \
                                    display_confusion_matrix, extract_results_to_txt_file
import matplotlib.pyplot as plt

### Folders for models:

The models in the `models` variable use these taggers:
1. `model_default` uses NerMorphFeatureTagger, NerLocalFeatureTagger, NerSentenceFeatureTagger, NerGazetteerFeatureTagger and NerGlobalContextFeatureTagger.
2. `model_local_features_without_morph` uses NerEmptyFeatureTagger and NerLocalFeatureWithoutMorphTagger.
3. `model_morph_without_lemmas` uses NerEmptyFeatureTagger, NerLocalFeatureWithoutMorphTagger and NerMorphNoLemmasFeatureTagger.
4. `model_morph_with_lemmas` uses NerEmptyFeatureTagger, NerLocalFeatureWithoutMorphTagger and NerBasicMorphFeatureTagger.
5. `model_morph_with_lemmas_and_sentences` uses NerEmptyFeatureTagger, NerLocalFeatureWithoutMorphTagger, NerBasicMorphFeatureTagger and fex.NerSentenceFeatureTagger.
6. `model_morph_with_lemmas_and_sentences_and_gazetteer` uses NerEmptyFeatureTagger, NerLocalFeatureWithoutMorphTagger, NerBasicMorphFeatureTagger, fex.NerSentenceFeatureTagger and fex.NerGazetteerFeatureTagger.
7. `model_morph_with_lemmas_and_sentences_and_gazetteer_and_global_features` uses NerEmptyFeatureTagger, NerLocalFeatureWithoutMorphTagger, NerBasicMorphFeatureTagger, fex.NerSentenceFeatureTagger, fex.NerGazetteerFeatureTagger and fex.NerGlobalContextFeatureTagger.

In [2]:
# Names of the model folders; see the tagger list above for what each one uses.
models = [
    'model_default',
    'model_local_features_without_morph',
    'model_morph_without_lemmas',
    'model_morph_with_lemmas',
    'model_morph_with_lemmas_and_sentences',
    'model_morph_with_lemmas_and_sentences_and_gazetteer',
    'model_morph_with_lemmas_and_sentences_and_gazetteer_and_global_features',
]

### Folders for results:

Every model in the `models` variable contains folders with different results. The folders inside each model folder are:

1. `model_initial` contains results from the initial model
2. `model_vabamorf` contains results using VabamorfCorpusTagger
3. `model_vabamorf_gazetteer` contains results from using the "vallakohtud.csv" gazetteer
4. `model_vabamorf_gazetteer2` contains results using the gazetteer from the .tsv file
5. `model_vabamorf_gazetteer1and2` contains results from using both of the aforementioned gazetteers

In [3]:
# Names of the result folders that may exist inside each model folder.
model_results = [
    'model_initial',
    'model_vabamorf',
    'model_vabamorf_gazetteer',
    'model_vabamorf_gazetteer2',
    'model_vabamorf_gazetteer1and2',
    'model_gazetteer_both_new',
]

In [4]:
def get_results_from_directory(model_directory, directory_for_results):
    """Load the "Total" summaries for one model / results-folder combination.

    Reads ``models/<model_directory>/<directory_for_results>/results.txt``,
    parses it as JSON and passes the parsed data to
    ``display_results_by_subdistribution`` and
    ``display_results_by_named_entity``.

    Args:
        model_directory: Name of the model folder inside ``models``.
        directory_for_results: Name of the results folder inside the model folder.

    Returns:
        A 2-tuple ``(by_subdistribution_total, by_named_entity_total)`` holding
        the ``"Total"`` entry of each summary. If the results file does not
        exist, a notice is printed and ``({}, {})`` is returned instead.
    """
    results_path = os.path.join('models', model_directory, directory_for_results, 'results.txt')
    try:
        with open(results_path) as results_file:
            parsed_results = json.load(results_file)
            subdistribution_total = display_results_by_subdistribution(parsed_results)["Total"]
            named_entity_total = display_results_by_named_entity(parsed_results)["Total"]
            return subdistribution_total, named_entity_total
    except FileNotFoundError:
        # The notice is in Estonian: "model <...> has no results in folder <...>".
        print(f"(!) Mudelil {model_directory} puuduvad tulemused kaustas {directory_for_results}.")
        return {}, {}

In [9]:
# Accumulators keyed by "<model>,<results folder>"; each value is the "Total"
# entry returned by get_results_from_directory for that combination.
totals_by_subdistribution, totals_by_named_entity = {}, {}

model = 'model_morph_with_lemmas_and_sentences_and_gazetteer_and_global_features'
results = 'model_gazetteer_both_new'

by_subdistribution, by_named_entity = get_results_from_directory(model, results)

# Both dictionaries share the same key for a given combination.
combination_key = f"{model},{results}"
totals_by_subdistribution[combination_key] = by_subdistribution
totals_by_named_entity[combination_key] = by_named_entity

# Total values by model:

In [8]:
# One column per collected "<model>,<results folder>" key, one row per metric.
metric_rows = ["Precision", "Recall", "F1-score"]
subdistribution_totals_table = pd.DataFrame(totals_by_subdistribution, index=metric_rows)
display(subdistribution_totals_table)

Unnamed: 0,"model_morph_with_lemmas_and_sentences_and_gazetteer_and_global_features,model_gazetteer_both_new"
Precision,0.902924
Recall,0.870606
F1-score,0.886471


# Total values by named entity:

In [None]:
# NOTE(review): `format_vertical_headers` is neither defined nor imported in the
# visible part of this notebook — confirm it is available before running this cell.
named_entity_totals_table = pd.DataFrame(totals_by_named_entity)
format_vertical_headers(named_entity_totals_table)

# Compare models:
Which models would you like to compare?

In [None]:
# Enter each side of the comparison as 'model_name,results_folder',
# e.g. 'model_default,model_initial'.
old = 'model_default,model_initial'
new = 'model_default,model_vabamorf'

In [None]:
def _subdistribution_totals_for(key):
    """Return the by-subdistribution totals for a 'model_name,results_folder' key.

    Uses the entry already collected in ``totals_by_subdistribution`` when there
    is one; otherwise loads it from disk. Without this, the lookup raises
    ``KeyError`` for any combination that an earlier cell did not load.
    """
    if key in totals_by_subdistribution:
        return totals_by_subdistribution[key]
    # Split on the first comma only: '<model_name>,<results_folder>'.
    model_directory, results_directory = key.split(",", 1)
    # get_results_from_directory returns (by_subdistribution, by_named_entity).
    return get_results_from_directory(model_directory, results_directory)[0]


old_by_subdistribution = _subdistribution_totals_for(old)
new_by_subdistribution = _subdistribution_totals_for(new)

In [None]:
# Difference of the "new" totals relative to the "old" totals.
subdistribution_change = new_by_subdistribution - old_by_subdistribution
print(subdistribution_change)

# All models:

Define the model whose results you'd like to see:

In [None]:
# Show the available model folder names.
print(list(models))

In [None]:
# Show the available results folder names.
print(list(model_results))

In [None]:
# Together these select models/<model_name>/<results_name>/results.txt below.
model_name, results_name = 'model_default', 'model_vabamorf_gazetteer'

In [None]:
# Read and parse the results file for the selected model / results folder.
results_path = os.path.join('models', model_name, results_name, 'results.txt')
with open(results_path) as results_file:
    results_json = json.load(results_file)

# Show the full by-subdistribution summary, then the by-named-entity summary
# wrapped in a DataFrame.
display(display_results_by_subdistribution(results_json))
display(pd.DataFrame(display_results_by_named_entity(results_json)))