In [1]:
import os
import pandas as pd
from performance import calculate_extraction_performance, single_verb, calculate_ner_performance

# Turn off pandas' chained-assignment check (None disables both warn and raise).
pd.set_option('mode.chained_assignment', None)
# Render floats in displayed frames with a thousands separator and three decimals.
pd.set_option('display.float_format', '{:,.3f}'.format)

## Verb phrase/noun phrase extraction performance

In [2]:
# Gold-standard activity dataset that every extractor output is scored against.
original = pd.read_csv(os.path.join('..', 'datasets', 'activity_dataset_large.csv'), sep=';')

# One result CSV per extractor, all stored in the same sub-directory.
result_dir = 'larger'
result_files = [
    os.path.join(result_dir, file_name)
    for file_name in (
        'corenlp-phrases.csv', 'flair-phrases.csv', 'spacy-phrases.csv', 'stanza-phrases.csv',
        'bert-phrases.csv', 'allen-phrases.csv', 'custom-phrases.csv', 'custom2-phrases.csv',
    )
]

# One record per extractor; transposed so metrics become rows and extractors columns.
results = pd.DataFrame([calculate_extraction_performance(file, original) for file in result_files]).T

# The 'Extractor' row supplies the column labels. Those labels presumably still carry
# the directory part of the file path (TODO confirm against `performance`), so strip it.
# The prefix is built with os.path.join so that it uses the same separator as the paths
# above; a hard-coded 'larger/' would not match where the separator is '\'.
# regex=False: the prefix is a literal string, not a pattern.
dir_prefix = os.path.join(result_dir, '')
results.columns = results.loc['Extractor'].str.replace(dir_prefix, '', regex=False)

# Reassign instead of inplace=True so that the statement reads as a plain transformation.
results = results.drop(index='Extractor')
results

Extractor,corenlp,flair,spacy,stanza,bert,allen,custom,custom2
AccuracyVerbRequired,0.728,0.817,0.816,0.82,0.857,0.686,0.818,0.86
F1ScoreVerbRequired,0.827,0.89,0.89,0.892,0.916,0.797,0.891,0.918
AccuracyNounRequired,0.844,0.988,0.983,0.99,0.988,0.98,0.921,0.961
F1ScoreNounRequired,0.914,0.994,0.991,0.995,0.994,0.99,0.958,0.98
PrecisionOutputNoun,0.67,0.883,0.848,0.918,0.875,0.818,0.753,0.817
RecallOutputNoun,0.591,0.878,0.832,0.915,0.872,0.79,0.698,0.777
F1ScoreOutputNoun,0.628,0.88,0.84,0.917,0.873,0.804,0.724,0.797
PrecisionOutputBoth_Nouns,0.645,0.736,0.707,0.745,0.768,0.569,0.694,0.75
RecallOutputBoth_Nouns,0.551,0.733,0.703,0.745,0.766,0.568,0.646,0.727
F1ScoreOutputBoth_Nouns,0.594,0.735,0.705,0.745,0.767,0.568,0.669,0.738


In [3]:
# Gold-standard dataset for the "concat" variant of the activity data.
original = pd.read_csv(os.path.join('..', 'datasets', 'activity_dataset_large_concat.csv'), sep=';')

# One result CSV per extractor, all stored in the same sub-directory.
result_dir = 'larger'
result_files = [
    os.path.join(result_dir, file_name)
    for file_name in (
        'corenlp-phrases-concat.csv', 'flair-phrases-concat.csv', 'spacy-phrases-concat.csv',
        'stanza-phrases-concat.csv', 'bert-phrases-concat.csv', 'allen-phrases-concat.csv',
        'custom-phrases-concat.csv', 'custom2-phrases-concat.csv',
    )
]

# One record per extractor; transposed so metrics become rows and extractors columns.
results = pd.DataFrame([calculate_extraction_performance(file, original) for file in result_files]).T

# The 'Extractor' row supplies the column labels. Those labels presumably still carry
# the directory part of the file path (TODO confirm against `performance`), so strip it.
# The prefix is built with os.path.join so that it uses the same separator as the paths
# above; a hard-coded 'larger/' would not match where the separator is '\'.
# regex=False: the prefix is a literal string, not a pattern.
dir_prefix = os.path.join(result_dir, '')
results.columns = results.loc['Extractor'].str.replace(dir_prefix, '', regex=False)

# Reassign instead of inplace=True so that the statement reads as a plain transformation.
results = results.drop(index='Extractor')
results

Extractor,corenlp,flair,spacy,stanza,bert,allen,custom,custom2
AccuracyVerbRequired,0.52,0.666,0.596,0.509,0.744,0.454,0.754,0.814
F1ScoreVerbRequired,0.652,0.781,0.723,0.638,0.84,0.586,0.849,0.889
AccuracyNounRequired,0.907,1.0,1.0,1.0,1.0,1.0,0.959,0.982
F1ScoreNounRequired,0.951,1.0,1.0,1.0,1.0,1.0,0.979,0.991
PrecisionOutputNoun,0.271,0.765,0.747,0.811,0.753,0.732,0.52,0.566
RecallOutputNoun,0.232,0.765,0.747,0.811,0.753,0.732,0.466,0.546
F1ScoreOutputNoun,0.25,0.765,0.747,0.811,0.753,0.732,0.492,0.556
PrecisionOutputBoth_Nouns,0.208,0.484,0.38,0.331,0.558,0.245,0.424,0.485
RecallOutputBoth_Nouns,0.19,0.484,0.38,0.331,0.558,0.245,0.409,0.477
F1ScoreOutputBoth_Nouns,0.199,0.484,0.38,0.331,0.558,0.245,0.416,0.481


## Named entity detection performance

In [4]:
# Gold-standard named-entity dataset that every NER output is scored against.
ner_original = pd.read_csv(os.path.join('..', 'datasets', 'ner_dataset_final.csv'), sep=';')

# One result CSV per NER tool, all stored in the same sub-directory.
result_dir = 'smaller'
result_files = [
    os.path.join(result_dir, file_name)
    for file_name in (
        'corenlp-ner.csv', 'flair-ner.csv', 'spacy-ner.csv',
        'stanza-ner.csv', 'bert-ner.csv', 'allen-ner.csv',
    )
]

# One record per tool; transposed so metrics become rows and tools columns.
results = pd.DataFrame([calculate_ner_performance(file, ner_original) for file in result_files]).T

# The 'Extractor' row supplies the column labels. Those labels presumably still carry
# the directory part of the file path (TODO confirm against `performance`), so strip it.
# The prefix is built with os.path.join so that it uses the same separator as the paths
# above; a hard-coded 'smaller/' would not match where the separator is '\'.
# regex=False: the prefix is a literal string, not a pattern.
dir_prefix = os.path.join(result_dir, '')
results.columns = results.loc['Extractor'].str.replace(dir_prefix, '', regex=False)

# Reassign instead of inplace=True so that the statement reads as a plain transformation.
results = results.drop(index='Extractor')
results

Extractor,corenlp,flair,spacy,stanza,bert,allen
AccuracyNEFound,0.819,0.816,0.816,0.921,0.733,0.807
F1ScoreNEFound,0.437,0.605,0.572,0.811,0.527,0.624
PrecisionNEEntry,0.734,0.744,0.766,0.94,0.382,0.756
RecallNEEntry,0.233,0.478,0.429,0.727,0.258,0.551
F1ScoreNEEntry,0.354,0.582,0.55,0.82,0.308,0.638
PrecisionNEType,0.872,0.889,0.893,0.943,0.754,0.89
RecallNEType,0.278,0.571,0.504,0.731,0.513,0.653
F1ScoreNEType,0.421,0.696,0.645,0.823,0.611,0.754
