# 0. Prep

In [1]:
%matplotlib inline

import numpy as np
import pandas as pd
import scipy
import scipy.stats

In [2]:
# Predictions on Wiki data from the Wiki model and the Full model
# (the 'Loss' column of each file is what the cells below analyse).

wiki_wiki_df = pd.read_csv('workspace/loss/wiki_model_wiki_data.csv')
full_wiki_df = pd.read_csv('workspace/loss/full_model_wiki_data.csv')

# Predictions on Book data from the same two models.

wiki_book_df = pd.read_csv('workspace/loss/wiki_model_book_data.csv')
full_book_df = pd.read_csv('workspace/loss/full_model_book_data.csv')

# Minimum 'Doc Count' a tag must reach in the wiki test data to be kept,
# so that every per-tag loss is a reasonable estimator.

MIN_TAG_DOC_COUNT = 100

# Tag analysis on Wiki data, indexed by tag name and restricted to tags that
# meet the threshold above. Load and filter happen in one expression so the
# cell is idempotent; `.copy()` marks the result as an independent frame, so
# adding columns to it later cannot trip pandas' chained-assignment check.

tag_loss_df = (
    pd.read_csv('results/expected_loss.csv', index_col='Tag')
    .loc[lambda df: df['Doc Count'] >= MIN_TAG_DOC_COUNT]
    .copy()
)

# 1. Expected Loss

In [3]:
# Mean loss of the Wiki model over the Wiki data

wiki_loss = wiki_wiki_df.loc[:, 'Loss'].mean()
wiki_loss

2.089758413518253

In [4]:
# Mean loss of the Full model over the same Wiki data

full_loss = full_wiki_df.loc[:, 'Loss'].mean()
full_loss

2.2918429620704126

In [5]:
# Gap in mean loss on Wiki data: Full model minus Wiki model.
# A positive value means the Full model performs worse on Wiki data
# (greater loss is lower accuracy).

full_loss - wiki_loss

0.2020845485521594

In [6]:
# Statistical significance of the Wiki-data loss gap between the two models.
# Welch's t-test (equal_var=False) is used because nothing guarantees that the
# two models' loss distributions share a variance; when both samples have the
# same size the t statistic is identical to the pooled-variance test.
# NOTE(review): both frames hold predictions on the same Wiki data - if their
# rows are aligned one-to-one, a paired test (scipy.stats.ttest_rel) would be
# the more appropriate choice; confirm row alignment before switching.

scipy.stats.ttest_ind(wiki_wiki_df['Loss'], full_wiki_df['Loss'], equal_var=False)

Ttest_indResult(statistic=-38.6407228863456, pvalue=0.0)

In [7]:
# ... whereas on Book data the Full model comes out ahead:
# mean loss of the Full model minus mean loss of the Wiki model

full_book_df.loc[:, 'Loss'].mean() - wiki_book_df.loc[:, 'Loss'].mean()

-2.0124328713883513

In [8]:
# Statistical significance of the Book-data loss gap between the two models.
# Welch's t-test (equal_var=False) is used because nothing guarantees that the
# two models' loss distributions share a variance; when both samples have the
# same size the t statistic is identical to the pooled-variance test.
# NOTE(review): both frames hold predictions on the same Book data - if their
# rows are aligned one-to-one, a paired test (scipy.stats.ttest_rel) would be
# the more appropriate choice; confirm row alignment before switching.

scipy.stats.ttest_ind(wiki_book_df['Loss'], full_book_df['Loss'], equal_var=False)

Ttest_indResult(statistic=226.24333128318304, pvalue=0.0)

# 2. Loss Per Tag

### Headline Accuracy

In [9]:
# Ten tags with the lowest expected loss per word under the Wiki model
# (lowest loss first, i.e. the tags the Wiki model handles most accurately)

tag_loss_df.sort_values(by='Expected-Loss-Per-Word (Wiki)', ascending=True).iloc[:10]

Unnamed: 0_level_0,Tag Type,Doc Count,Expected-Loss-Per-Word (Wiki),Expected-Loss-Per-Word (Full)
Tag,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
verb.competition,sst,4410,-0.226064,-0.234867
PunctType=Peri,mor,42666,-0.220284,-0.23689
SYM,pos,2235,-0.186239,-0.168095
Mood=Ind,mor,34635,-0.182003,-0.201199
PronType=Art,mor,68187,-0.177865,-0.18949
Definite=Def,mor,56805,-0.175617,-0.185289
ADP,pos,80856,-0.168727,-0.173951
DET,pos,70423,-0.165445,-0.176635
Definite=Ind,mor,26725,-0.161113,-0.173452
Case=Nom,mor,15184,-0.156026,-0.192079


### Model Comparison

In [10]:
# Per-tag model comparison: Full-model loss minus Wiki-model loss.
# Negative values mean the Full model has the lower loss for that tag.

tag_loss_df['Model-Diff'] = tag_loss_df['Expected-Loss-Per-Word (Full)'].sub(
    tag_loss_df['Expected-Loss-Per-Word (Wiki)']
)

In [11]:
# Five POS tags with the most negative Model-Diff,
# i.e. the largest loss reduction of the Full model relative to the Wiki model

(
    tag_loss_df
    .loc[lambda df: df['Tag Type'] == 'pos']
    .sort_values(by='Model-Diff')
    .iloc[:5]
)

Unnamed: 0_level_0,Tag Type,Doc Count,Expected-Loss-Per-Word (Wiki),Expected-Loss-Per-Word (Full),Model-Diff
Tag,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
INTJ,pos,245,0.242559,0.180147,-0.062412
SCONJ,pos,12231,0.028618,-0.00783,-0.036449
PRON,pos,37458,-0.090316,-0.121282,-0.030966
AUX,pos,41050,-0.133965,-0.156721,-0.022756
PART,pos,23875,-0.087899,-0.110277,-0.022378


In [12]:
# Five Morphology tags with the most negative Model-Diff,
# i.e. the largest loss reduction of the Full model relative to the Wiki model

(
    tag_loss_df
    .loc[lambda df: df['Tag Type'] == 'mor']
    .sort_values(by='Model-Diff')
    .iloc[:5]
)

Unnamed: 0_level_0,Tag Type,Doc Count,Expected-Loss-Per-Word (Wiki),Expected-Loss-Per-Word (Full),Model-Diff
Tag,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
VerbType=Mod,mor,128,0.040358,-0.094038,-0.134396
Person=2,mor,389,0.230405,0.122099,-0.108306
Case=Acc,mor,3683,0.115663,0.026425,-0.089238
Gender=Fem,mor,3734,0.004815,-0.065956,-0.070771
Person=1,mor,1429,0.153492,0.093018,-0.060474


In [13]:
# Five Supersense tags with the most negative Model-Diff,
# i.e. the largest loss reduction of the Full model relative to the Wiki model

(
    tag_loss_df
    .loc[lambda df: df['Tag Type'] == 'sst']
    .sort_values(by='Model-Diff')
    .iloc[:5]
)

Unnamed: 0_level_0,Tag Type,Doc Count,Expected-Loss-Per-Word (Wiki),Expected-Loss-Per-Word (Full),Model-Diff
Tag,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
noun.feeling,sst,771,0.363257,0.264155,-0.099101
verb.emotion,sst,911,0.340114,0.242821,-0.097293
noun.Tops,sst,683,0.263324,0.21494,-0.048384
verb.motion,sst,5498,0.104309,0.057368,-0.046941
verb.body,sst,880,0.088734,0.04371,-0.045024
