In [1]:
import json
import pandas as pd
from pathlib import Path
from pprint import pprint
import datetime
import time
from scipy import stats
import numpy as np
import matplotlib as plt

import pandas
import researchpy as rp
import seaborn as sns

import statsmodels.api as sm
from statsmodels.formula.api import ols
import statsmodels.stats.multicomp

%matplotlib inline

pd.set_option('display.max_rows', 10000)

In [2]:
TRAINING_ROUND = 'Training_Round'
EPOCHS = 'Epochs'
SCENARIO = 'Scenario'
EMBEDDING = 'Embedding'
TAGGING_SCHEME = 'Tagging_Scheme'
CAP_DIM = 'Capitalisation_Dim'
LOWER = 'Lower'
CHAR_DIM = 'Char_LSTM_Dim'
WORD_DIM = 'Word_LSTM_Dim'
PRECISION = 'Precision'
RECALL = 'Recall'
F1_SCORE = 'F1_Score'

In [3]:
columns = [TRAINING_ROUND, EPOCHS, SCENARIO, EMBEDDING, TAGGING_SCHEME, CAP_DIM, LOWER, CHAR_DIM, WORD_DIM, PRECISION, RECALL, F1_SCORE]

In [4]:
columns

['Training_Round',
 'Epochs',
 'Scenario',
 'Embedding',
 'Tagging_Scheme',
 'Capitalisation_Dim',
 'Lower',
 'Char_LSTM_Dim',
 'Word_LSTM_Dim',
 'Precision',
 'Recall',
 'F1_Score']

In [5]:
pd.set_option('display.max_columns', 1000)
training_data_df = pd.read_csv('resultados_sem_outliers.csv')
training_data_df 

Unnamed: 0.1,Unnamed: 0,Training_Round,Epochs,Scenario,Embedding,Tagging_Scheme,Capitalisation_Dim,Lower,Char_LSTM_Dim,Word_LSTM_Dim,Precision,Recall,F1_Score
0,320,0,5,selective,FastText,IOB2,False,False,25,100,58.83,56.51,57.64
1,352,1,5,selective,FastText,IOB2,False,False,25,100,64.86,50.77,56.96
2,384,2,5,selective,FastText,IOB2,False,False,25,100,63.22,51.77,56.93
3,416,3,5,selective,FastText,IOB2,False,False,25,100,59.05,56.76,57.88
4,448,4,5,selective,FastText,IOB2,False,False,25,100,58.87,55.11,56.93
5,480,5,5,selective,FastText,IOB2,False,False,25,100,59.32,48.73,53.5
6,512,6,5,selective,FastText,IOB2,False,False,25,100,59.65,50.99,54.98
7,544,7,5,selective,FastText,IOB2,False,False,25,100,63.9,50.59,56.47
8,576,8,5,selective,FastText,IOB2,False,False,25,100,59.73,56.47,58.05
9,608,9,5,selective,FastText,IOB2,False,False,25,100,62.26,55.36,58.61


In [6]:
training_data_df = training_data_df[training_data_df.Lower == True]
len(training_data_df)

581

In [7]:
training_data_df.groupby([TAGGING_SCHEME]).describe()[F1_SCORE].sort_values(by='mean', ascending=False)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Tagging_Scheme,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
IOB2,292.0,60.648116,7.139823,44.34,52.115,63.765,65.83,68.26
IOBES,289.0,60.229516,7.452188,42.97,50.5,63.59,65.86,67.99


In [8]:
training_data_df.groupby([EMBEDDING]).describe()[F1_SCORE].sort_values(by='mean', ascending=False)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Embedding,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Wang2Vec,141.0,66.504043,0.848442,63.18,66.1,66.48,67.01,68.26
FastText,140.0,65.011429,1.225288,56.8,64.45,65.18,65.7225,66.9
Glove,145.0,62.602,1.003969,59.96,61.9,62.7,63.31,64.89
Word2Vec,155.0,48.771742,1.816029,42.97,47.79,49.0,49.965,52.42


In [9]:
training_data_df.groupby([CAP_DIM]).describe()[F1_SCORE].sort_values(by='mean', ascending=False)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Capitalisation_Dim,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
True,295.0,60.599627,7.315023,43.81,51.325,63.76,66.085,68.26
False,286.0,60.27514,7.280542,42.97,51.505,63.48,65.68,68.07


In [10]:
training_data_df.groupby([CHAR_DIM]).describe()[F1_SCORE].sort_values(by='mean', ascending=False)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Char_LSTM_Dim,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
50,288.0,60.456979,7.206507,45.04,51.515,63.665,65.93,68.07
25,293.0,60.423106,7.390465,42.97,51.41,63.6,65.75,68.26


In [11]:
training_data_df.groupby([WORD_DIM]).describe()[F1_SCORE].sort_values(by='mean', ascending=False)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Word_LSTM_Dim,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
100,290.0,60.642345,7.24822,42.97,51.5075,63.88,66.0875,68.26
200,291.0,60.238144,7.345432,43.81,51.35,63.55,65.69,68.07


In [12]:
training_data_df.groupby([EMBEDDING, TAGGING_SCHEME, CAP_DIM, WORD_DIM, CHAR_DIM]).describe()[F1_SCORE].sort_values(by='mean', ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,count,mean,std,min,25%,50%,75%,max
Embedding,Tagging_Scheme,Capitalisation_Dim,Word_LSTM_Dim,Char_LSTM_Dim,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Wang2Vec,IOB2,False,100,50,10.0,66.901,0.721641,65.44,66.425,67.105,67.4925,67.69
Wang2Vec,IOB2,True,100,25,9.0,66.89,1.152801,64.87,66.2,66.92,67.86,68.26
Wang2Vec,IOB2,False,200,50,6.0,66.818333,1.05095,65.21,66.2075,67.155,67.36,68.07
Wang2Vec,IOBES,True,200,25,10.0,66.788,0.771432,65.54,66.275,66.845,67.2,67.99
Wang2Vec,IOBES,True,100,50,9.0,66.758889,0.51765,66.25,66.42,66.63,66.73,67.79
Wang2Vec,IOB2,True,100,50,10.0,66.668,0.790974,65.0,66.33,66.755,67.2925,67.64
Wang2Vec,IOB2,True,200,50,9.0,66.595556,0.441902,65.94,66.28,66.65,66.82,67.22
Wang2Vec,IOBES,True,100,25,10.0,66.55,1.105039,63.99,66.2625,66.44,67.2775,67.87
Wang2Vec,IOB2,True,200,25,7.0,66.38,0.727805,65.26,66.07,66.31,66.69,67.57
Wang2Vec,IOBES,False,100,50,7.0,66.371429,0.869127,64.95,65.895,66.65,66.815,67.58


In [13]:
training_data_df.groupby([TRAINING_ROUND]).describe()[F1_SCORE].sort_values(by='mean', ascending=False)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Training_Round,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
4,52.0,60.984231,6.93501,47.28,58.49,64.165,65.69,68.26
2,61.0,60.732951,7.449083,44.08,52.42,64.27,66.23,68.07
9,56.0,60.657679,7.146791,46.94,52.1925,63.655,65.87,67.87
8,60.0,60.625833,6.869073,46.61,58.7675,63.62,65.5025,67.73
1,60.0,60.601667,7.236562,46.26,51.45,63.4,66.22,67.99
5,58.0,60.431207,7.341524,47.59,50.57,63.69,66.1125,67.86
6,60.0,60.307667,7.453801,45.04,50.695,63.575,65.6475,67.74
3,59.0,60.250169,7.642747,44.33,50.31,63.61,65.855,67.79
0,61.0,60.226721,7.475321,42.97,51.5,63.18,65.86,67.57
7,54.0,59.576852,7.800555,43.81,50.1575,63.075,65.64,67.53


In [14]:
import pandas
import researchpy as rp
import seaborn as sns

import statsmodels.api as sm
from statsmodels.formula.api import ols
import statsmodels.stats.multicomp

In [15]:
rp.summary_cont(training_data_df[F1_SCORE])





Unnamed: 0,Variable,N,Mean,SD,SE,95% Conf.,Interval
0,F1_Score,581.0,60.439897,7.293584,0.302589,59.845593,61.0342


In [16]:
rp.summary_cont(training_data_df.groupby(EMBEDDING))[F1_SCORE]





Unnamed: 0_level_0,N,Mean,SD,SE,95% Conf.,Interval
Embedding,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
FastText,140,65.011429,1.225288,0.103556,64.808459,65.214398
Glove,145,62.602,1.003969,0.083375,62.438585,62.765415
Wang2Vec,141,66.504043,0.848442,0.071452,66.363997,66.644088
Word2Vec,155,48.771742,1.816029,0.145867,48.485843,49.057641


In [17]:
print(F1_SCORE, EMBEDDING, TAGGING_SCHEME, CAP_DIM, CHAR_DIM, WORD_DIM)

F1_Score Embedding Tagging_Scheme Capitalisation_Dim Char_LSTM_Dim Word_LSTM_Dim


In [18]:
# Fits the model with the interaction term
# This will also automatically include the main effects for each factor
model = ols('F1_Score ~ C(Embedding)*C(Tagging_Scheme)*C(Capitalisation_Dim)*C(Char_LSTM_Dim)*C(Word_LSTM_Dim)', training_data_df).fit()

# Seeing if the overall model is significant
print(f"Overall model F({model.df_model: .0f},{model.df_resid: .0f}) = {model.fvalue: .3f}, p = {model.f_pvalue: .20f}")

Overall model F( 63, 517) =  293.305, p =  0.00000000000000000000


In [19]:
model.summary()

0,1,2,3
Dep. Variable:,F1_Score,R-squared:,0.973
Model:,OLS,Adj. R-squared:,0.969
Method:,Least Squares,F-statistic:,293.3
Date:,"Wed, 10 Apr 2019",Prob (F-statistic):,0.0
Time:,21:57:01,Log-Likelihood:,-931.41
No. Observations:,581,AIC:,1991.0
Df Residuals:,517,BIC:,2270.0
Df Model:,63,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,65.2590,0.403,161.923,0.000,64.467,66.051
C(Embedding)[T.Glove],-2.5534,0.586,-4.361,0.000,-3.704,-1.403
C(Embedding)[T.Wang2Vec],0.9597,0.605,1.588,0.113,-0.228,2.147
C(Embedding)[T.Word2Vec],-15.6220,0.570,-27.409,0.000,-16.742,-14.502
C(Tagging_Scheme)[T.IOBES],0.4343,0.586,0.742,0.459,-0.716,1.585
C(Capitalisation_Dim)[T.True],0.5770,0.570,1.012,0.312,-0.543,1.697
C(Char_LSTM_Dim)[T.50],-0.3015,0.605,-0.499,0.618,-1.489,0.886
C(Word_LSTM_Dim)[T.200],-0.6402,0.605,-1.059,0.290,-1.828,0.547
C(Embedding)[T.Glove]:C(Tagging_Scheme)[T.IOBES],-0.7099,0.852,-0.833,0.405,-2.384,0.965

0,1,2,3
Omnibus:,101.625,Durbin-Watson:,2.268
Prob(Omnibus):,0.0,Jarque-Bera (JB):,336.769
Skew:,-0.802,Prob(JB):,7.44e-74
Kurtosis:,6.368,Cond. No.,228.0


In [20]:
res = sm.stats.anova_lm(model, typ= 2)

In [28]:
type(res)

pandas.core.frame.DataFrame

In [31]:
res.to_csv('anova.csv', float_format='%.10f', columns=['PR(>F)'])

In [21]:
res

Unnamed: 0,sum_sq,df,F,PR(>F)
C(Embedding),29805.623696,3.0,6116.62525,0.0
C(Tagging_Scheme),4.438826,1.0,2.73277,0.098916
C(Capitalisation_Dim),2.147286,1.0,1.32198,0.250769
C(Char_LSTM_Dim),4.046514,1.0,2.491242,0.115093
C(Word_LSTM_Dim),15.681142,1.0,9.654118,0.001993
C(Embedding):C(Tagging_Scheme),4.169763,3.0,0.855707,0.46397
C(Embedding):C(Capitalisation_Dim),3.084378,3.0,0.632967,0.594003
C(Tagging_Scheme):C(Capitalisation_Dim),0.033966,1.0,0.020911,0.885078
C(Embedding):C(Char_LSTM_Dim),14.835709,3.0,3.044542,0.02847
C(Tagging_Scheme):C(Char_LSTM_Dim),2.219961,1.0,1.366722,0.242915


In [22]:
# Calculating effect size
def anova_table(aov):
    aov['mean_sq'] = aov[:]['sum_sq']/aov[:]['df']
    
    aov['eta_sq'] = aov[:-1]['sum_sq']/sum(aov['sum_sq'])
    
    aov['omega_sq'] = (aov[:-1]['sum_sq']-(aov[:-1]['df']*aov['mean_sq'][-1]))/(sum(aov['sum_sq'])+aov['mean_sq'][-1])
    
    cols = ['sum_sq', 'mean_sq', 'df', 'F', 'PR(>F)', 'eta_sq', 'omega_sq']
    aov = aov[cols]
    return aov

anova_table(res)

Unnamed: 0,sum_sq,mean_sq,df,F,PR(>F),eta_sq,omega_sq
C(Embedding),29805.623696,9935.207899,3.0,6116.62525,0.0,0.96875,0.9685403
C(Tagging_Scheme),4.438826,4.438826,1.0,2.73277,0.098916,0.000144,9.147373e-05
C(Capitalisation_Dim),2.147286,2.147286,1.0,1.32198,0.250769,7e-05,1.699747e-05
C(Char_LSTM_Dim),4.046514,4.046514,1.0,2.491242,0.115093,0.000132,7.872338e-05
C(Word_LSTM_Dim),15.681142,15.681142,1.0,9.654118,0.001993,0.00051,0.000456855
C(Embedding):C(Tagging_Scheme),4.169763,1.389921,3.0,0.855707,0.46397,0.000136,-2.28519e-05
C(Embedding):C(Capitalisation_Dim),3.084378,1.028126,3.0,0.632967,0.594003,0.0001,-5.812749e-05
C(Tagging_Scheme):C(Capitalisation_Dim),0.033966,0.033966,1.0,0.020911,0.885078,1e-06,-5.168657e-05
C(Embedding):C(Char_LSTM_Dim),14.835709,4.945236,3.0,3.044542,0.02847,0.000482,0.000323797
C(Tagging_Scheme):C(Char_LSTM_Dim),2.219961,2.219961,1.0,1.366722,0.242915,7.2e-05,1.935944e-05


In [52]:
TUKEY_COLUMNS = ['Group 1', 'Group 2', 'Mean Difference', 'Lower', 'Upper', 'Reject?']
def generate_tukeyhsd_results(group):
    mc = statsmodels.stats.multicomp.MultiComparison(training_data_df[F1_SCORE], training_data_df[group])
    mc_results = mc.tukeyhsd()
    print(mc_results)
    df = pd.DataFrame(data=mc_results._results_table.data[1:], columns=mc_results._results_table.data[0])
    df.columns = TUKEY_COLUMNS
    df.to_csv('tukey_' + group.lower() + '.csv', float_format='%.5f', index=False)

In [53]:
generate_tukeyhsd_results(EMBEDDING)

 Multiple Comparison of Means - Tukey HSD,FWER=0.05
 group1   group2  meandiff  lower    upper   reject
---------------------------------------------------
FastText  Glove   -2.4094  -2.8037  -2.0151   True 
FastText Wang2Vec  1.4926   1.0956   1.8897   True 
FastText Word2Vec -16.2397 -16.6277 -15.8517  True 
 Glove   Wang2Vec  3.902    3.5085   4.2956   True 
 Glove   Word2Vec -13.8303 -14.2147 -13.4458  True 
Wang2Vec Word2Vec -17.7323 -18.1196 -17.345   True 
---------------------------------------------------


In [54]:
generate_tukeyhsd_results(CAP_DIM)

Multiple Comparison of Means - Tukey HSD,FWER=0.05
group1 group2 meandiff lower  upper reject
------------------------------------------
False   True   0.3245  -0.865 1.514 False 
------------------------------------------


In [55]:
generate_tukeyhsd_results(TAGGING_SCHEME)

Multiple Comparison of Means - Tukey HSD,FWER=0.05
group1 group2 meandiff  lower  upper  reject
--------------------------------------------
 IOB2  IOBES  -0.4186  -1.6078 0.7706 False 
--------------------------------------------


In [56]:
generate_tukeyhsd_results(CHAR_DIM)

Multiple Comparison of Means - Tukey HSD,FWER=0.05
group1 group2 meandiff  lower  upper  reject
--------------------------------------------
  25     50    0.0339  -1.1558 1.2236 False 
--------------------------------------------


In [57]:
generate_tukeyhsd_results(WORD_DIM)

Multiple Comparison of Means - Tukey HSD,FWER=0.05
group1 group2 meandiff  lower  upper reject
-------------------------------------------
 100    200   -0.4042  -1.5934 0.785 False 
-------------------------------------------
