In [1]:
import json
import pandas as pd
from pathlib import Path
from pprint import pprint
import datetime
import time
from scipy import stats
import numpy as np
import matplotlib as plt

import pandas
import researchpy as rp
import seaborn as sns

import statsmodels.api as sm
from statsmodels.formula.api import ols
import statsmodels.stats.multicomp

%matplotlib inline

In [2]:
# Column names of the results table, kept as constants so later cells can refer to them by name.
TRAINING_ROUND = 'Training_Round'
EPOCHS = 'Epochs'
SCENARIO = 'Scenario'
# Configuration settings that the analysis below uses as grouping factors.
EMBEDDING = 'Embedding'
TAGGING_SCHEME = 'Tagging_Scheme'
CAP_DIM = 'Capitalisation_Dim'
LOWER = 'Lower'
CHAR_DIM = 'Char_LSTM_Dim'
WORD_DIM = 'Word_LSTM_Dim'
# Evaluation metrics recorded for each run.
PRECISION = 'Precision'
RECALL = 'Recall'
F1_SCORE = 'F1_Score'

In [3]:
# Every column of the results table, in file order.
columns = [
    TRAINING_ROUND,
    EPOCHS,
    SCENARIO,
    EMBEDDING,
    TAGGING_SCHEME,
    CAP_DIM,
    LOWER,
    CHAR_DIM,
    WORD_DIM,
    PRECISION,
    RECALL,
    F1_SCORE,
]

In [4]:
# Echo the column-name list as a quick sanity check.
columns

['Training_Round',
 'Epochs',
 'Scenario',
 'Embedding',
 'Tagging_Scheme',
 'Capitalisation_Dim',
 'Lower',
 'Char_LSTM_Dim',
 'Word_LSTM_Dim',
 'Precision',
 'Recall',
 'F1_Score']

In [5]:
# Let wide frames render without column truncation.
pd.set_option('display.max_columns', 1000)

# Load the experiment results; the path is relative to the working directory.
results_path = Path('resultados.csv')
training_data_df = pd.read_csv(results_path)
training_data_df

Unnamed: 0,Training_Round,Epochs,Scenario,Embedding,Tagging_Scheme,Capitalisation_Dim,Lower,Char_LSTM_Dim,Word_LSTM_Dim,Precision,Recall,F1_Score
0,0,5,selective,Wang2Vec,iob,False,False,25,100,58.95,54.79,56.79
1,0,5,selective,Wang2Vec,iob,False,False,25,200,61.60,52.06,56.43
2,0,5,selective,Wang2Vec,iob,False,False,50,100,66.50,47.47,55.40
3,0,5,selective,Wang2Vec,iob,False,False,50,200,58.54,50.74,54.36
4,0,5,selective,Wang2Vec,iob,False,True,25,100,66.28,67.73,67.00
5,0,5,selective,Wang2Vec,iob,False,True,25,200,71.00,62.67,66.58
6,0,5,selective,Wang2Vec,iob,False,True,50,100,70.14,65.18,67.57
7,0,5,selective,Wang2Vec,iob,False,True,50,200,66.93,63.57,65.21
8,0,5,selective,Wang2Vec,iob,True,False,25,100,58.62,57.55,58.08
9,0,5,selective,Wang2Vec,iob,True,False,25,200,64.42,50.45,56.59


In [6]:
# Keep only the runs of the 'selective' scenario and report how many remain.
is_selective = training_data_df[SCENARIO] == 'selective'
training_data_df = training_data_df[is_selective]
len(training_data_df)

1280

In [7]:
# Outlier removal: drop runs whose F1 score has an absolute z-score above 3.
# NOTE: this cell reassigns training_data_df, so re-running it recomputes the
# z-scores on the already-filtered frame and may drop further rows.
z = np.abs(stats.zscore(training_data_df[F1_SCORE]))
print(z)

# Positional boolean mask; "not greater than 3" keeps rows whose z-score is NaN.
keep_row = ~(np.asarray(z) > 3)
training_data_df = training_data_df[keep_row]
training_data_df

[0.13270249 0.10281247 0.01729379 ... 0.56307084 0.58548835 0.53816248]


Unnamed: 0,Training_Round,Epochs,Scenario,Embedding,Tagging_Scheme,Capitalisation_Dim,Lower,Char_LSTM_Dim,Word_LSTM_Dim,Precision,Recall,F1_Score
0,0,5,selective,Wang2Vec,iob,False,False,25,100,58.95,54.79,56.79
1,0,5,selective,Wang2Vec,iob,False,False,25,200,61.60,52.06,56.43
2,0,5,selective,Wang2Vec,iob,False,False,50,100,66.50,47.47,55.40
3,0,5,selective,Wang2Vec,iob,False,False,50,200,58.54,50.74,54.36
4,0,5,selective,Wang2Vec,iob,False,True,25,100,66.28,67.73,67.00
5,0,5,selective,Wang2Vec,iob,False,True,25,200,71.00,62.67,66.58
6,0,5,selective,Wang2Vec,iob,False,True,50,100,70.14,65.18,67.57
7,0,5,selective,Wang2Vec,iob,False,True,50,200,66.93,63.57,65.21
8,0,5,selective,Wang2Vec,iob,True,False,25,100,58.62,57.55,58.08
9,0,5,selective,Wang2Vec,iob,True,False,25,200,64.42,50.45,56.59


In [8]:
# F1 summary statistics per tagging scheme, highest mean first.
(
    training_data_df
    .groupby(TAGGING_SCHEME)[F1_SCORE]
    .describe()
    .sort_values(by='mean', ascending=False)
)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Tagging_Scheme,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
iob,618.0,57.030437,7.469194,19.23,53.97,56.14,63.39,68.26
iobes,617.0,56.940665,7.718509,19.73,54.1,56.43,63.36,67.99


In [9]:
# F1 summary statistics per embedding, highest mean first.
(
    training_data_df
    .groupby(EMBEDDING)[F1_SCORE]
    .describe()
    .sort_values(by='mean', ascending=False)
)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Embedding,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Wang2Vec,305.0,60.004656,7.795033,19.23,55.99,58.05,66.44,68.26
FastText,305.0,59.104918,8.062926,21.27,56.19,57.86,65.06,67.05
Glove,309.0,57.932589,6.035712,19.73,54.88,57.03,62.63,64.89
Word2Vec,316.0,51.100032,4.469993,22.25,48.7975,51.545,54.43,56.92


In [10]:
# F1 summary statistics per capitalisation setting, highest mean first.
(
    training_data_df
    .groupby(CAP_DIM)[F1_SCORE]
    .describe()
    .sort_values(by='mean', ascending=False)
)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Capitalisation_Dim,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
True,625.0,57.27856,7.218754,19.73,54.14,56.46,63.55,68.26
False,610.0,56.68541,7.950705,19.23,53.9,56.15,63.1625,68.07


In [11]:
# F1 summary statistics per character-LSTM dimension, highest mean first.
(
    training_data_df
    .groupby(CHAR_DIM)[F1_SCORE]
    .describe()
    .sort_values(by='mean', ascending=False)
)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Char_LSTM_Dim,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
25,617.0,57.163063,7.310514,19.73,54.05,56.24,63.39,68.26
50,618.0,56.808398,7.86459,19.23,54.045,56.26,63.37,68.07


In [12]:
# F1 summary statistics per word-LSTM dimension, highest mean first.
(
    training_data_df
    .groupby(WORD_DIM)[F1_SCORE]
    .describe()
    .sort_values(by='mean', ascending=False)
)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Word_LSTM_Dim,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
100,626.0,57.135272,7.611859,19.23,54.31,56.375,63.37,68.26
200,609.0,56.831724,7.574353,19.73,53.82,56.13,63.39,68.07


In [13]:
# F1 summary statistics per lower-casing setting, highest mean first.
(
    training_data_df
    .groupby(LOWER)[F1_SCORE]
    .describe()
    .sort_values(by='mean', ascending=False)
)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Lower,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
True,612.0,59.713399,8.241165,22.55,50.5375,63.4,65.74,68.26
False,623.0,54.305939,5.754651,19.23,54.31,55.47,56.475,59.82


In [14]:
# F1 summary statistics for every combination of the six factors, highest mean first.
configuration_keys = [EMBEDDING, TAGGING_SCHEME, CAP_DIM, WORD_DIM, CHAR_DIM, LOWER]
(
    training_data_df
    .groupby(configuration_keys)[F1_SCORE]
    .describe()
    .sort_values(by='mean', ascending=False)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,count,mean,std,min,25%,50%,75%,max
Embedding,Tagging_Scheme,Capitalisation_Dim,Word_LSTM_Dim,Char_LSTM_Dim,Lower,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Wang2Vec,iob,False,100,50,True,10.0,66.901000,0.721641,65.44,66.4250,67.105,67.4925,67.69
Wang2Vec,iob,False,200,50,True,6.0,66.818333,1.050950,65.21,66.2075,67.155,67.3600,68.07
Wang2Vec,iobes,True,200,25,True,10.0,66.788000,0.771432,65.54,66.2750,66.845,67.2000,67.99
Wang2Vec,iob,True,100,50,True,10.0,66.668000,0.790974,65.00,66.3300,66.755,67.2925,67.64
Wang2Vec,iobes,True,100,25,True,10.0,66.550000,1.105039,63.99,66.2625,66.440,67.2775,67.87
Wang2Vec,iobes,True,100,50,True,10.0,66.446000,1.103260,63.63,66.3150,66.630,66.7175,67.79
Wang2Vec,iobes,True,200,50,True,10.0,66.328000,0.419571,65.60,66.2125,66.460,66.6125,66.78
Wang2Vec,iobes,False,200,50,True,9.0,66.315556,0.910647,64.87,66.1600,66.180,66.8500,67.94
Wang2Vec,iobes,False,100,25,True,9.0,66.250000,0.560134,65.51,65.6300,66.470,66.6300,66.92
Wang2Vec,iob,False,200,25,True,9.0,66.227778,0.446760,65.47,65.8600,66.250,66.5100,66.93


In [15]:
# F1 summary statistics per training round, highest mean first.
(
    training_data_df
    .groupby(TRAINING_ROUND)[F1_SCORE]
    .describe()
    .sort_values(by='mean', ascending=False)
)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Training_Round,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,125.0,57.4752,6.581119,29.93,54.47,56.5,63.17,67.57
4,121.0,57.434298,6.212425,36.85,53.97,56.48,63.4,68.26
2,125.0,57.32208,7.756478,22.25,54.68,56.35,64.04,68.07
9,125.0,57.23488,6.70379,27.68,54.31,56.33,63.12,67.87
1,124.0,57.164355,7.429988,21.65,54.1025,55.815,63.39,67.99
6,125.0,57.1456,7.49006,24.07,54.15,56.51,63.27,67.74
3,123.0,56.67813,8.296802,24.32,54.02,56.17,63.575,67.79
8,124.0,56.676613,8.047166,21.51,53.1575,56.08,63.355,67.73
7,122.0,56.365164,7.958107,19.73,53.32,56.05,62.62,67.53
5,121.0,56.332149,9.1785,19.23,53.68,56.03,63.31,67.86


In [16]:
# Overall descriptive statistics of the F1 score (N, mean, SD, SE, 95% confidence interval).
rp.summary_cont(training_data_df[F1_SCORE])





Unnamed: 0,Variable,N,Mean,SD,SE,95% Conf.,Interval
0,F1_Score,1235.0,56.985587,7.591828,0.216029,56.561761,57.409413


In [17]:
# Same summary broken down by embedding; indexing with F1_SCORE keeps only the F1 statistics.
rp.summary_cont(training_data_df.groupby(EMBEDDING))[F1_SCORE]





Unnamed: 0_level_0,N,Mean,SD,SE,95% Conf.,Interval
Embedding,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
FastText,305,59.104918,8.062926,0.461682,58.200022,60.009814
Glove,309,57.932589,6.035712,0.34336,57.259604,58.605574
Wang2Vec,305,60.004656,7.795033,0.446342,59.129825,60.879487
Word2Vec,316,51.100032,4.469993,0.251457,50.607176,51.592887


In [18]:
# Print the literal column names needed when writing the model formula.
formula_column_names = (F1_SCORE, EMBEDDING, TAGGING_SCHEME, CAP_DIM, CHAR_DIM, WORD_DIM, LOWER)
print(*formula_column_names)

F1_Score Embedding Tagging_Scheme Capitalisation_Dim Char_LSTM_Dim Word_LSTM_Dim Lower


In [19]:
# Fit a full-factorial OLS model: the `*` operator in the formula expands to
# every main effect plus all interactions between the six categorical factors.
# The formula is assembled from the column-name constants so that it cannot
# drift from the names used in the rest of the notebook.
FACTORS = [EMBEDDING, TAGGING_SCHEME, CAP_DIM, CHAR_DIM, WORD_DIM, LOWER]
formula = f"{F1_SCORE} ~ " + "*".join(f"C({factor})" for factor in FACTORS)
model = ols(formula, data=training_data_df).fit()

# Overall F-test of the model. The p-value is printed in scientific notation:
# with a fixed number of decimals a very small p-value is displayed as 0.000...
print(
    f"Overall model F({model.df_model:.0f}, {model.df_resid:.0f}) = "
    f"{model.fvalue:.3f}, p = {model.f_pvalue:.3e}"
)

Overall model F( 127, 1107) =  11.344, p =  0.00000000000000000000


In [20]:
# Mean squares exposed by the fitted model: explained (model), residual and total.
model.mse_model, model.mse_resid, model.mse_total

(316.6834896190153, 27.916753629428882, 57.63585854894061)

In [21]:
# Full regression report: fit statistics, coefficient table and residual diagnostics.
model.summary()

0,1,2,3
Dep. Variable:,F1_Score,R-squared:,0.565
Model:,OLS,Adj. R-squared:,0.516
Method:,Least Squares,F-statistic:,11.34
Date:,"Tue, 09 Apr 2019",Prob (F-statistic):,5.63e-130
Time:,22:23:00,Log-Likelihood:,-3740.6
No. Observations:,1235,AIC:,7737.0
Df Residuals:,1107,BIC:,8392.0
Df Model:,127,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,56.7950,1.671,33.992,0.000,53.517,60.073
C(Embedding)[T.Glove],-1.1490,2.363,-0.486,0.627,-5.785,3.487
C(Embedding)[T.Wang2Vec],-1.1730,2.363,-0.496,0.620,-5.809,3.463
C(Embedding)[T.Word2Vec],-5.2860,2.363,-2.237,0.025,-9.922,-0.650
C(Tagging_Scheme)[T.iobes],-10.9430,2.363,-4.631,0.000,-15.579,-6.307
C(Capitalisation_Dim)[T.True],-0.5390,2.363,-0.228,0.820,-5.175,4.097
C(Char_LSTM_Dim)[T.50],-0.4817,2.428,-0.198,0.843,-5.245,4.282
C(Word_LSTM_Dim)[T.200],-1.3888,2.506,-0.554,0.580,-6.306,3.529
C(Lower)[T.True],8.4640,2.363,3.582,0.000,3.828,13.100

0,1,2,3
Omnibus:,1096.207,Durbin-Watson:,2.003
Prob(Omnibus):,0.0,Jarque-Bera (JB):,28617.023
Skew:,-4.181,Prob(JB):,0.0
Kurtosis:,25.05,Cond. No.,591.0


In [22]:
# ANOVA table of the fitted model, requesting type 2 sums of squares.
res = sm.stats.anova_lm(model, typ=2)

In [23]:
# Display the ANOVA table.
res

Unnamed: 0,sum_sq,df,F,PR(>F)
C(Embedding),15634.307190,3.0,186.677713,5.995006e-98
C(Tagging_Scheme),3.390585,1.0,0.121453,7.275297e-01
C(Capitalisation_Dim),58.050738,1.0,2.079423,1.495795e-01
C(Char_LSTM_Dim),30.411899,1.0,1.089378,2.968369e-01
C(Word_LSTM_Dim),21.109064,1.0,0.756143,3.847267e-01
C(Lower),9295.791169,1.0,332.982527,3.029907e-65
C(Embedding):C(Tagging_Scheme),140.257542,3.0,1.674712,1.706998e-01
C(Embedding):C(Capitalisation_Dim),67.188428,3.0,0.802247,4.926646e-01
C(Tagging_Scheme):C(Capitalisation_Dim),95.220703,1.0,3.410880,6.503463e-02
C(Embedding):C(Char_LSTM_Dim),27.585483,3.0,0.329378,8.041220e-01


In [24]:
# Calculating effect size
def anova_table(aov):
    """Extend an ANOVA table with mean squares and effect sizes.

    Parameters
    ----------
    aov : pandas.DataFrame
        ANOVA table with the columns ``sum_sq``, ``df``, ``F`` and ``PR(>F)``.
        The LAST row is treated as the residual (error) term; every other row
        is treated as an effect.

    Returns
    -------
    pandas.DataFrame
        The table with columns ordered as ``sum_sq, mean_sq, df, F, PR(>F),
        eta_sq, omega_sq``, where

        * ``mean_sq``  = sum_sq / df
        * ``eta_sq``   = SS_effect / SS_total
        * ``omega_sq`` = (SS_effect - df_effect * MS_error) / (SS_total + MS_error)

        ``SS_total`` is the sum of ``sum_sq`` over all rows and ``MS_error`` is
        the ``mean_sq`` of the last row. ``eta_sq`` and ``omega_sq`` are NaN
        for the last (residual) row.

    Notes
    -----
    The three computed columns are also added to ``aov`` itself (in place).
    """
    aov['mean_sq'] = aov['sum_sq'] / aov['df']

    ss_total = aov['sum_sq'].sum()
    # Positional access: the index holds term labels, so a bare [-1] would be
    # a label lookup that only works through a deprecated pandas fallback.
    ms_error = aov['mean_sq'].iloc[-1]

    # Every row except the trailing residual row; assigning the shorter result
    # back aligns on the index and leaves NaN in the residual row.
    effects = aov.iloc[:-1]
    aov['eta_sq'] = effects['sum_sq'] / ss_total
    aov['omega_sq'] = (effects['sum_sq'] - effects['df'] * ms_error) / (ss_total + ms_error)

    cols = ['sum_sq', 'mean_sq', 'df', 'F', 'PR(>F)', 'eta_sq', 'omega_sq']
    return aov[cols]

# Show the ANOVA table extended with mean squares and effect sizes.
anova_table(res)

Unnamed: 0,sum_sq,mean_sq,df,F,PR(>F),eta_sq,omega_sq
C(Embedding),15634.307190,5211.435730,3.0,186.677713,5.995006e-98,2.188563e-01,0.217599
C(Tagging_Scheme),3.390585,3.390585,1.0,0.121453,7.275297e-01,4.746299e-05,-0.000343
C(Capitalisation_Dim),58.050738,58.050738,1.0,2.079423,1.495795e-01,8.126212e-04,0.000422
C(Char_LSTM_Dim),30.411899,30.411899,1.0,1.089378,2.968369e-01,4.257199e-04,0.000035
C(Word_LSTM_Dim),21.109064,21.109064,1.0,0.756143,3.847267e-01,2.954945e-04,-0.000095
C(Lower),9295.791169,9295.791169,1.0,332.982527,3.029907e-65,1.301268e-01,0.129685
C(Embedding):C(Tagging_Scheme),140.257542,46.752514,3.0,1.674712,1.706998e-01,1.963390e-03,0.000791
C(Embedding):C(Capitalisation_Dim),67.188428,22.396143,3.0,0.802247,4.926646e-01,9.405348e-04,-0.000232
C(Tagging_Scheme):C(Capitalisation_Dim),95.220703,95.220703,1.0,3.410880,6.503463e-02,1.332944e-03,0.000942
C(Embedding):C(Char_LSTM_Dim),27.585483,9.195161,3.0,0.329378,8.041220e-01,3.861544e-04,-0.000786


In [25]:
# Tukey HSD pairwise comparison of mean F1 between embeddings.
mc = statsmodels.stats.multicomp.MultiComparison(
    data=training_data_df[F1_SCORE],
    groups=training_data_df[EMBEDDING],
)
mc_results = mc.tukeyhsd()
print(mc_results)

Multiple Comparison of Means - Tukey HSD,FWER=0.05
 group1   group2  meandiff  lower    upper  reject
--------------------------------------------------
FastText  Glove   -1.1723  -2.5697   0.225  False 
FastText Wang2Vec  0.8997  -0.5022   2.3017 False 
FastText Word2Vec -8.0049  -9.3945  -6.6152  True 
 Glove   Wang2Vec  2.0721   0.6747   3.4694  True 
 Glove   Word2Vec -6.8326  -8.2176  -5.4475  True 
Wang2Vec Word2Vec -8.9046  -10.2943  -7.515  True 
--------------------------------------------------


In [26]:
# Tukey HSD comparison of mean F1 between capitalisation settings.
mc = statsmodels.stats.multicomp.MultiComparison(
    data=training_data_df[F1_SCORE],
    groups=training_data_df[CAP_DIM],
)
mc_results = mc.tukeyhsd()
print(mc_results)

Multiple Comparison of Means - Tukey HSD,FWER=0.05
group1 group2 meandiff  lower  upper  reject
--------------------------------------------
False   True   0.5932  -0.2543 1.4406 False 
--------------------------------------------


In [27]:
# Tukey HSD comparison of mean F1 between lower-casing settings.
mc = statsmodels.stats.multicomp.MultiComparison(
    data=training_data_df[F1_SCORE],
    groups=training_data_df[LOWER],
)
mc_results = mc.tukeyhsd()
print(mc_results)

Multiple Comparison of Means - Tukey HSD,FWER=0.05
group1 group2 meandiff lower  upper  reject
-------------------------------------------
False   True   5.4075  4.6151 6.1998  True 
-------------------------------------------


In [28]:
# Tukey HSD comparison of mean F1 between tagging schemes.
mc = statsmodels.stats.multicomp.MultiComparison(
    data=training_data_df[F1_SCORE],
    groups=training_data_df[TAGGING_SCHEME],
)
mc_results = mc.tukeyhsd()
print(mc_results)

Multiple Comparison of Means - Tukey HSD,FWER=0.05
group1 group2 meandiff  lower  upper  reject
--------------------------------------------
 iob   iobes  -0.0898  -0.9378 0.7582 False 
--------------------------------------------


In [29]:
# Tukey HSD comparison of mean F1 between character-LSTM dimensions.
mc = statsmodels.stats.multicomp.MultiComparison(
    data=training_data_df[F1_SCORE],
    groups=training_data_df[CHAR_DIM],
)
mc_results = mc.tukeyhsd()
print(mc_results)

Multiple Comparison of Means - Tukey HSD,FWER=0.05
group1 group2 meandiff  lower  upper  reject
--------------------------------------------
  25     50   -0.3547  -1.2024 0.4931 False 
--------------------------------------------


In [30]:
# Tukey HSD comparison of mean F1 between word-LSTM dimensions.
mc = statsmodels.stats.multicomp.MultiComparison(
    data=training_data_df[F1_SCORE],
    groups=training_data_df[WORD_DIM],
)
mc_results = mc.tukeyhsd()
print(mc_results)

Multiple Comparison of Means - Tukey HSD,FWER=0.05
group1 group2 meandiff  lower  upper  reject
--------------------------------------------
 100    200   -0.3035  -1.1515 0.5444 False 
--------------------------------------------
