In [4]:
import numpy as np
import scipy.stats as stats
import scikit_posthocs as sp

In [5]:
# Scores for each embedding. Every list holds six values; the H2 cells reuse
# the same numbers grouped per classifier in the order LR, KNN, SVM, RF, XGB, NN.
embedding_scores = {
    "glove":    [49.3, 45.5, 50.3, 49.9, 49.7, 55.7],
    "word2vec": [49.9, 47.9, 53.0, 46.6, 48.7, 54.6],
    "bert":     [50.7, 48.3, 50.3, 45.7, 50.1, 53.7],
    "roberta":  [52.0, 49.2, 53.1, 46.5, 49.7, 54.8],
    "gpt2":     [53.6, 43.4, 52.2, 47.8, 50.6, 54.6],
}

# Individual names used by the test cells below.
glove_scores = embedding_scores["glove"]
word2vec_scores = embedding_scores["word2vec"]
bert_scores = embedding_scores["bert"]
roberta_scores = embedding_scores["roberta"]
gpt2_scores = embedding_scores["gpt2"]

# Kruskal-Wallis

## H1 Test

In [6]:
# H1: Kruskal-Wallis H-test with one sample per embedding.
embedding_samples = [glove_scores, word2vec_scores, bert_scores, roberta_scores, gpt2_scores]
h1_kruskal = stats.kruskal(*embedding_samples)
statistic, p_value = h1_kruskal.statistic, h1_kruskal.pvalue

# Report the H statistic and its p-value, one per line.
print(
    "Kruskal-Wallis Test",
    f"Statistic: {statistic}",
    f"p-value: {p_value}",
    sep="\n",
)

Kruskal-Wallis Test
Statistic: 0.42618570474281536
p-value: 0.9802774083495136


## H2 Test

In [8]:
# Scores regrouped per classifier: each list holds that classifier's five
# values, positioned in the order GloVe, word2vec, BERT, RoBERTa, GPT-2
# (the same numbers as the per-embedding lists, transposed).
lr  = [49.3, 49.9, 50.7, 52.0, 53.6]
knn = [45.5, 47.9, 48.3, 49.2, 43.4]
svm = [50.3, 53.0, 50.3, 53.1, 52.2]
rf  = [49.9, 46.6, 45.7, 46.5, 47.8]
xgb = [49.7, 48.7, 50.1, 49.7, 50.6]
nn  = [55.7, 54.6, 53.7, 54.8, 54.6]

# H2: Kruskal-Wallis H-test with one sample per classifier.
classifier_samples = [lr, knn, svm, rf, xgb, nn]
h2_kruskal = stats.kruskal(*classifier_samples)
statistic, p_value = h2_kruskal.statistic, h2_kruskal.pvalue

# Report the H statistic and its p-value, one per line.
print(
    "Kruskal-Wallis Test",
    f"Statistic: {statistic}",
    f"p-value: {p_value}",
    sep="\n",
)

Kruskal-Wallis Test
Statistic: 24.15568915608995
p-value: 0.00020265458586172198


In [9]:
# Dunn's pairwise post-hoc comparisons with Bonferroni-adjusted p-values.
# Rows/columns of the result are numbered 1-6 in the order the samples are
# passed in: lr, knn, svm, rf, xgb, nn.
#
# The p-values are kept at full precision in `posthoc_results`, because the
# table is compared against the 0.05 significance level afterwards; rounding
# first could move a value such as 0.0503 onto the threshold and flip the
# decision. Rounding to 3 decimals is applied only to the printed table.
posthoc_results = sp.posthoc_dunn([lr, knn, svm, rf, xgb, nn], p_adjust='bonferroni')
print("Pairwise comparison results:")
print(posthoc_results.round(3))

Pairwise comparison results:
       1      2      3      4      5      6
1  1.000  0.337  1.000  0.510  1.000  1.000
2  0.337  1.000  0.095  1.000  1.000  0.001
3  1.000  0.095  1.000  0.153  1.000  1.000
4  0.510  1.000  0.153  1.000  1.000  0.002
5  1.000  1.000  1.000  1.000  1.000  0.161
6  1.000  0.001  1.000  0.002  0.161  1.000


In [10]:
# True marks a pair whose adjusted p-value is above 0.05 (no significant
# difference at that level); False marks a significant pair.
posthoc_results.gt(0.05)

Unnamed: 0,1,2,3,4,5,6
1,True,True,True,True,True,True
2,True,True,True,True,True,False
3,True,True,True,True,True,True
4,True,True,True,True,True,False
5,True,True,True,True,True,True
6,True,False,True,False,True,True


# Friedman's Test

## H1 Test

In [16]:
# H1: Friedman test with the embeddings as the repeated samples; position i
# in every list refers to the same classifier.
embedding_samples = (glove_scores, word2vec_scores, bert_scores, roberta_scores, gpt2_scores)
stats.friedmanchisquare(*embedding_samples)

FriedmanchisquareResult(statistic=3.1452991452991417, pvalue=0.5338111027888838)

## H2 Test

In [15]:
# H2: Friedman test with the classifiers as the repeated samples; position i
# in every list refers to the same embedding.
classifier_samples = (lr, knn, svm, rf, xgb, nn)
stats.friedmanchisquare(*classifier_samples)

FriedmanchisquareResult(statistic=20.65714285714286, pvalue=0.0009401983646974848)

In [25]:
# Stack the six classifier samples into one array:
# one row per classifier, one column per embedding (shape 6 x 5).
data_nemenyi = np.array([lr, knn, svm, rf, xgb, nn])

# Conduct the Nemenyi post-hoc test. posthoc_nemenyi_friedman reads a block
# design matrix (rows = blocks, columns = groups), so the array is transposed
# to put the classifiers in the columns. Result rows/columns 0-5 follow the
# order lr, knn, svm, rf, xgb, nn.
#
# The p-values are kept at full precision in `p_value_nemenyi`, because the
# table is compared against the 0.05 significance level afterwards; rounding
# first could move a borderline value onto the threshold and flip the
# decision. Rounding to 3 decimals is applied only to the printed table.
p_value_nemenyi = sp.posthoc_nemenyi_friedman(data_nemenyi.T)

print(p_value_nemenyi.round(3))

       0      1      2      3      4      5
0  1.000  0.326  0.900  0.430  0.900  0.533
1  0.326  1.000  0.114  0.900  0.826  0.003
2  0.900  0.114  1.000  0.168  0.728  0.826
3  0.430  0.900  0.168  1.000  0.900  0.005
4  0.900  0.826  0.728  0.900  1.000  0.114
5  0.533  0.003  0.826  0.005  0.114  1.000


In [24]:
# True marks a pair whose Nemenyi p-value is above 0.05 (no significant
# difference at that level); False marks a significant pair.
p_value_nemenyi.gt(0.05)

Unnamed: 0,0,1,2,3,4,5
0,True,True,True,True,True,True
1,True,True,True,True,True,False
2,True,True,True,True,True,True
3,True,True,True,True,True,False
4,True,True,True,True,True,True
5,True,False,True,False,True,True
