In [None]:
import pandas as pd
import pickle
from scipy import stats
import seaborn as sns
import pingouin as pg
import statsmodels.formula.api as s
import statsmodels.api as sm
from statsmodels.formula.api import ols

In [None]:
# load data
# The context manager closes the file handle once the frame has been read.
# NOTE: unpickling can execute arbitrary code; only load files produced by
# this project's own pipeline.
with open('../00_data/final/competence.pkl', 'rb') as competence_file:
    final = pickle.load(competence_file)

# split groups: one sub-frame per intervention group
# (group labels are compared as strings, including the numeric ones)
intvcontrol = final[final['interventiongroup'] == 'control']
intv2 = final[final['interventiongroup'] == '2']
intv3 = final[final['interventiongroup'] == '3']
intv4 = final[final['interventiongroup'] == '4']
intv5 = final[final['interventiongroup'] == '5']
intv6 = final[final['interventiongroup'] == '6']

Check assumptions

In [None]:
# Levene's test: homogeneity of variance of 'ability' across all six groups
levene_samples = [
    group['ability']
    for group in (intvcontrol, intv2, intv3, intv4, intv5, intv6)
]
stats.levene(*levene_samples)

In [None]:
# Shapiro-Wilk normality test on 'ability', one result line per group
# (order: control, 2, 3, 4, 5, 6)
for group in (intvcontrol, intv2, intv3, intv4, intv5, intv6):
    print(stats.shapiro(group['ability']))

In [None]:
# Histogram of the 'ability' scores over the whole sample
hist_ax = sns.histplot(
    x="ability",
    data=final,
)
hist_ax.set(xlabel="Competence", ylabel="Count of users")

Get effect sizes (median competence and group size per group)

In [None]:
# per Interventiongroup: median 'ability' and group size
# ('count' is the number of non-null 'Geschlecht' entries in each group)
ability_by_group = final.groupby(['interventiongroup']).agg(
    ability=('ability', 'median'),
    count=('Geschlecht', 'count'),
)
# Show the table; a bare assignment would leave the cell without output.
ability_by_group

In [None]:
# Gender: median 'ability' and group size per gender x intervention group
# ('count' is the number of non-null 'Klassenstufe' entries in each group)
ability_by_group = final.groupby(['Geschlecht', 'interventiongroup']).agg(
    ability=('ability', 'median'),
    count=('Klassenstufe', 'count'),
)
# Show the table; a bare assignment would leave the cell without output.
ability_by_group

In [None]:
# Class level: median 'ability' and group size per class level x intervention group
# ('count' is the number of non-null 'Geschlecht' entries in each group)
ability_by_group = final.groupby(['Klassenstufe', 'interventiongroup']).agg(
    ability=('ability', 'median'),
    count=('Geschlecht', 'count'),
)
# Show the table; a bare assignment would leave the cell without output.
ability_by_group

Test significance

In [None]:
# Kruskal-Wallis H-test on 'ability' across the control and all intervention groups
kruskal_samples = [
    group['ability']
    for group in (intvcontrol, intv2, intv3, intv4, intv5, intv6)
]
stats.kruskal(*kruskal_samples)

In [None]:
# Wilcoxon-Mann-Whitney test: control group vs. each intervention group
# (order of printed results: 2, 3, 4, 5, 6).
# `alternative` is passed explicitly because SciPy's default changed between
# releases: older versions used a deprecated default that reported half of
# the two-sided p-value.
# NOTE(review): five pairwise tests are printed without any
# multiple-comparison correction - confirm this is handled when interpreting.
for treated in (intv2, intv3, intv4, intv5, intv6):
    print(stats.mannwhitneyu(intvcontrol['ability'], treated['ability'],
                             alternative='two-sided'))

In [None]:
# Box plot of 'ability' per intervention group; outlier markers are hidden
box_ax = sns.boxplot(
    x="ability",
    y="interventiongroup",
    data=final,
    showfliers=False,
)
box_ax.set(xlabel="Competence", ylabel="Intervention group")


In [None]:
# Box plot of 'ability' per gender, coloured by intervention group;
# outlier markers are hidden
sns.boxplot(
    x="ability",
    y='Geschlecht',
    hue="interventiongroup",
    data=final,
    showfliers=False,
)

Gender Demographics

In [None]:
# One sub-frame per gender code in the 'Geschlecht' column (m / w / d)
final_m = final[final['Geschlecht'] == 'm']
final_w = final[final['Geschlecht'] == 'w']
final_d = final[final['Geschlecht'] == 'd']

Boys

In [None]:
# Boys only: one sub-frame per intervention group
group_of_m = final_m['interventiongroup']
intvcontrol_m = final_m[group_of_m == 'control']
intv2_m = final_m[group_of_m == '2']
intv3_m = final_m[group_of_m == '3']
intv4_m = final_m[group_of_m == '4']
intv5_m = final_m[group_of_m == '5']
intv6_m = final_m[group_of_m == '6']

# Kruskal-Wallis H-test on 'ability' across the six groups of boys
stats.kruskal(
    intvcontrol_m['ability'],
    intv2_m['ability'],
    intv3_m['ability'],
    intv4_m['ability'],
    intv5_m['ability'],
    intv6_m['ability'],
)

In [None]:
# Wilcoxon-Mann-Whitney test, boys only: control group vs. each intervention
# group (order of printed results: 2, 3, 4, 5, 6).
# `alternative` is passed explicitly because SciPy's default changed between
# releases: older versions used a deprecated default that reported half of
# the two-sided p-value.
# NOTE(review): five pairwise tests are printed without any
# multiple-comparison correction - confirm this is handled when interpreting.
for treated_m in (intv2_m, intv3_m, intv4_m, intv5_m, intv6_m):
    print(stats.mannwhitneyu(intvcontrol_m['ability'], treated_m['ability'],
                             alternative='two-sided'))

In [None]:
# Boys only: box plot of 'ability' per intervention group; outlier markers hidden
sns.boxplot(
    x="ability",
    y="interventiongroup",
    data=final_m,
    showfliers=False,
)

Girls

In [None]:
# Girls only: one sub-frame per intervention group
group_of_w = final_w['interventiongroup']
intvcontrol_w = final_w[group_of_w == 'control']
intv2_w = final_w[group_of_w == '2']
intv3_w = final_w[group_of_w == '3']
intv4_w = final_w[group_of_w == '4']
intv5_w = final_w[group_of_w == '5']
intv6_w = final_w[group_of_w == '6']

# Kruskal-Wallis H-test on 'ability' across the six groups of girls
stats.kruskal(
    intvcontrol_w['ability'],
    intv2_w['ability'],
    intv3_w['ability'],
    intv4_w['ability'],
    intv5_w['ability'],
    intv6_w['ability'],
)

In [None]:
# Wilcoxon-Mann-Whitney test, girls only: control group vs. each intervention
# group (order of printed results: 2, 3, 4, 5, 6).
# `alternative` is passed explicitly because SciPy's default changed between
# releases: older versions used a deprecated default that reported half of
# the two-sided p-value.
# NOTE(review): five pairwise tests are printed without any
# multiple-comparison correction - confirm this is handled when interpreting.
for treated_w in (intv2_w, intv3_w, intv4_w, intv5_w, intv6_w):
    print(stats.mannwhitneyu(intvcontrol_w['ability'], treated_w['ability'],
                             alternative='two-sided'))


In [None]:
# Girls only: box plot of 'ability' per intervention group; outlier markers hidden
sns.boxplot(
    x="ability",
    y="interventiongroup",
    data=final_w,
    showfliers=False,
)

Diverse

In [None]:
# DISABLED: the same split / Kruskal-Wallis / Mann-Whitney / box plot pipeline
# for gender code 'd'. Every statement in this cell is commented out, so the
# cell produces no output.
# NOTE(review): the reason is not recorded here (presumably too few
# observations in final_d - confirm); either document it or delete the cell.

# intvcontrol_d = final_d[(final_d['interventiongroup']=='control')]
# intv2_d = final_d[(final_d['interventiongroup']=='2')]
# intv3_d = final_d[(final_d['interventiongroup']=='3')]
# intv4_d = final_d[(final_d['interventiongroup']=='4')]
# intv5_d = final_d[(final_d['interventiongroup']=='5')]
# intv6_d = final_d[(final_d['interventiongroup']=='6')]

# # Kruskal Wallis Test 
# stats.kruskal(intvcontrol_d['ability'], intv2_d['ability'], intv3_d['ability'],intv4_d['ability'],intv5_d['ability'],intv6_d['ability'])

# #Wilcoxon Mann Whitney Test
# print(stats.mannwhitneyu(intvcontrol_d['ability'], intv2_d['ability']))
# print(stats.mannwhitneyu(intvcontrol_d['ability'], intv3_d['ability']))
# print(stats.mannwhitneyu(intvcontrol_d['ability'], intv4_d['ability']))
# print(stats.mannwhitneyu(intvcontrol_d['ability'], intv5_d['ability']))
# print(stats.mannwhitneyu(intvcontrol_d['ability'], intv6_d['ability']))

# sns.boxplot(data=final_d, x="ability", y="interventiongroup",showfliers = False)

Demographics: class level

In [None]:
# split by class level ('Klassenstufe' values are compared as strings)
final_5 = final.loc[final.Klassenstufe == '5']
final_6 = final.loc[final.Klassenstufe == '6']
final_7 = final.loc[final.Klassenstufe == '7']
final_8 = final.loc[final.Klassenstufe == '8']
final_9 = final.loc[final.Klassenstufe == '9']
final_10 = final.loc[final.Klassenstufe == '10']
final_11 = final.loc[final.Klassenstufe == '11']
final_12 = final.loc[final.Klassenstufe == '12']
final_13 = final.loc[final.Klassenstufe == '13']

klassenliste = [final_5, final_6, final_7, final_8, final_9, final_10, final_11, final_12, final_13]
# Class-level labels in the same order as klassenliste, used to tag the output
klassenstufen = ['5', '6', '7', '8', '9', '10', '11', '12', '13']

for stufe, klasse in zip(klassenstufen, klassenliste):
    # Deliberately loop-local names: reusing intvcontrol / intv2 ... intv6 here
    # would overwrite the whole-sample splits that earlier cells depend on.
    klasse_samples = [
        klasse.loc[klasse['interventiongroup'] == label, 'ability']
        for label in ('control', '2', '3', '4', '5', '6')
    ]

    # Kruskal Wallis Test across the six groups within this class level
    print(f'Class level {stufe}:', stats.kruskal(*klasse_samples))

In [None]:
# carry out wilcoxon mann whitney test for significant classes
# NOTE(review): this list holds class levels 5, 7, 8 and 9, while the box plot
# cells further down show class levels 6, 7 and 12 as "significant groups" -
# confirm which class levels the Kruskal-Wallis results actually flag.
klassenliste = [final_5, final_7, final_8, final_9]
# Class-level labels in the same order as klassenliste, used to tag the output
klassenstufen = ['5', '7', '8', '9']

for stufe, klasse in zip(klassenstufen, klassenliste):
    # Deliberately loop-local names: reusing intvcontrol / intv2 ... intv6 here
    # would overwrite the whole-sample splits that earlier cells depend on.
    control_ability = klasse.loc[klasse['interventiongroup'] == 'control', 'ability']

    print(f'Class level {stufe}')
    # Wilcoxon Mann Whitney Test: control vs. each intervention group
    # (order of printed results: 2, 3, 4, 5, 6).
    # `alternative` is explicit because SciPy's default changed between
    # releases (older versions reported half of the two-sided p-value).
    for label in ('2', '3', '4', '5', '6'):
        treated_ability = klasse.loc[klasse['interventiongroup'] == label, 'ability']
        print(stats.mannwhitneyu(control_ability, treated_ability,
                                 alternative='two-sided'))
    print('--------------')

In [None]:
# plot boxplots for significant groups

In [None]:
# Class level 6: box plot of 'ability' per intervention group, outliers hidden.
# The figure renders on its own; wrapping the call in print() only emitted the
# Axes repr. Title and labels make the plot identifiable when skimmed.
ax_6 = sns.boxplot(data=final_6, x="ability", y="interventiongroup", showfliers=False)
ax_6.set(title="Class level 6", xlabel="Competence", ylabel="Intervention group");

In [None]:
# Class level 7: box plot of 'ability' per intervention group, outliers hidden.
# The figure renders on its own; wrapping the call in print() only emitted the
# Axes repr. Title and labels make the plot identifiable when skimmed.
ax_7 = sns.boxplot(data=final_7, x="ability", y="interventiongroup", showfliers=False)
ax_7.set(title="Class level 7", xlabel="Competence", ylabel="Intervention group");

In [None]:
# Class level 12: box plot of 'ability' per intervention group, outliers hidden.
# The figure renders on its own; wrapping the call in print() only emitted the
# Axes repr. Title and labels make the plot identifiable when skimmed.
ax_12 = sns.boxplot(data=final_12, x="ability", y="interventiongroup", showfliers=False)
ax_12.set(title="Class level 12", xlabel="Competence", ylabel="Intervention group");