In [None]:
import pandas as pd
import itertools
from scipy import stats
import pingouin as pg

## Calculate variation per effect (std)

In [None]:
# Load the per-model statistics and derive, for every
# (subarea, type, model_type, effect) combination, the standard deviation
# of the effect size (eta^2).
df = pd.read_csv('figure_source_data/fig_4/stat_results_quantifications_wue1.csv')

# Filter data: drop rows where an 'expert*' ensemble is paired with the
# 'ensemble' model entry.
is_expert_ensemble = df.ens.str.startswith('expert') & (df.model == 'ensemble')
df = df[~is_expert_ensemble].copy()

# Create model type variable. Everything starts as 'consensus_ensemble';
# the two masks below then overwrite the rows they match (expert last,
# so it takes precedence).
is_consensus_model = df.ens.str.startswith('consensus') & df.model.str.startswith('model')
is_expert = df.ens.str.startswith('expert')
df['model_type'] = 'consensus_ensemble'
df.loc[is_consensus_model, 'model_type'] = 'consensus_model'
df.loc[is_expert, 'model_type'] = 'expert_model'

# Create long format dataframe: one slice per pairwise effect, each carrying
# the effect size ('eta'), the test outcome ('vote') and the effect label.
id_columns = ['subarea', 'model_type', 'type']
df_indexed = df.set_index(id_columns)
long_list = [
    df_indexed[['eta^2_' + effect_name, effect_name]]
    .rename(columns={'eta^2_' + effect_name: 'eta', effect_name: 'vote'})
    .assign(effect=effect_name)
    for effect_name in ('mwu1_vs_2', 'mwu1_vs_3', 'mwu2_vs_3')
]
df_long = pd.concat(long_list, sort=False).reset_index()

# Calc variation per effect (std)
group_keys = ['subarea', 'type', 'model_type', 'effect']
df_var = df_long.groupby(group_keys)['eta'].agg(['std']).reset_index()

## Compute statistics for variation per effect and save results

In [None]:
# Compare the per-effect variation (column `param` of df_var) between the
# three model types and export every test result as a single-row Excel table.
param = 'std'

# The order of this list defines what grp1 / grp2 / grp3 mean in the output
# column names (grp1 = consensus_ensemble, grp2 = consensus_model,
# grp3 = expert_model).
group_names = ['consensus_ensemble', 'consensus_model', 'expert_model']
groups = {name: df_var.loc[df_var.model_type == name, param] for name in group_names}
s_ens, s_cmod, s_emod = (groups[name] for name in group_names)

l_results = []

# NOTE(review): if df_var contains NaN in `param` (e.g. a group with a single
# row has no defined std), those values are passed to the tests unchanged --
# confirm whether they should be dropped beforehand.

# Check for normal distribution with Shapiro-Wilk (p > 0.05 counts as normal).
# Groups with 3 or fewer values are skipped, so they cannot veto the ANOVA.
grp_norms = [stats.shapiro(grp)[1] > 0.05 for grp in groups.values() if grp.size > 3]
s_grp_norms = pd.Series(grp_norms, dtype=bool)

# Check for equality of variances with Levene (p > 0.05 counts as equal)
levene = stats.levene(s_ens, s_cmod, s_emod, center='mean')[1] > 0.05

# Check if pre-requisites for parametric testing with One-Way ANOVA are fulfilled.
# Plain boolean `and` is used on purpose: `a == True & b == True` parses as
# the chained comparison `a == (True & b) == True` because `&` binds tighter
# than `==`.
check_aov = bool(s_grp_norms.all() and levene)

# Calculate One-Way ANOVA
aov = pg.anova(data=df_var, between='model_type', dv=param, detailed=False)
aov_dof1, aov_dof2, aov_fstat, aov_pval = aov['ddof1'][0], aov['ddof2'][0], aov['F'][0], aov['p-unc'][0]

# Calculate Kruskal-Wallis ANOVA
kwa_hstat, kwa_pval = stats.kruskal(s_ens, s_cmod, s_emod)

# Calculate results for pairwise comparisons - parametric and non-parametric.
# Pair order is (grp1, grp2), (grp1, grp3), (grp2, grp3).
l_t_res = []
l_mwu_res = []
for grp1, grp2 in itertools.combinations(group_names, 2):
    # The t-test is two-sided by default; the keyword selecting the sidedness
    # was renamed between pingouin releases (`tail` -> `alternative`), so it
    # is left at its default to work with either.
    ttest = pg.ttest(groups[grp1], groups[grp2], correction='auto')
    l_t_res.append((ttest['dof']['T-test'], ttest['T']['T-test'], ttest['p-val']['T-test']))
    u_stat, pval = stats.mannwhitneyu(groups[grp1], groups[grp2], alternative='two-sided')
    l_mwu_res.append((u_stat, pval))

# Assemble the result row in the same order as l_columns below.
# The group means are taken from the actual model_type groups; selecting by
# labels that do not occur in model_type would yield NaN for every mean.
result_row = [param, s_ens.mean(), s_cmod.mean(), s_emod.mean(),
              check_aov, aov_dof1, aov_dof2, aov_fstat, aov_pval]
for t_res in l_t_res:
    result_row.extend(t_res)
result_row.extend([kwa_hstat, kwa_pval])
for mwu_res in l_mwu_res:
    result_row.extend(mwu_res)
l_results.append(tuple(result_row))

# Create results DataFrame
l_columns = ['Measure', 'Mean_grp1', 'Mean_grp2', 'Mean_grp3',
             'ANOVA_ok', 'ddof1', 'ddof2', 'ANOVA_F_stat', 'ANOVA_p_value',
             'T_test_dof_grp1_2', 'T_test_T_stat_grp1_2', 'T_test_pval_grp1_2',
             'T_test_dof_grp1_3', 'T_test_T_stat_grp1_3', 'T_test_pval_grp1_3',
             'T_test_dof_grp2_3', 'T_test_T_stat_grp2_3', 'T_test_pval_grp2_3',
             'Kruskal_Wallis_ANOVA_H_stat', 'Kruskal_Wallis_ANOVA_pval',
             'MWU_U_stat_grp1_2', 'MWU_pval_grp1_2',
             'MWU_U_stat_grp1_3', 'MWU_pval_grp1_3',
             'MWU_U_stat_grp2_3', 'MWU_pval_grp2_3']
df_results = pd.DataFrame(l_results, columns=l_columns).set_index('Measure')
df_results.to_excel('figure_source_data/fig_4/stat_results_variation_per_effect.xlsx')