In [1]:
import pandas as pd
pd.set_option('display.max_columns', None)
pd.options.mode.chained_assignment = None
import numpy as np

import scipy.stats

from sklearn.metrics import roc_auc_score, f1_score

import os

In [2]:
this_dir = os.getcwd()
# Read the results from the parent directory's data folder through an explicit
# path rather than a temporary os.chdir('..'): if the read fails, the kernel's
# working directory is left untouched, so this cell stays safe to re-run.
test_perform_full = pd.read_csv(
    os.path.join(this_dir, os.pardir, 'data', 'test_perform_full.csv')
)
# read_csv treats the literal string 'None' as a missing value, so restore the
# 'None' strategy label after loading.
test_perform_full['strat'] = test_perform_full['strat'].fillna('None')

In [6]:
def calculate_two_tail_zscore(rand_row, hard_row, met, n_rand=22, n_hard=22):
    """Return the pooled-variance test statistic for ``hard`` minus ``rand``.

    The statistic is ``(hard_mean - rand_mean) / pooled_sem`` where the pooled
    variance weights each group's variance by its degrees of freedom. Only the
    signed statistic is returned; converting it to a p-value is left to the
    caller.

    Parameters
    ----------
    rand_row, hard_row : mapping-like (e.g. pandas.Series or dict)
        Must provide the keys ``met + '_mean'`` and ``met + '_var'``.
    met : str
        Metric name used as the key prefix.
    n_rand, n_hard : int, optional
        Number of observations behind ``rand_row`` and ``hard_row``.
        Both default to 22, the value previously hard-coded here.

    Returns
    -------
    float
        The signed statistic (positive when the ``hard`` mean is larger).

    Raises
    ------
    ValueError
        If a sample size is below 1 or the pooled degrees of freedom
        (``n_rand + n_hard - 2``) are not positive.
    """
    if n_rand < 1 or n_hard < 1 or n_rand + n_hard <= 2:
        raise ValueError(
            'n_rand and n_hard must each be >= 1 and sum to more than 2'
        )

    rand_mean, rand_var = rand_row[met + '_mean'], rand_row[met + '_var']
    hard_mean, hard_var = hard_row[met + '_mean'], hard_row[met + '_var']

    # Pool the two variances, weighting each by its degrees of freedom.
    pooled_var = (
        ((n_rand - 1) * rand_var) + ((n_hard - 1) * hard_var)
    ) / (n_rand + n_hard - 2)
    # Standard error of the difference between the two group means.
    pooled_sem = ((pooled_var / n_rand) + (pooled_var / n_hard)) ** 0.5

    return (hard_mean - rand_mean) / pooled_sem


def create_table4(test_perform_full):
    """Build the table of mean performance differences between strategies.

    For every ``score_var`` and every performance metric, the cell holds the
    mean under the ``'Constant'`` strategy minus the mean under the ``'None'``
    strategy, formatted to three decimals. A trailing ``*`` marks differences
    whose two-sided normal tail probability (computed from
    ``calculate_two_tail_zscore``) is below 0.05.

    Parameters
    ----------
    test_perform_full : pandas.DataFrame
        Must contain the columns ``score_var``, ``strat``, ``test_acc``,
        ``test_auc``, ``test_f1`` and ``irt_ability``. Every ``score_var``
        value must be one of the keys of the label map below and must have
        rows for both the ``'None'`` and ``'Constant'`` strategies.

    Returns
    -------
    pandas.DataFrame
        One row per dependent-variable label and one column per metric label;
        cells are formatted strings.
    """
    perform_cols = ['test_acc', 'test_auc', 'test_f1', 'irt_ability']

    # Display labels for the metric columns.
    perform_d = {
        'test_acc': 'Acc.',
        'test_auc': 'AUC',
        'test_f1': 'F1 Score',
        'irt_ability': 'Ability',
    }

    # Display labels for the dependent variables.
    sv_d = {
        'Anxiety': 'Anxiety',
        'Numeracy': 'Numeracy',
        'SubjectiveLit': 'Literacy',
        'TrustPhys': 'Trust',
        'wer': 'Depr.'
    }

    # Per (score_var, strat) mean and variance of each metric, with the
    # two-level column index flattened to '<metric>_mean' / '<metric>_var'.
    perform_gdf = test_perform_full.groupby(['score_var', 'strat'])[perform_cols].agg(['mean', 'var'])
    perform_gdf.columns = ['_'.join(t) for t in perform_gdf.columns]

    # Collect plain records and build the frame once, instead of growing a
    # DataFrame with pd.concat on every iteration.
    records = []
    for svar in test_perform_full['score_var'].unique():

        rand_row = perform_gdf.loc[(svar, 'None'), :]
        hard_row = perform_gdf.loc[(svar, 'Constant'), :]

        for met in perform_cols:
            rand_mean, rand_var = rand_row[met + '_mean'], rand_row[met + '_var']
            hard_mean, hard_var = hard_row[met + '_mean'], hard_row[met + '_var']

            # note we consider the random sample to be "true"
            this_zscore = calculate_two_tail_zscore(rand_row, hard_row, met)

            # two sided test so need area above and below
            total_prob = scipy.stats.norm.sf(abs(this_zscore)) * 2

            this_sig = '*' if total_prob < 0.05 else ''

            # FOR SLIM TABLE
            # Pooled SD is the root of the *average* of the two variances.
            # (Previously the random-sample SD was taken with exponent .05
            # instead of 0.5, and only the random term was divided by 2.)
            pool_std = ((hard_var + rand_var) / 2) ** 0.5

            records.append({
                'Dep. Var.': sv_d[svar], 'Metric': perform_d[met],
                'Mean Diff.': f'{hard_mean-rand_mean:.3f}{this_sig}',
                'Pool. SD': f'{pool_std:.3f}',
            })

    perform_mean_diff_df = pd.DataFrame(records)

    # Pivot to one row per dependent variable and one column per metric,
    # keeping only the formatted mean difference.
    perform_mean_diff_df = perform_mean_diff_df.set_index(['Dep. Var.', 'Metric'])
    perform_mean_diff_df.index.names = [None, None]
    perform_mean_diff_df = perform_mean_diff_df[['Mean Diff.']].unstack(level=-1)
    # Drop the outer 'Mean Diff.' level so columns are just the metric labels.
    perform_mean_diff_df.columns = [t[1] for t in perform_mean_diff_df.columns]

    return perform_mean_diff_df


In [7]:
# Build the table of 'Constant'-minus-'None' mean differences from the loaded results.
perform_mean_diff_df_wer = create_table4(test_perform_full)

In [8]:
# Show the resulting table as this cell's output.
perform_mean_diff_df_wer

Unnamed: 0,AUC,Ability,Acc.,F1 Score
Anxiety,0.001,0.027,-0.005,0.055
Depr.,0.019*,-0.044,-0.013,-0.112*
Literacy,-0.056*,-0.856,-0.034,-0.051
Numeracy,-0.034*,-0.322,-0.008,0.052
Trust,-0.050*,-1.226*,-0.038,-0.071


In [6]:
# Write the table as LaTeX to this relative path (resolved against the
# current working directory).
# NOTE(review): this cell's execution label (In [6]) is out of sequence with
# the cells above — confirm the notebook reproduces under Restart & Run All.
perform_mean_diff_df_wer.to_latex( 'perform_mean_diff_df_wer_v2.tex' )