In [1]:
import numpy as np
import pandas as pd
%matplotlib widget
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from src.d00_utils.file_dir_utils import get_dir
from src.d01_data.fetch.group_conditions import group_conditions
from src.d02_intermediate.df_reshaping import add_col_by_lookup
from data import d01_raw


In [4]:
dwellT_nfixF = True   # shouldn't use both dwell and n/p fix because highly collinear
HnT_dF = True   

file_loc = "C:\\Users\\Luke\\Documents\\AlloEye\\data\\feature_saves\\alloeye_conditions.csv"
df = pd.read_csv(file_loc)

# add group and NP testing columns
pd.set_option('mode.chained_assignment', None)
drop_cols = ['Notes', 'dob', 'occupation']
pid_info = pd.read_csv(f"{os.path.abspath(d01_raw.__path__[0])}\ppt_info_alloeye.csv")
pid_info['pid'] = pid_info.pid.apply(lambda s: 'alloeye_' + str(s))
info_cols = pid_info.columns
df = add_col_by_lookup(df, 'group', 'ppt_id', pid_info, 'pid', 'group')


# replace ppt_id with random integers to avoid any effect of id number
pid_rand = np.random.default_rng().choice(len(df), size=len(df), replace=False)
# df.ppt_id = pid_rand
# wrangle some columns
def drop_col_with(df, string):
    for col in df.columns:
        if string in col:
            df = df.drop([col], axis=1)
    return df 

df = drop_col_with(df, 'Unnamed')
drop_cols = ['n_correct', 'n_trials', 'pupil_diam_centre_enc', 'pupil_diam_centre_ret',
            'pupil_diam_spread_ret', 'pupil_diam_spread_enc',
            'dwell_total_ret', 'dwell_total_enc']
df = df.drop(drop_cols, axis=1)

# drop entropy 
e_drop = 'hd' if HnT_dF else 'hn'
df = drop_col_with(df, e_drop)

# remove sacc for now
df = drop_col_with(df, 'sacc')

# remove velocity for now
df = drop_col_with(df, 'velocity')

# remove old blinks
df = drop_col_with(df, 'dur_blinks')

# dwell comparison
# df['dwell_MOvsSOs_enc'] = df['dwell_obj1_enc'] - df['dwell_obj2_enc'] - df['dwell_obj3_enc'] - df['dwell_obj4_enc']
df['dwell_MO-SOs_ret'] = df['dwell_obj1_ret'] - df['dwell_obj2_ret'] - df['dwell_obj3_ret'] - df['dwell_obj4_ret']
# df['dwell_MOvsSOs_diff'] = df['dwell_MOvsSOs_ret'] - df['dwell_MOvsSOs_enc']
# df = df.drop(['dwell_MOvsSOs_enc'], axis=1)
# df['dwell_MOret_v_Os_er'] = df['dwell_MO-SOs_ret'] - df['dwell_obj1_enc'] - df['dwell_obj2_enc'] - df['dwell_obj3_enc'] - df['dwell_obj4_enc']
# df['dwell_MO-meanSOs_enc'] = df['dwell_obj1_enc'] - np.nanmean(df.loc[:, ['dwell_obj2_enc', 'dwell_obj3_enc', 'dwell_obj4_enc']], axis=1)
# df['dwell_MO-meanSOs_ret'] = df['dwell_obj1_ret'] - np.nanmean(df.loc[:, ['dwell_obj2_ret', 'dwell_obj3_ret', 'dwell_obj4_ret']], axis=1)
# df['dwell_MO-meanSOs_diff'] = df['dwell_MO-meanSOs_ret'] - df['dwell_MO-meanSOs_enc']
# df['dwell_MO-table_ret'] = df['dwell_obj1_ret'] - df['dwell_table_ret']
# df['dwell_Os_v_table_ret'] = df['dwell_obj1_ret'] + df['dwell_obj2_ret'] + df['dwell_obj3_ret'] + df['dwell_obj4_ret'] - df['dwell_table_ret']
# df['dwell_Os_v_table_diff'] = df['dwell_Os_v_table_ret'] - (df['dwell_obj1_enc'] + df['dwell_obj2_enc'] + df['dwell_obj3_enc'] + df['dwell_obj4_enc'] - df['dwell_table_enc'])

# drop obj2-4 cols
for col in df.columns:
    if'obj' in col:
        if '2' in col or '3' in col or '4' in col:
            df = df.drop([col], axis=1)

# p fixations - NEED TO DEAL WITH INFINITIES CAUSED BY DIFF - DIVIDING BY 0
for col in df.columns:
    if 'n_fix' in col and 'total' not in col and 'diff' not in col:
        rem = col.split('n_fix')[-1]
        view = rem.split('_')[-1]
        new_col = 'p_fix'+rem
        total = 'n_fix_total_'+view
        df[new_col] = df[col] / df[total]
        df = df.drop([col], axis=1)
for col in df.columns:
    if 'n_fix' in col and 'total' in col:
        df = df.drop([col], axis=1)


# dwell time as proportion
for col in df.columns:
    if 'dwell' in col:
        df[col] /= 7000
        
# sort 4MT
first_col = 0
col_saved = False
for col in df.columns:
    if '4MT' in col:
        nas = pd.isna(df[col])
        # print(sum(nas))
        if 'RT' not in col:
            df[col] = np.where(df[col] == 'CORRECT', 1, 0)
            if col_saved is False:
                first_col = df.columns.get_loc(col)
                col_saved = True
        df[col][nas] = np.nan
df['4MT'] = df.iloc[:, first_col:first_col+15].sum(axis=1)
df = drop_col_with(df, '4MT_T')

# neuropsych wrangling
# df['FCSRT_FR_I'] = df['FCSRT T1 FR'] + df['FCSRT T2 FR'] + df['FCSRT T3 FR']
# df['FCSRT_TR_I'] = df['FCSRT T1 TR'] + df['FCSRT T2 TR'] + df['FCSRT T3 TR']
df = drop_col_with(df, 'FCSRT T')

# choose either dwell or p fix
if dwellT_nfixF is True:
    df = drop_col_with(df, 'fix')
else:
    df = drop_col_with(df, 'dwell')
    
df = df.drop(['condition_id', 'study_id'], axis=1)

display(df.head(20).style)

  df['4MT'] = df.iloc[:, first_col:first_col+15].sum(axis=1)


Unnamed: 0,ppt_id,condition,p_correct,hn_enc,dwell_obj1_enc,dwell_table_enc,dwell_other_enc,t_first_array_enc,t_first_obj1_enc,hn_ret,dwell_obj1_ret,dwell_table_ret,dwell_other_ret,t_first_array_ret,t_first_obj1_ret,hn_diff,dwell_total_diff,dwell_obj1_diff,dwell_table_diff,dwell_other_diff,t_first_array_diff,t_first_obj1_diff,pupil_diam_centre_diff,pupil_diam_spread_diff,dwell_pp_enc,dispersion_mean_enc,drop_out_total_enc,dwell_pp_ret,dispersion_mean_ret,drop_out_total_ret,dwell_pp_diff,dispersion_mean_diff,drop_out_total_diff,d_kl,ea_td,n_blinks_enc,n_blinks_ret,n_blinks_diff,mean_confidence,p_trackloss_enc,p_trackloss_ret,p_trackloss_diff,group,dwell_MO-SOs_ret,4MT
0,alloeye_56,StayStay,0.666667,0.302456,0.211429,0.150548,0.028595,435.833333,4214.833333,0.636404,0.257786,0.137071,0.043667,562.333333,1869.166667,0.333948,-0.032476,0.046357,-0.013476,0.015071,126.5,-2345.666667,0.125001,-0.038701,0.0,0.121661,0.0,0.006333,0.083045,0.0,0.006333,-0.038616,0.0,12.959233,1.456952,4.666667,4.5,-0.166667,8.666667,0.605384,0.316445,-0.288939,O,-0.099643,4653.101288
1,alloeye_58,WalkStay,0.111111,0.408367,0.131587,0.031905,0.050111,733.888889,2135.555556,0.592748,0.163857,0.081238,0.019825,416.666667,1717.333333,0.184382,0.010635,0.03227,0.049333,-0.030286,-317.222222,-418.222222,0.120577,-0.034907,0.0,0.080694,0.0,0.0,0.055348,0.0,0.0,-0.025346,0.0,10.846138,1.717599,1.888889,1.777778,-0.111111,7.222222,0.09911,0.066164,-0.032946,P,-0.331794,2871.035194
2,alloeye_58,TeleportStay,0.777778,0.476228,0.17219,0.056746,0.035603,492.555556,2936.777778,0.669506,0.257016,0.12119,0.021952,275.777778,1045.111111,0.193278,-0.021698,0.084825,0.064444,-0.013651,-216.777778,-1891.666667,0.100712,0.01262,0.0,0.056232,0.0,0.003032,0.069811,0.0,0.003032,0.013579,0.0,9.479562,1.566315,2.0,1.777778,-0.222222,7.777778,0.102272,0.148204,0.045932,P,-0.128619,3431.921544
3,alloeye_58,StayRotate,0.666667,0.55813,0.155365,0.093651,0.030667,495.333333,1971.0,0.603706,0.318683,0.118413,0.008873,67.555556,1995.888889,0.045576,0.01273,0.163317,0.024762,-0.021794,-427.777778,24.888889,0.185666,-0.065863,0.0,0.049327,0.0,0.0,0.032854,0.0,0.0,-0.016473,0.0,11.317323,1.777096,2.333333,3.0,0.666667,7.888889,0.178211,0.15143,-0.026781,P,-0.067683,2468.887486
4,alloeye_58,WalkRotate,0.888889,0.612061,0.166095,0.071016,0.031841,453.555556,1321.111111,0.609721,0.283317,0.187794,0.0,0.0,2563.333333,-0.00234,0.058063,0.117222,0.116778,-0.031841,-453.555556,1242.222222,0.078867,-0.041476,0.0,0.07403,0.0,0.005571,0.040202,0.0,0.005571,-0.033828,0.0,10.124299,1.841908,1.555556,3.222222,1.666667,9.333333,0.080636,0.209182,0.128546,P,-0.069905,1777.517401
5,alloeye_57,StayStay,0.888889,0.362713,0.099167,0.217071,0.042643,516.5,2033.0,0.339793,0.359071,0.219476,0.021619,555.333333,3685.666667,-0.02292,0.083833,0.259905,0.002405,-0.021024,38.833333,1652.666667,0.327494,0.130598,0.0,0.154949,0.0,0.004167,0.062691,0.0,0.004167,-0.092258,0.0,11.049324,1.832773,4.333333,4.0,-0.333333,9.555556,0.248541,0.248679,0.000139,P,0.184524,2552.050442
6,alloeye_57,WalkStay,0.333333,0.447872,0.127167,0.170595,0.074976,620.666667,3742.0,0.49117,0.079738,0.304905,0.019214,472.0,1334.333333,0.043298,-0.033714,-0.047429,0.13431,-0.055762,-148.666667,-2407.666667,0.168091,0.076298,0.0,0.119567,0.0,0.002976,0.058489,0.0,0.002976,-0.061078,0.0,10.966029,1.673403,4.333333,3.833333,-0.5,5.222222,0.200882,0.318796,0.117914,P,-0.223429,4364.715637
7,alloeye_57,TeleportStay,0.666667,0.36246,0.078357,0.206976,0.053762,600.833333,1874.333333,0.344573,0.291214,0.1805,0.031333,476.333333,1611.833333,-0.017887,-0.130048,0.212857,-0.026476,-0.022429,-124.5,-262.5,0.109551,0.00372,0.0,0.165858,0.0,0.01269,0.113543,0.0,0.01269,-0.052315,0.0,12.701777,1.568864,4.333333,2.833333,-1.5,7.333333,0.281812,0.156328,-0.125484,P,0.065048,2477.38251
8,alloeye_57,StayRotate,0.666667,0.191886,0.150024,0.289857,0.066643,616.333333,2391.166667,0.499997,0.291881,0.305929,0.023786,302.666667,1706.333333,0.308111,0.167333,0.141857,0.016071,-0.042857,-313.666667,-684.833333,0.213548,0.157879,0.0,0.1633,0.0,0.0,0.132651,0.0,0.0,-0.030649,0.0,11.791759,1.228212,3.833333,3.666667,-0.166667,7.222222,0.462502,0.215343,-0.247158,P,0.090024,3009.986669
9,alloeye_57,WalkRotate,0.666667,0.487809,0.098786,0.198357,0.054548,547.333333,2616.5,0.435975,0.252762,0.240595,0.069857,802.5,1762.0,-0.051834,0.008286,0.153976,0.042238,0.01531,255.166667,-854.5,0.08591,0.157251,0.0,0.094144,0.0,0.02381,0.093232,0.0,0.02381,-0.000912,0.0,11.182743,1.50683,2.666667,3.833333,1.166667,8.333333,0.199554,0.348142,0.148589,P,0.07819,3166.338688


Unnamed: 0,condition,p_correct,hn_enc,dwell_obj1_enc,dwell_table_enc,dwell_other_enc,t_first_array_enc,t_first_obj1_enc,hn_ret,dwell_obj1_ret,...,dwell_obj1_diff,dwell_table_diff,dwell_other_diff,t_first_array_diff,t_first_obj1_diff,pupil_diam_centre_diff,pupil_diam_spread_diff,group,dwell_MO-SOs_ret,4MT
0,StayStay,,,,,,,,,,...,,,,,,,,O,,0.000000
1,WalkStay,,,,,,,,,,...,,,,,,,,O,,0.000000
2,TeleportStay,,,,,,,,,,...,,,,,,,,O,,0.000000
3,StayRotate,,,,,,,,,,...,,,,,,,,O,,0.000000
4,WalkRotate,,,,,,,,,,...,,,,,,,,O,,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
301,WalkStay,0.111111,0.575240,0.154746,0.176444,0.053000,479.333333,2291.875000,0.637692,0.155984,...,0.001238,0.077651,-0.012921,-169.222222,-1147.375000,0.229216,0.010282,P,-0.386635,2773.366726
302,TeleportStay,0.222222,0.560787,0.177968,0.198587,0.046000,484.222222,2133.777778,0.639410,0.194381,...,0.016413,0.043841,-0.005397,-151.888889,-811.888889,0.179642,-0.009102,P,-0.308540,2620.322388
303,StayRotate,0.555556,0.529937,0.209206,0.250460,0.053175,539.666667,1639.333333,0.647151,0.204206,...,-0.005000,0.057952,-0.016302,-303.777778,1149.000000,0.291016,0.008454,P,-0.268937,2181.794977
304,WalkRotate,0.333333,0.611144,0.170714,0.158667,0.053159,497.888889,1608.666667,0.744455,0.147587,...,-0.023127,0.046905,-0.024698,-287.555556,-97.777778,0.254588,0.020176,P,-0.471270,2109.008647


In [5]:
# more wrangling

from sklearn import preprocessing

X = df.drop(['ppt_id'], axis=1).dropna().reset_index(drop=True)
c = X['condition']
X = X.drop(['condition'], axis=1)

replace_map = {'P': 0, 'O': 1, 'Y': 2}
X['group'] = X['group'].replace(replace_map)

y = X['group']
X = X.drop(['group'], axis=1)
X_cols = X.columns
# X_cols = X.columns

# replace with multiple imputation at some point
for col in X.columns:
    # print(col)
    X[col] = X[col].fillna(np.nanmean(X[col]))
    
Xs = preprocessing.StandardScaler().fit(X).transform(X)
Xs_df = pd.DataFrame(Xs, columns=X_cols)
Xs_df['group'] = y
Xs_df['condition'] = c
Xsg = Xs_df.groupby(by='group').mean()
Xs_df = Xs_df.drop(['group'], axis=1)

display(Xs_df.head(20).style)

Unnamed: 0,p_correct,hn_enc,dwell_obj1_enc,dwell_table_enc,dwell_other_enc,t_first_array_enc,t_first_obj1_enc,hn_ret,dwell_obj1_ret,dwell_table_ret,dwell_other_ret,t_first_array_ret,t_first_obj1_ret,hn_diff,dwell_total_diff,dwell_obj1_diff,dwell_table_diff,dwell_other_diff,t_first_array_diff,t_first_obj1_diff,pupil_diam_centre_diff,pupil_diam_spread_diff,dwell_pp_enc,dispersion_mean_enc,drop_out_total_enc,dwell_pp_ret,dispersion_mean_ret,drop_out_total_ret,dwell_pp_diff,dispersion_mean_diff,drop_out_total_diff,d_kl,ea_td,n_blinks_enc,n_blinks_ret,n_blinks_diff,mean_confidence,p_trackloss_enc,p_trackloss_ret,p_trackloss_diff,dwell_MO-SOs_ret,4MT,condition
0,0.105834,-1.133201,1.169993,-0.039175,-0.405228,-0.338521,0.78197,0.88901,0.335236,-0.572509,0.63855,1.842678,-0.519805,1.825893,-0.548375,-0.164926,-0.632747,1.033885,1.931489,-1.31687,-0.306191,-0.310872,0.0,-0.036743,0.0,0.1162,0.373214,0.0,0.1162,0.253596,0.0,0.812507,-1.04922,0.795194,0.910147,0.138924,1.105247,2.053038,0.417862,-1.613116,0.245716,0.731809,StayStay
1,-2.272625,-0.268771,-0.552464,-1.089452,0.315921,1.648966,-0.550995,0.575955,-0.540113,-1.056298,-0.207975,0.974122,-0.597987,0.789252,0.038687,-0.292757,0.012167,-0.52539,-0.45156,-0.179727,-0.348543,-0.271313,0.0,-0.76214,0.0,-0.56556,-0.441921,0.0,-0.56556,0.494554,0.0,-0.275378,-0.028216,-0.750957,-0.660604,0.1903,0.333033,-0.786375,-0.784592,-0.140621,-1.052183,-0.383734,WalkStay
2,0.581526,0.285108,0.323489,-0.869546,-0.170342,0.039713,-0.037355,1.126392,0.328061,-0.710115,-0.132453,0.134054,-0.944124,0.850913,-0.40161,0.184143,0.167324,0.04648,0.087885,-1.049022,-0.538688,0.224191,0.0,-1.195284,0.0,-0.239204,-0.01628,0.0,-0.239204,1.201366,0.0,-0.978933,-0.620824,-0.689111,-0.660604,0.087548,0.630039,-0.768641,-0.390436,0.313093,0.083717,-0.032629,TeleportStay
3,0.105834,0.953579,-0.039494,-0.54285,-0.3358,0.058236,-0.656487,0.654531,0.902751,-0.734184,-0.596858,-1.107498,-0.454554,-0.17281,0.067219,0.896397,-0.240127,-0.233453,-1.045309,0.081697,0.274488,-0.594058,0.0,-1.317555,0.0,-0.56556,-1.103941,0.0,-0.56556,0.655667,0.0,-0.032798,0.204842,-0.503573,0.044631,0.909564,0.68944,-0.342741,-0.374937,-0.105159,0.424399,-0.635472,StayRotate
4,1.057218,1.393759,0.191994,-0.743224,-0.29643,-0.220346,-1.073111,0.697668,0.573174,-0.133007,-0.911909,-1.510307,-0.162369,-0.504914,0.684543,0.478119,0.704671,-0.578867,-1.18375,0.799892,-0.747785,-0.339801,0.0,-0.880135,0.0,0.034183,-0.887685,0.0,0.034183,0.340521,0.0,-0.647003,0.458727,-0.936495,0.172855,1.834332,1.461653,-0.88999,-0.097474,0.788298,0.411975,-1.068258,WalkRotate
5,1.057218,-0.641393,-1.251891,0.549721,0.065607,0.199379,-0.61674,-1.237999,1.279148,0.141518,-0.144288,1.800939,0.415537,-0.647549,1.035462,1.77285,-0.469685,-0.206987,1.460667,1.042044,1.632058,1.454207,0.0,0.552683,0.0,-0.117034,-0.225831,0.0,-0.117034,-0.720474,0.0,-0.170772,0.422941,0.609656,0.621642,-0.015204,1.580455,0.051699,0.092289,0.049686,1.834424,-0.583414,StayStay
6,-1.321241,0.05367,-0.647833,0.138294,1.149325,0.89398,0.47885,-0.152469,-1.324043,0.881747,-0.229674,1.304054,-0.795199,-0.1886,-0.565235,-1.015957,0.884683,-1.401204,0.453682,-1.353448,0.106259,0.888084,0.0,-0.073822,0.0,-0.245184,-0.349477,0.0,-0.245184,-0.15428,0.0,-0.213655,-0.201342,0.609656,0.525473,-0.169332,-0.736185,-0.215592,0.42916,0.727141,-0.44634,0.551285,WalkStay
7,0.105834,-0.643453,-1.700826,0.460354,0.438285,0.761728,-0.718457,-1.20372,0.646767,-0.196205,0.200634,1.329892,-0.65231,-0.612668,-1.877047,1.34593,-0.766228,-0.25528,0.583471,-0.087855,-0.45408,0.131406,0.0,0.745853,0.0,0.800522,1.270758,0.0,0.800522,0.004827,0.0,0.67996,-0.61084,0.609656,-0.051538,-1.0941,0.392434,0.238299,-0.351404,-0.672904,1.166461,-0.630154,TeleportStay
8,0.105834,-2.035658,-0.154724,1.19405,0.870016,0.865085,-0.38713,-0.089169,0.652979,0.890618,-0.067357,0.294382,-0.603651,1.646818,2.172517,0.701661,-0.329359,-0.957567,-0.432465,-0.337021,0.541372,1.738638,0.0,0.70056,0.0,-0.56556,1.83312,0.0,-0.56556,0.398253,0.0,0.211456,-1.945235,0.331349,0.429305,0.138924,0.333033,1.251689,-0.067872,-1.372788,1.306097,-0.296753,StayRotate
9,0.105834,0.379625,-1.26011,0.384054,0.464619,0.404981,-0.242675,-0.548274,0.288417,0.324512,1.568487,3.274702,-0.574987,-0.847952,0.006697,0.811632,-0.060685,1.04207,2.622504,-0.43712,-0.680369,1.732093,0.0,-0.52399,0.0,1.997445,0.672995,0.0,1.997445,0.938235,0.0,-0.102084,-0.85384,-0.318035,0.525473,1.371948,0.927044,-0.223043,0.57015,0.903584,1.23994,-0.198879,WalkRotate


In [5]:
# save to csv
dir = "C:\\Users\\Luke\\Documents\\AlloEye\\data\\feature_saves\\"
file_name = "conds_cleaned"
file_path = dir + f"{file_name}.csv"
df.to_csv(file_path, index=False)


In [8]:
# within groups, difference between conditions

p_thresh = 0.05
import scipy.stats as stats
# stats f_oneway functions takes the groups as input and returns ANOVA F and p value
anova_filt_cols = [[], [], []]
groups=['P', 'O', 'Y'] 

for col in Xs_df.columns:
    if col != 'condition' and col != 'group':
        P = Xs_df[y == 0]
        O = Xs_df[y == 1]
        Y = Xs_df[y == 2]
        dfs = [P, O, Y]
        for i in range(3):
            df = dfs[i]
            SS = df[col][df.condition == 'StayStay']
            WS = df[col][df.condition == 'WalkStay']
            TS = df[col][df.condition == 'TeleportStay']
            SR = df[col][df.condition == 'StayRotate']
            WR = df[col][df.condition == 'WalkRotate']
            TR = df[col][df.condition == 'TeleportRotate']

            fvalue, pvalue = stats.f_oneway(SS, WS, TS, SR, WR, TR)
            if pvalue < p_thresh:
                print(f"group {groups[i]}, col: {col}, f: {fvalue:.3f}, p: {pvalue:.3f})")
                anova_filt_cols[i].append(col)

group O, col: p_correct, f: 5.993, p: 0.000)
group Y, col: p_correct, f: 7.276, p: 0.000)
group Y, col: hn_ret, f: 4.718, p: 0.000)
group O, col: dwell_obj1_ret, f: 2.978, p: 0.015)
group Y, col: dwell_obj1_ret, f: 8.215, p: 0.000)
group O, col: dwell_other_ret, f: 3.330, p: 0.008)
group P, col: t_first_array_ret, f: 6.203, p: 0.000)
group O, col: t_first_array_ret, f: 9.735, p: 0.000)
group Y, col: t_first_array_ret, f: 5.491, p: 0.000)
group Y, col: hn_diff, f: 2.985, p: 0.013)
group O, col: dwell_total_diff, f: 3.386, p: 0.007)
group O, col: dwell_obj1_diff, f: 4.444, p: 0.001)
group Y, col: dwell_obj1_diff, f: 7.942, p: 0.000)
group Y, col: dwell_other_diff, f: 2.983, p: 0.013)
group P, col: t_first_array_diff, f: 4.687, p: 0.002)
group O, col: t_first_array_diff, f: 3.295, p: 0.008)
group Y, col: t_first_array_diff, f: 6.985, p: 0.000)
group Y, col: pupil_diam_centre_diff, f: 6.014, p: 0.000)
group Y, col: pupil_diam_spread_diff, f: 3.086, p: 0.011)
group Y, col: dispersion_mean_r

In [14]:
# within conditions, difference between group pairs

p_thresh = 0.05
import scipy.stats as stats
# stats f_oneway functions takes the groups as input and returns ANOVA F and p value
anova_filt_cols_conditions = []
conditions=['StayStay', 'WalkStay', 'TeleportStay', 'StayRotate', 'WalkRotate', 'TeleportRotate'] 
import itertools
group_codes = [0, 1, 2]
pairs = list(itertools.combinations(group_codes, 2))
print(pairs)
for pair in pairs:
    g1, g2 = pair
    g_df = Xs_df[(y == g1) | (y == g2)]
    for col in Xs_df.columns:
        if col != 'condition' and col != 'group':
            dfs = []
            for i in range(len(conditions)):
                anova_filt_cols_conditions.append([])
                df = g_df[col][g_df.condition == conditions[i]]
                G1 = df[y == g1]
                G2 = df[y == g2]
                tvalue, pvalue = stats.ttest_ind(G1, G2)
                if pvalue < p_thresh:
                    print(f"{groups[g1]} vs {groups[g2]} {conditions[i]}, col: {col}, t: {tvalue:.3f}, p: {pvalue:.3f})")
                    anova_filt_cols_conditions[i].append(col)

[(0, 1), (0, 2), (1, 2)]
P vs O StayStay, col: hn_enc, t: 2.226, p: 0.035)
P vs O TeleportStay, col: dwell_obj1_enc, t: -2.493, p: 0.020)
P vs O StayStay, col: hn_diff, t: -2.113, p: 0.045)
P vs O TeleportStay, col: dwell_obj1_diff, t: 2.385, p: 0.025)
P vs O StayRotate, col: t_first_array_diff, t: -2.476, p: 0.020)
P vs O WalkRotate, col: p_trackloss_enc, t: -2.170, p: 0.040)
P vs O WalkRotate, col: p_trackloss_diff, t: 2.748, p: 0.011)
P vs Y StayStay, col: p_correct, t: -3.697, p: 0.001)
P vs Y TeleportStay, col: p_correct, t: -2.384, p: 0.023)
P vs Y StayRotate, col: p_correct, t: -2.962, p: 0.006)
P vs Y TeleportRotate, col: p_correct, t: -2.657, p: 0.012)
P vs Y StayStay, col: hn_enc, t: 2.106, p: 0.043)
P vs Y WalkRotate, col: hn_enc, t: 2.817, p: 0.008)
P vs Y WalkStay, col: t_first_array_enc, t: 3.245, p: 0.003)
P vs Y TeleportStay, col: t_first_array_enc, t: 3.352, p: 0.002)
P vs Y StayRotate, col: t_first_array_enc, t: 2.571, p: 0.015)
P vs Y WalkRotate, col: t_first_array_e

In [None]:

def subboxplots(df, cols, xlabs=['SS', 'WS', 'TS', 'SR', 'WR', 'TR']):
    n_cols = 4
    n_rows = int(np.ceil(len(cols)/n_cols))

    fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 10), dpi=100)
    plt.subplots_adjust(left=0.1,
                        bottom=0.1,
                        right=0.6,
                        top=0.9,
                        wspace=0.6,
                        hspace=0.6)
    ticks = list(replace_map.values())
    labels = list(replace_map.keys())
    for i in range(len(cols)):
        row = int(np.floor(i/n_cols))
        col = i % n_cols
        feat = cols[i]
        plt.sca(axes[row, col])
        df.groupby(by='group').boxplot(column=feat, subplots=False)
        # axes[row, col].set_xticks(ticks)
        axes[row, col].set_xticklabels(xlabs)
        axes[row, col].set_title(feat)