In [5]:
import matplotlib.pyplot as plt, numpy as np, seaborn as sns, scipy.stats as stats, pandas as pd, os, glob
import ast
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from scipy.stats import rankdata
from scipy.stats import ttest_ind

simplify patient IDs

In [6]:
def split_pt_ID(pt_file):
    """Parse the subject ID out of a results file path.

    The file name is expected to contain ``subj-<digits>_``; the digits
    between ``subj-`` and the next underscore form the raw ID, and its last
    two digits form the simplified ID.

    Returns
    -------
    (int, int)
        ``(raw_pt_ID, pt_ID)``.
    """
    raw_token = os.path.basename(pt_file).split('subj-')[1].split('_')[0]
    return int(raw_token), int(raw_token[-2:])


# glob returns files in arbitrary order and may return several files for one
# subject: collect (raw, short) pairs in a set to drop duplicates, then sort by
# raw ID so the subject order is the same on every run.
pt_ID_map = dict(sorted({split_pt_ID(pt_file)
                         for pt_file in glob.glob('../../results/psychopy/asymmetry*.csv')}))

raw_pt_IDs = list(pt_ID_map.keys())
pt_IDs = list(pt_ID_map.values())

# two raw IDs that share their last two digits would silently merge two subjects
assert len(set(pt_IDs)) == len(pt_IDs), 'simplified patient IDs are not unique'

print(pt_ID_map)

{202509: 9, 202511: 11, 202512: 12, 202521: 21, 202518: 18}


load and format df

In [7]:
# Columns derived during alignment (in the loop below) and during
# post-processing. They are declared up front so that, after concatenation,
# they lead the combined frame and already exist as placeholders before the
# post-processing steps fill them in.
derived_cols = ['dir_flip', 'sess_flip',
                'chosen_pos_aligned', 'div_pos_aligned', 'stim_pos_aligned',
                'true_class', 'pred_class', 'err_type',
                'invalid', 'missed', 'purely_incorrect',
                'signed_err', 'unsigned_err',
                'dist_moved', 'normed_RT',
                'uncertainty', 'stim_aligned_to_div',
                'resp_aligned_to_div', 'stim_aligned_to_cntxt', 'resp_aligned_to_cntxt']

# One aligned frame per subject. They are concatenated once after the loop:
# growing a frame with pd.concat inside the loop re-copies all earlier rows on
# every iteration, and seeding it with an empty all-NaN frame makes the integer
# flip columns come out as floats.
subj_frames = []

for raw_pt_ID in raw_pt_IDs:

    # Restrict the lookup to the task CSVs (the same 'asymmetry*.csv' family the
    # IDs were discovered from) so other files carrying the same ID cannot be
    # picked up; sorted() makes the choice deterministic if several match.
    subj_files = sorted(glob.glob(f'../../results/psychopy/asymmetry*{raw_pt_ID}*.csv'))
    if not subj_files:
        raise FileNotFoundError(f'no asymmetry CSV found for subject {raw_pt_ID}')

    df_subj = pd.read_csv(subj_files[0])[:240]  # keep the first 240 rows only
    df_subj['subj'] = df_subj['subj'].replace(pt_ID_map)
    # ensure integer dtype (nullable int supports NaNs)
    df_subj['subj'] = pd.to_numeric(df_subj['subj'], errors='coerce').astype('Int64')

    # # trial sort
    # df_subj = df_subj.sort_values(by='trial_key').reset_index(drop=True)
    # assert df_subj['trial_key'].is_monotonic_increasing, 'trial_key is not sorted correctly'

    # convert str to list, and store chosen position (last entry of 'positions')
    for col in ['positions']:
        if isinstance(df_subj[col].iloc[0], str):
            df_subj[col] = df_subj[col].apply(ast.literal_eval)

    df_subj['chosen_pos'] = df_subj['positions'].apply(lambda x: x[-1])

    ## must sequentially align for flips & sessions because of the double flip situations

    # 1. aligning for direction flips
    df_subj['dir_flip'] = (df_subj['shape_order'] == 'flat_curv').astype(int)
    assert df_subj['dir_flip'].sum() == len(df_subj)/2, \
        f'subject {raw_pt_ID}: expected exactly half of the trials to be direction-flipped'

    # align positions to curv=left, flat=right
    for col_prefix in ['chosen_pos', 'div_pos']: # dont need to unflip stim_pos because it never gets flipped; only target_pos does
        df_subj[col_prefix + '_aligned'] = np.where( df_subj['dir_flip'],
                                                     -df_subj[col_prefix], df_subj[col_prefix] )

    # 2. aligning for session type (stim-val association)
    sess_type = df_subj['sess_type'].iloc[0]

    if sess_type in ['A', 'C']: # adam, tori
        df_subj['condition'] = df_subj['condition'].map({'baseline':'baseline', 'curv_comp':'pen_comp', 'flat_comp':'rew_comp'})
        df_subj['sess_flip'] = 0
        df_subj['stim_pos_aligned'] = df_subj['stim_pos']

    elif sess_type in ['B', 'D']: # amisha, erfan
        df_subj['condition'] = df_subj['condition'].map({'baseline':'baseline', 'curv_comp':'rew_comp', 'flat_comp':'pen_comp'})
        df_subj['sess_flip'] = 1
        df_subj['stim_pos_aligned'] = -df_subj['stim_pos']

        # align positions to penalty=left, reward=right
        for col in ['chosen_pos_aligned', 'div_pos_aligned']:
            df_subj[col] = -df_subj[col]

    else:
        # an unknown session type would otherwise pass through without
        # sess_flip / stim_pos_aligned and only surface later as NaNs
        raise ValueError(f'subject {raw_pt_ID}: unexpected sess_type {sess_type!r}')

    subj_frames.append(df_subj)

if not subj_frames:
    raise ValueError('no subject files were loaded (raw_pt_IDs is empty)')

df_subjs = pd.concat(subj_frames, ignore_index=True)

# derived columns first (missing ones are created as NaN placeholders), then
# every remaining column in its order of appearance
df_subjs = df_subjs.reindex(
    columns=derived_cols + [c for c in df_subjs.columns if c not in derived_cols]
)

# category stuff
df_subjs['true_class'] = np.where( df_subjs['valence'] == 'rew', 1, 0 )
# NOTE(review): a comparison against NaN is False, so any trial without a chosen
# position would get pred_class 0 here -- confirm that is intended for missed trials
df_subjs['pred_class'] = (df_subjs['div_pos_aligned'] < df_subjs['chosen_pos_aligned']).astype(int)
df_subjs['err_type'] = df_subjs['pred_class'] - df_subjs['true_class']

# outcome stuff
df_subjs['outcome'] = df_subjs['outcome'].replace({2: 3, -2: -3}) # older versions
# for PILOTS, convert 0 to -1 or -3 depending on whether true_class is 1 or 0
zero_outcome = df_subjs['outcome'] == 0
df_subjs['outcome'] = np.select(
    [zero_outcome & (df_subjs['true_class'] == 1),
     zero_outcome & (df_subjs['true_class'] == 0)],
    [-1, -3],
    default=df_subjs['outcome'],
)

# invalid and missed trials
df_subjs['invalid'] = df_subjs['trials.slider_resp.rt'].isna()
df_subjs['missed'] = df_subjs['trials.submit_resp.keys'].isna()
# incorrect although both a slider response and a submit key were recorded
df_subjs['purely_incorrect'] = (df_subjs['correct']==0) & ~df_subjs['invalid'] & ~df_subjs['missed']

# continuous resp
df_subjs['signed_err'] = df_subjs['chosen_pos_aligned'] - df_subjs['stim_pos_aligned']
df_subjs['unsigned_err'] = df_subjs['signed_err'].abs()

# RT
df_subjs['dist_moved'] = (df_subjs['chosen_pos'] - df_subjs['marker_init']).abs()
# the .01 offset keeps the ratio finite when the marker was not moved
df_subjs['normed_RT'] = df_subjs['trials.submit_resp.rt']/ (10 * (df_subjs['dist_moved'] + .01) )

# boundary stuff
df_subjs['uncertainty'] = (df_subjs['stim_pos_aligned'] - df_subjs['div_pos_aligned']).abs() < 0.2
df_subjs['stim_aligned_to_div'] = (df_subjs['stim_pos_aligned'] - df_subjs['div_pos_aligned']).round(3)
df_subjs['resp_aligned_to_div'] = (df_subjs['chosen_pos_aligned'] - df_subjs['div_pos_aligned']).round(3)
# context alignment: keep the div-relative offset for true_class == 1 and negate it
# for true_class == 0, so the sign is expressed relative to the trial's own class
df_subjs['stim_aligned_to_cntxt'] = np.where( df_subjs['true_class'] == 1, df_subjs['stim_aligned_to_div'], - df_subjs['stim_aligned_to_div'] )
df_subjs['resp_aligned_to_cntxt'] = np.where( df_subjs['true_class'] == 1, df_subjs['resp_aligned_to_div'], - df_subjs['resp_aligned_to_div'] )

# rank within a block for each subj/cond
grp = ['subj', 'condition', 'blockN']
# build both ranks, assign once, then copy to defragment
df_subjs = df_subjs.assign(
    stim_ranks = df_subjs.groupby(grp)['stim_pos_aligned'].transform('rank'),
    resp_ranks = df_subjs.groupby(grp)['chosen_pos_aligned'].transform('rank'),
).copy()
max_rank = df_subjs['stim_ranks'].max()

# sort by subj; the default quicksort is not stable and may reorder trials
# within a subject, so request a stable sort to keep the trial order
df_subjs = df_subjs.sort_values(by=['subj'], kind='stable').reset_index(drop=True)
df_subjs.to_csv('../../results/psychopy/all_subjs_clean.csv', index=False)

# sanity checks: value distributions, overall size, expected trial count
for check_col in ('outcome', 'correct', 'uncertainty'):
    print(df_subjs[check_col].value_counts(), '\n')
print(df_subjs.shape, '\n')

n_trials_total = len(df_subjs)
print(n_trials_total)
assert n_trials_total == 240 * len(raw_pt_IDs), "Total trials do not match expected number"

# columns shown when inspecting individual trials
disp_cols = [
    'subj', 'sess_type', 'sess_flip', 'shape_order', 'dir_flip',
    'target_pos', 'shape', 'valence', 'true_class',
    'div_pos', 'div_pos_aligned', 'stim_pos', 'stim_pos_aligned',
    'chosen_pos', 'chosen_pos_aligned', 'pred_class', 'err_type',
    'signed_err', 'unsigned_err',
]

# misclassified baseline trials of session type B where exactly one of the two
# flips (session vs. direction) applies; display the first 20
is_sess_B = df_subjs['sess_type'] == 'B'
is_baseline = df_subjs['condition'] == 'baseline'
is_misclassified = df_subjs['err_type'] != 0
flips_differ = df_subjs['sess_flip'] != df_subjs['dir_flip']

df_subjs.loc[is_sess_B & is_baseline & is_misclassified & flips_differ, disp_cols].head(20)


outcome
 3.0    476
 1.0    473
-3.0    127
-1.0    124
Name: count, dtype: int64 

correct
1.0    949
0.0    251
Name: count, dtype: int64 

uncertainty
True     800
False    400
Name: count, dtype: int64 

(1200, 144) 

1200


Unnamed: 0,subj,sess_type,sess_flip,shape_order,dir_flip,target_pos,shape,valence,true_class,div_pos,div_pos_aligned,stim_pos,stim_pos_aligned,chosen_pos,chosen_pos_aligned,pred_class,err_type,signed_err,unsigned_err
343,11,B,1.0,curv_flat,0.0,0.1,flat,pen,0,0.0,-0.0,0.1,-0.1,-0.076,0.076,1,1,0.176,0.176
351,11,B,1.0,curv_flat,0.0,-0.26,curv,rew,1,0.0,-0.0,-0.26,0.26,0.04,-0.04,0,-1,-0.3,0.3
352,11,B,1.0,curv_flat,0.0,-0.02,curv,rew,1,0.0,-0.0,-0.02,0.02,0.048,-0.048,0,-1,-0.068,0.068
355,11,B,1.0,curv_flat,0.0,0.3,flat,pen,0,0.0,-0.0,0.3,-0.3,-0.036,0.036,1,1,0.336,0.336
359,11,B,1.0,curv_flat,0.0,-0.14,curv,rew,1,0.0,-0.0,-0.14,0.14,0.012,-0.012,0,-1,-0.152,0.152
360,11,B,1.0,curv_flat,0.0,-0.3,curv,rew,1,0.0,-0.0,-0.3,0.3,0.016,-0.016,0,-1,-0.316,0.316
394,11,B,1.0,curv_flat,0.0,0.02,flat,pen,0,0.0,-0.0,0.02,-0.02,-0.052,0.052,1,1,0.072,0.072
395,11,B,1.0,curv_flat,0.0,-0.1,curv,rew,1,0.0,-0.0,-0.1,0.1,0.012,-0.012,0,-1,-0.112,0.112
443,11,B,1.0,curv_flat,0.0,0.02,flat,pen,0,0.0,-0.0,0.02,-0.02,-0.22,0.22,1,1,0.24,0.24
448,11,B,1.0,curv_flat,0.0,-0.1,curv,rew,1,0.0,-0.0,-0.1,0.1,0.024,-0.024,0,-1,-0.124,0.124


In [9]:
# one row per subject: take each subject's first trial via groupby.head(1),
# then keep only the subject ID and session type
first_trial_rows = df_subjs.groupby('subj').head(1)
df_first_per_subj = first_trial_rows.loc[:, ['subj', 'sess_type']].reset_index(drop=True)
df_first_per_subj

Unnamed: 0,subj,sess_type
0,9,A
1,11,B
2,12,C
3,18,D
4,21,A
