In [None]:
import matplotlib.pyplot as plt, numpy as np, seaborn as sns, scipy.stats as stats, pandas as pd, os, glob
import ast
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from scipy.stats import rankdata
from scipy.stats import ttest_ind

In [None]:
# Subject bookkeeping: short display labels and the raw IDs they come from.
patients = ['09', '11', '12', '18']
pilots = ['A', 'E', 'T']
subjs = [*patients, *pilots]

# Raw identifiers, in the same order as `subjs`; each one is used to glob for
# that subject's results file.
raw_subjs = [202509, 202511, 202512, 202518, 'amisha', 'erfan', 'tori']

# Raw `subj` value -> short label. Numeric IDs are keyed as floats
# (a float key also matches the equal int, e.g. 202509 == 202509.0).
subj_map = dict(zip(
    [202509.0, 202511.0, 202512.0, 202518.0, 'amisha', 'erfan', 'tori'],
    subjs,
))

load and format df

In [None]:
# Build one long trial table (`df_subjs`) from the per-subject PsychoPy CSVs.
# Each subject's frame is aligned in two sequential steps (direction flip, then
# session flip), collected in a list, and concatenated ONCE at the end instead
# of re-concatenating the growing table on every iteration.

N_TRIALS_PER_SUBJ = 240  # only the first 240 rows of each results file are used

# Empty frame that only carries the derived column names. It is placed first in
# the final concat, so these columns lead the column order of `df_subjs`.
df_subjs = pd.DataFrame()
for col in ['dir_flip', 'sess_flip',
            'chosen_pos_aligned', 'div_pos_aligned', 'stim_pos_aligned',
            'true_class', 'pred_class', 'err_type',
            'invalid', 'missed', 'purely_incorrect',
            'signed_err', 'unsigned_err',
            'dist_moved', 'normed_RT',
            'uncertainty', 'stim_aligned_to_div',
            'resp_aligned_to_div', 'stim_aligned_to_cntxt', 'resp_aligned_to_cntxt']:
    df_subjs[col] = np.nan

subj_frames = []
for raw_subj in raw_subjs:

    # Locate this subject's results file; fail with a readable message rather
    # than a bare IndexError when nothing matches.
    pattern = f'../../results/psychopy/*{raw_subj}*'
    matches = glob.glob(pattern)
    if not matches:
        raise FileNotFoundError(f'no results file matches {pattern!r}')
    # NOTE(review): if several files match, the first one glob returns is used
    # (same as before) -- confirm there is exactly one file per subject.
    # .copy() so the column assignments below never write into a slice of the
    # parsed frame.
    df_subj = pd.read_csv(matches[0]).iloc[:N_TRIALS_PER_SUBJ].copy()
    df_subj['subj'] = df_subj['subj'].replace(subj_map)

    # rows are kept in file order (no re-sort by trial_key)

    # 'positions' is parsed from its string repr into a Python object when
    # needed; the chosen position is the last entry of that sequence
    if isinstance(df_subj['positions'].iloc[0], str):
        df_subj['positions'] = df_subj['positions'].apply(ast.literal_eval)
    df_subj['chosen_pos'] = df_subj['positions'].apply(lambda x: x[-1])

    ## must sequentially align for flips & sessions because of the double flip situations

    # 1. aligning for direction flips
    df_subj['dir_flip'] = (df_subj['shape_order'] == 'flat_curv').astype(int)
    assert df_subj['dir_flip'].sum() == len(df_subj)/2, \
        f'{raw_subj}: expected exactly half of the trials to have shape_order == flat_curv'

    # align positions to curv=left, flat=right
    # (stim_pos is not un-flipped here because it never gets flipped; only target_pos does)
    for col_prefix in ['chosen_pos', 'div_pos']:
        df_subj[col_prefix + '_aligned'] = np.where(df_subj['dir_flip'],
                                                    -df_subj[col_prefix], df_subj[col_prefix])

    # 2. aligning for session type (stim-val association)
    sess_type = df_subj['sess_type'].iloc[0]
    if sess_type in ('A', 'C'):
        # curv_comp -> pen_comp, flat_comp -> rew_comp; no extra sign flip
        df_subj['condition'] = df_subj['condition'].map({'baseline':'baseline', 'curv_comp':'pen_comp', 'flat_comp':'rew_comp'})
        df_subj['sess_flip'] = 0
        df_subj['stim_pos_aligned'] = df_subj['stim_pos']

    elif sess_type in ('B', 'D'):
        # curv_comp -> rew_comp, flat_comp -> pen_comp; every aligned position is negated
        df_subj['condition'] = df_subj['condition'].map({'baseline':'baseline', 'curv_comp':'rew_comp', 'flat_comp':'pen_comp'})
        df_subj['sess_flip'] = 1
        df_subj['stim_pos_aligned'] = -df_subj['stim_pos']

        # align positions to penalty=left, reward=right
        for col in ['chosen_pos_aligned', 'div_pos_aligned']:
            df_subj[col] = -df_subj[col]

    else:
        # previously an unknown session type slipped through with an unmapped
        # 'condition' and no 'stim_pos_aligned'
        raise ValueError(f'{raw_subj}: unexpected sess_type {sess_type!r} (expected A, B, C or D)')

    subj_frames.append(df_subj)

# single concat: same rows and column order as concatenating inside the loop
df_subjs = pd.concat([df_subjs, *subj_frames], ignore_index=True)

# ---- class labels ----
# true_class: 1 on 'rew' valence trials, 0 otherwise
df_subjs['true_class'] = (df_subjs['valence'] == 'rew').astype(int)
# pred_class: 1 when the aligned response lies above the aligned divider
df_subjs['pred_class'] = df_subjs['chosen_pos_aligned'].gt(df_subjs['div_pos_aligned']).astype(int)
# err_type: 0 = predicted class matches, +1 = predicted 1 on a class-0 trial, -1 = the reverse
df_subjs['err_type'] = df_subjs['pred_class'].sub(df_subjs['true_class'])

# ---- outcome coding ----
df_subjs['outcome'] = df_subjs['outcome'].replace({2: 3, -2: -3}) # older versions
# for PILOTS: an outcome of 0 becomes -1 on class-1 trials and -3 on class-0 trials
# (true_class is always 0 or 1, so every 0 outcome is recoded)
zero_outcome = df_subjs['outcome'] == 0
recoded_zero = np.where(df_subjs['true_class'] == 1, -1, -3)
df_subjs['outcome'] = np.where(zero_outcome, recoded_zero, df_subjs['outcome'])

# ---- invalid and missed trials ----
no_slider_rt = df_subjs['trials.slider_resp.rt'].isna()
no_submit_key = df_subjs['trials.submit_resp.keys'].isna()
df_subjs['invalid'] = no_slider_rt
df_subjs['missed'] = no_submit_key
# incorrect trials that are neither invalid nor missed
df_subjs['purely_incorrect'] = (df_subjs['correct'] == 0) & ~(no_slider_rt | no_submit_key)

# ---- continuous response error ----
resp_minus_stim = df_subjs['chosen_pos_aligned'] - df_subjs['stim_pos_aligned']
df_subjs['signed_err'] = resp_minus_stim
df_subjs['unsigned_err'] = resp_minus_stim.abs()

# ---- RT, scaled by how far the marker was moved ----
df_subjs['dist_moved'] = (df_subjs['chosen_pos'] - df_subjs['marker_init']).abs()
# the +.01 keeps the denominator non-zero when the marker was not moved
df_subjs['normed_RT'] = df_subjs['trials.submit_resp.rt'] / (10 * (df_subjs['dist_moved'] + .01))

# ---- positions relative to the divider ----
stim_minus_div = df_subjs['stim_pos_aligned'] - df_subjs['div_pos_aligned']
resp_minus_div = df_subjs['chosen_pos_aligned'] - df_subjs['div_pos_aligned']
# uncertainty: stimulus within 0.1 of the divider (tested on the unrounded difference)
df_subjs['uncertainty'] = stim_minus_div.abs() < 0.1
df_subjs['stim_aligned_to_div'] = stim_minus_div.round(3)
df_subjs['resp_aligned_to_div'] = resp_minus_div.round(3)

# context-aligned versions: sign kept on class-1 trials and flipped on class-0
# trials, so a positive value always lies on the divider side that matches the
# trial's own true_class
is_class_one = df_subjs['true_class'] == 1
for prefix in ('stim', 'resp'):
    rel_to_div = df_subjs[f'{prefix}_aligned_to_div']
    df_subjs[f'{prefix}_aligned_to_cntxt'] = np.where(is_class_one, rel_to_div, -rel_to_div)

# ---- ranks within each subj / condition / block ----
grp = ['subj', 'condition', 'blockN']
by_block = df_subjs.groupby(grp)
# add both rank columns in one assign, then copy once to defragment the frame
df_subjs = df_subjs.assign(
    stim_ranks=by_block['stim_pos_aligned'].transform('rank'),
    resp_ranks=by_block['chosen_pos_aligned'].transform('rank'),
).copy()
max_rank = df_subjs['stim_ranks'].max()

# Sanity-check the combined table, then persist it and show a few summaries.

# Validate BEFORE writing: previously the CSV was saved first, so a table with
# the wrong number of trials was already on disk when the assertion fired.
# 240 trials are kept per subject.
assert len(df_subjs) == 240 * len(raw_subjs), "Total trials do not match expected number"

df_subjs.to_csv('../../results/psychopy/all_subjs_clean.csv', index=False)

# quick summaries of the cleaned table
print(df_subjs['outcome'].value_counts(), '\n')
print(df_subjs['correct'].value_counts(), '\n')
print(df_subjs.shape, '\n')
print(len(df_subjs))

# columns that trace every alignment step for one trial
disp_cols = ['sess_type', 'sess_flip', 'shape_order', 'dir_flip', 'target_pos', 'shape', 'valence', 'true_class',
             'div_pos', 'div_pos_aligned', 'stim_pos', 'stim_pos_aligned', 'chosen_pos', 'chosen_pos_aligned', 'pred_class', 'err_type',
             'signed_err', 'unsigned_err']

# spot check: session-B baseline trials with a class error where exactly one of
# the two flips (session / direction) applies
spot_check = (
    (df_subjs['sess_type'] == 'B') &
    (df_subjs['condition'] == 'baseline') &
    (df_subjs['err_type'] != 0) &
    (df_subjs['sess_flip'] != df_subjs['dir_flip'])
)
df_subjs.loc[spot_check, disp_cols].head(20)


outcome
 1.0    683
 3.0    664
-1.0    176
-3.0    157
Name: count, dtype: int64 

correct
1.0    1347
0.0     333
Name: count, dtype: int64 

(1680, 158) 

1680


Unnamed: 0,sess_type,sess_flip,shape_order,dir_flip,target_pos,shape,valence,true_class,div_pos,div_pos_aligned,stim_pos,stim_pos_aligned,chosen_pos,chosen_pos_aligned,pred_class,err_type,signed_err,unsigned_err
241,B,1.0,curv_flat,0.0,-0.18,curv,rew,1,0.0,-0.0,-0.18,0.18,0.016,-0.016,0,-1,-0.196,0.196
243,B,1.0,curv_flat,0.0,-0.3,curv,rew,1,0.0,-0.0,-0.3,0.3,0.04,-0.04,0,-1,-0.34,0.34
262,B,1.0,curv_flat,0.0,-0.02,curv,rew,1,0.0,-0.0,-0.02,0.02,0.068,-0.068,0,-1,-0.088,0.088
264,B,1.0,curv_flat,0.0,-0.1,curv,rew,1,0.0,-0.0,-0.1,0.1,0.024,-0.024,0,-1,-0.124,0.124
275,B,1.0,curv_flat,0.0,0.02,flat,pen,0,0.0,-0.0,0.02,-0.02,-0.22,0.22,1,1,0.24,0.24
400,B,1.0,curv_flat,0.0,-0.3,curv,rew,1,0.0,-0.0,-0.3,0.3,0.016,-0.016,0,-1,-0.316,0.316
408,B,1.0,curv_flat,0.0,-0.02,curv,rew,1,0.0,-0.0,-0.02,0.02,0.048,-0.048,0,-1,-0.068,0.068
411,B,1.0,curv_flat,0.0,0.3,flat,pen,0,0.0,-0.0,0.3,-0.3,-0.036,0.036,1,1,0.336,0.336
415,B,1.0,curv_flat,0.0,-0.14,curv,rew,1,0.0,-0.0,-0.14,0.14,0.012,-0.012,0,-1,-0.152,0.152
423,B,1.0,curv_flat,0.0,-0.26,curv,rew,1,0.0,-0.0,-0.26,0.26,0.04,-0.04,0,-1,-0.3,0.3


In [None]:
# One row per subject (their first trial in the table), showing which session
# type each subject ran.
first_trials = df_subjs.groupby('subj').head(1)
df_first_per_subj = first_trials.loc[:, ['subj', 'sess_type']].reset_index(drop=True)
df_first_per_subj

Unnamed: 0,subj,sess_type
0,09,A
1,11,B
2,12,C
3,18,D
4,A,B
5,E,B
6,T,C
