In [2]:
import pandas as pd
import numpy as np
import glob
import os
import pickle

## Notes

Subject 1bis is a duplicate of subject 1, added for development purposes

Subject 1 does not appear to have any missed prediction trials, so the handling of missed prediction trials is untested. It is unclear how the pipeline should behave if this were to happen.

In [3]:
## Dataset configuration shared by every cell below
metadata=dict(
  dataset_name='dev',
  folder_logfiles='logfiles',
  included_participants=['1', '1bis'],
  included_sessions=['1','2'],
  included_runs=['1','2','3'],
  folder_compiled_data='compiled_data',
  # Indexed as SAmap[state][key position] to obtain an action index
  SAmap=[[1,2],[0,2],[0,1]],
)

# Columns copied from the rows holding a prediction response onto the
# rows tagged as prediction trials
prediction_columns=[
  'trial_num',
  'state_side',
  'correct_resp',
  'state_choice',
  'P_key_resp.keys',
  'P_fixation.started',
  'P_ITI.started',
  'feedback',
  'P_state.started',
  'P_action.started',
  'P_state_choice_L.started',
  'P_state_choice_R.started',
  'P_key_resp.started',
  'P_fixation.stopped',
  'P_feedback_img.started',
  'Accuracy',
  'P_choice_box.started',
  'P_key_resp.rt',
]

In [4]:
## Compile every expected logfile into one dataframe (df_full).
# Per-run frames are collected in a list and concatenated once at the end;
# growing df_full with pd.concat inside the loop re-copies all previous rows
# on every iteration.
run_frames=[]

## Loop over expected logfiles
for pid in metadata['included_participants']:

  for sid in metadata['included_sessions']:

    for rid in metadata['included_runs']:

      # get file: exactly one logfile is expected per participant/session/run
      filepath_base=os.path.join(metadata['folder_logfiles'], f'{pid}_{sid}_{rid}_EP_task*.csv')
      filepath=glob.glob(filepath_base)
      #
      assert len(filepath)==1, f'There should be only one file matching {filepath_base}'
      #
      df_run=pd.read_csv(filepath[0])

      # rows that actually hold a prediction response
      df_predict=df_run[prediction_columns].copy(deep=True)
      df_predict=df_predict[df_predict['P_key_resp.keys'].notna()].reset_index(drop=True)
      n_predicted=len(df_predict)

      # clean up: keep only the rows that have a value in E_catch.thisTrialN
      df_run=df_run[df_run['E_catch.thisTrialN'].notna()].reset_index(drop=True)

      # rows tagged as prediction trials receive the prediction responses, in order
      prediction_index=np.where(df_run['trial_tag_bool']==1)[0]
      if prediction_index.shape[0]>n_predicted:
        print(f'Warning, {prediction_index.shape[0]-n_predicted} prediction trial missing in {pid}_{sid}_{rid}')
        print(f'Accepting for now, assuming the last prediction responses was/were not recorded...')

      df_run.loc[prediction_index[:n_predicted],prediction_columns]=df_predict.values

      # The row preceding a positive jump in E_catch.thisN is flagged as a
      # missed exploration trial.
      df_run['missedExplore']=0
      repeatedExplore_ind=np.where(df_run['E_catch.thisN'].diff()>0)[0]-1
      if repeatedExplore_ind.shape[0]>0:
        df_run.loc[repeatedExplore_ind,'missedExplore']=1
        print(f'{repeatedExplore_ind.shape[0]} missed exploration trials {pid}_{sid}_{rid}')

      # Same rule for prediction trials, based on P_catch.thisN.
      df_run['missedPredict']=0
      repeatedPredict_ind=np.where(df_run['P_catch.thisN'].diff()>0)[0]-1
      if repeatedPredict_ind.shape[0]>0:
        df_run.loc[repeatedPredict_ind,'missedPredict']=1
        print(f'{repeatedPredict_ind.shape[0]} missed prediction trials {pid}_{sid}_{rid}')

      df_run['missed']=df_run['missedExplore']+df_run['missedPredict']

      df_run['participant']=pid
      run_frames.append(df_run)

# Single concatenation; fall back to an empty frame when nothing was included,
# which is what the incremental version produced in that case.
if run_frames:
  df_full=pd.concat(run_frames,axis=0,ignore_index=True)
else:
  df_full=pd.DataFrame()
      

4 missed exploration trials 1_1_1
1 missed exploration trials 1_1_3
Accepting for now, assuming the last prediction responses was/were not recorded...
Accepting for now, assuming the last prediction responses was/were not recorded...
4 missed exploration trials 1bis_1_1
1 missed exploration trials 1bis_1_3
Accepting for now, assuming the last prediction responses was/were not recorded...
Accepting for now, assuming the last prediction responses was/were not recorded...


In [5]:
# recover actions (Crule: A0=>1, A1=>2,A2=>3)
# Missing values become -1 so that these columns can be stored as integers.
cols_to_int = ['trial_tag_bool', 'action_side', 'current_state','prediction_targets_end','prediction_state','current_rule','Accuracy','feedback','E_key_resp.keys', 'state_choice']
df_full[cols_to_int] = df_full[cols_to_int].fillna(-1).astype(int)

# Transition tables, indexed as rules[current_rule][state][action] -> next state
# (all 0-indexed).
rules=[
  # current_rule==0: the next state depends only on the state
  [
  [1,1,1],
  [2,2,2],
  [0,0,0],
  ],
  # current_rule==1: the next state equals the action
  [
  [0,1,2],
  [0,1,2],
  [0,1,2],
  ],
]

# Pull the columns out once; row-by-row .loc access inside iterrows is slow and
# iterrows does not guarantee that integer dtypes are preserved.
n_rows=len(df_full)
tag=df_full['trial_tag_bool'].to_numpy()
missed=df_full['missed'].to_numpy()
side=df_full['action_side'].to_numpy()
key=df_full['E_key_resp.keys'].to_numpy()
cur_state=df_full['current_state'].to_numpy()
pred_state=df_full['prediction_state'].to_numpy()
pred_target=df_full['prediction_targets_end'].to_numpy()
rule=df_full['current_rule'].to_numpy()
participant=df_full['participant'].to_numpy()

# -1 marks "not available" (missed trials, or no valid following trial)
action=np.full(n_rows,-1,dtype=np.int64)
next_state=np.full(n_rows,-1,dtype=np.int64)

for i in range(n_rows):

  if missed[i]!=0:
    continue

  if tag[i]==0:
    # Exploration trial: the pressed key (1 or 2) selects one of the two
    # entries of SAmap for the current state; the order is flipped when
    # action_side is not 1.
    key_pos=key[i]-1
    if side[i]!=1:
      key_pos=1-key_pos
    action[i]=metadata['SAmap'][cur_state[i]-1][key_pos]

    # The next state is the state shown on the following exploration row.
    # Guard against running past the last row (previously a KeyError) and
    # against chaining the last trial of one participant to the first trial
    # of the next one.
    # NOTE(review): session/run boundaries within a participant are still
    # crossed here; no run identifier column is visible in this notebook.
    nxt=i+1
    if nxt<n_rows and tag[nxt]==0 and participant[nxt]==participant[i]:
      next_state[i]=cur_state[nxt]-1

  elif tag[i]==1:
    # Prediction trial: action and state are given explicitly (1-indexed),
    # and the next state is read from the transition table.
    action[i]=pred_target[i]-1
    next_state[i]=rules[rule[i]][pred_state[i]-1][pred_target[i]-1]

df_full['action']=action
df_full['next_state']=next_state

In [None]:
# Unified 0-indexed state column (-1 when unavailable).
# current_state is 1-indexed (it is used as current_state-1 when indexing SAmap
# and when deriving next_state), so it is shifted here as well; previously
# exploration trials kept the 1-indexed value while prediction trials were
# 0-indexed.
has_current_state=df_full['current_state']>=1
df_full['state']=np.where(has_current_state,df_full['current_state']-1,-1)
is_prediction=df_full['prediction_state']>=0
df_full.loc[is_prediction,'state']=df_full.loc[is_prediction,'prediction_state'].values-1

# Reward is only defined on trials with feedback, where it equals Accuracy.
# The values used to be written to a stray 'R' column, leaving 'reward' all-NaN.
df_full['reward']=np.nan
has_feedback=df_full['feedback']==1
df_full.loc[has_feedback,'reward']=df_full.loc[has_feedback,'Accuracy'].values
# Backward-compatible alias for anything that already reads 'R'; can be dropped
# once nothing depends on it.
df_full['R']=df_full['reward']

# A block starts on every row whose trial_num is 0
df_full['newblock']=0
df_full.loc[df_full.trial_num==0,'newblock']=1

df_full['state']=df_full['state'].astype(int)
df_full['action']=df_full['action'].astype(int)
df_full['next_state']=df_full['next_state'].astype(int)
df_full['visit']=df_full['visit'].astype(int)

In [12]:
# Quick look at the integer-coded prediction_state column
df_full.loc[:,'prediction_state']

0      -1
1      -1
2      -1
3      -1
4      -1
       ..
1455   -1
1456   -1
1457   -1
1458    3
1459    3
Name: prediction_state, Length: 1460, dtype: int64

In [8]:
# Mean Accuracy on prediction trials for every (prediction_state,
# prediction_targets_end, state_choice) combination, reported per rule.
sanity_keys=['prediction_state','prediction_targets_end','state_choice']
prediction_rows=df_full['trial_tag_bool']==1
for banner, rule_code in (
  ("Sanity check, prediction in controllable rule", 1),
  ("Sanity check, prediction in uncontrollable rule", 0),
):
  print(banner)
  rule_rows=df_full[prediction_rows & (df_full['current_rule']==rule_code)]
  print(rule_rows.groupby(sanity_keys)['Accuracy'].mean())

Sanity check, prediction in controllable rule
prediction_state  prediction_targets_end  state_choice
1                 2                        2              1.0
                                           3              0.0
                  3                        2              0.0
                                           3              1.0
2                 1                        1              1.0
                                           3              0.0
                  3                        1              0.0
                                           3              1.0
3                 1                       -1             -1.0
                                           1              1.0
                  2                       -1             -1.0
                                           1              0.0
                                           2              1.0
Name: Accuracy, dtype: float64
Sanity check, prediction in uncontrollable rule
prediction_sta

In [9]:
# Store the compiled dataframe as <folder_compiled_data>/<dataset_name>/dataset.pkl,
# creating the folder first if it does not exist yet.
dataset_dir=os.path.join(metadata['folder_compiled_data'],metadata['dataset_name'])
os.makedirs(dataset_dir,exist_ok=True)
dataset_path=os.path.join(dataset_dir,'dataset.pkl')
df_full.to_pickle(dataset_path)

In [10]:
# Pickle the metadata dictionary next to the dataset, as metadata.pkl
metadata_path=os.path.join(
    metadata['folder_compiled_data'],
    metadata['dataset_name'],
    'metadata.pkl',
)
with open(metadata_path, 'wb') as metadata_file:
    pickle.dump(metadata, metadata_file)