In [31]:
import pandas as pd
import glob
import numpy as np
from scipy import stats

In [32]:
all_files =  sorted(glob.glob('/BICNAS2/tuominen/feobv-fmri/sourcedata/par/**/*wm*.txt'))

In [33]:
alltasks = pd.DataFrame(columns=['Subject','SubTrial','BlockCondition','CorrectHit','Slide3.RESP','Slide3.ACC','Slide3.RT'])
skipped_files = []

for f in all_files:
    try:
        run = int(f.split('/')[-1].split('_')[-1][-6:-4])
        
        # Try UTF-8 first, then fallback to other encodings
        try:
            par = pd.read_table(f, header=3, encoding='utf-8')
        except UnicodeDecodeError:
            # Try UTF-16 or latin-1 for problematic files
            try:
                par = pd.read_table(f, header=3, encoding='utf-16')
            except:
                par = pd.read_table(f, header=3, encoding='latin-1')
        
        wmtask = par[['Subject','SubTrial','BlockCondition','CorrectHit','Slide3.RESP','Slide3.ACC','Slide3.RT']]
        wmtask = wmtask.fillna(0)    
        wmtask['run'] = run
        ### calculate d prime stats
        wmtask['n_hit'] = np.where((wmtask['CorrectHit'] == 1) & (wmtask['Slide3.RESP'] == 1), 1, 0)
        wmtask['n_cr'] = np.where((wmtask['CorrectHit'] == 0) & (wmtask['Slide3.RESP'] == 0), 1, 0)
        wmtask['n_fa'] = np.where((wmtask['CorrectHit'] == 0) & (wmtask['Slide3.RESP'] == 1), 1, 0)
        wmtask['n_miss'] = np.where((wmtask['CorrectHit'] == 1) & (wmtask['Slide3.RESP'] == 0), 1, 0)
        alltasks = pd.concat([alltasks, wmtask])
    except Exception as e:
        skipped_files.append(f)
        
alltasks['group'] = alltasks['Subject'].apply(lambda x: 'hc' if x < 500 else 'scz')

print(f"Total files: {len(all_files)}")
print(f"Successfully processed: {len(all_files) - len(skipped_files)}")
print(f"Skipped: {len(skipped_files)}")
if skipped_files:
    print(f"\nSkipped files:")
    for sf in skipped_files:
        print(f"  - {sf}")

  alltasks = pd.concat([alltasks, wmtask])


Total files: 174
Successfully processed: 171
Skipped: 3

Skipped files:
  - /BICNAS2/tuominen/feobv-fmri/sourcedata/par/060/FEOBV060_wm001.txt
  - /BICNAS2/tuominen/feobv-fmri/sourcedata/par/060/FEOBV060_wm002.txt
  - /BICNAS2/tuominen/feobv-fmri/sourcedata/par/060/FEOBV060_wm003.txt


In [34]:
alltasks

Unnamed: 0,Subject,SubTrial,BlockCondition,CorrectHit,Slide3.RESP,Slide3.ACC,Slide3.RT,run,n_hit,n_cr,n_fa,n_miss,group
0,2,1,2-back,0.0,0.0,1,0,1.0,0.0,1.0,0.0,0.0,hc
1,2,2,2-back,0.0,0.0,1,0,1.0,0.0,1.0,0.0,0.0,hc
2,2,3,2-back,0.0,0.0,1,0,1.0,0.0,1.0,0.0,0.0,hc
3,2,4,2-back,1.0,1.0,1,936,1.0,1.0,0.0,0.0,0.0,hc
4,2,5,2-back,0.0,0.0,1,0,1.0,0.0,1.0,0.0,0.0,hc
...,...,...,...,...,...,...,...,...,...,...,...,...,...
67,545,8,0-back,0.0,0.0,1,0,3.0,0.0,1.0,0.0,0.0,scz
68,545,9,0-back,0.0,0.0,1,0,3.0,0.0,1.0,0.0,0.0,scz
69,545,10,0-back,1.0,1.0,1,604,3.0,1.0,0.0,0.0,0.0,scz
70,545,11,0-back,1.0,1.0,1,559,3.0,1.0,0.0,0.0,0.0,scz


In [37]:
# not sure if 203 is 28
alltasks = alltasks.loc[alltasks.Subject!=203]

In [38]:
alltasks.to_csv('/BICNAS2/tuominen/ANM2_SCZ/code/wmtask_allsubjects.csv')