In [1]:
import os
import pandas as pd
import numpy as np
from convert_eprime import convert as ep

In [2]:
# Raw per-subject inputs and derived outputs live in sibling folders one
# level above the notebook's working directory.
source_dir, derivs_dir = (
    os.path.join('..', folder) for folder in ('sourcedata', 'derivatives')
)

#### Define a function to clean the N-back data

Stack the blocks vertically instead of horizontally, label the trial rows properly, and tag each trial as a HIT, MISS, FA, or CR. We also output a new, cleaned-up CSV data file in the sourcedata folder.

In [3]:
def nstack_score_label(fpath,outpath):
    """Reshape one subject's N-back workbook to long format, score it, and save it as CSV.

    The workbook's column headers are split on ``'.'`` into a
    ``(block, measure)`` pair. The block level is then stacked into rows so
    that each output row is one trial of one block. The measures ``Rsp``,
    ``CRsp`` and ``RT`` must be present.

    Each trial gets four 0/1 indicator columns derived from ``Rsp`` and
    ``CRsp`` (presumably the given and the correct response -- confirm
    against the task definition):

    * ``CR``   -- Rsp == 0 and CRsp == 0
    * ``MISS`` -- Rsp == 0 and CRsp == 1
    * ``FA``   -- Rsp == 1 and CRsp == 0
    * ``HIT``  -- Rsp == 1 and CRsp == 1

    An ``RT`` of 0 is replaced by NaN.

    Parameters
    ----------
    fpath : str
        Path to the Excel workbook. The subject ID is taken from the file
        name: the text after the first ``'-'`` in the part preceding the
        first ``'_'`` (e.g. ``sub-401_task-nback_beh.xlsx`` -> ``'401'``).
    outpath : str
        Destination path of the CSV file. The frame is written with
        ``sub``, ``block`` and ``trial`` as its index columns.

    Returns
    -------
    None
        The result is written to ``outpath`` and a confirmation is printed.
    """
    df = pd.read_excel(fpath)

    # Hierarchicalize the column index: '<block>.<measure>' -> (block, measure).
    # Built from every column present rather than a fixed count of six.
    df.columns = pd.MultiIndex.from_tuples(
        [(parts[0], parts[1]) for parts in (col.split('.') for col in df.columns)]
    )

    # Stack blocks, Reset trial row index, and Rename columns to be descriptive
    df = df.stack(0).reset_index().rename(
        columns={'level_0':'trial','level_1':'block'}
    ).sort_values(['block','trial'])
    df['sub'] = os.path.basename(fpath).split('_')[0].split('-')[1]
    # Keep only the second character of the block label
    # (assumes labels look like 'B1' -- TODO confirm for blocks >= 10).
    df['block'] = df['block'].str[1]
    # The original row index is 0-based; trials are numbered from 1.
    df['trial'] = df['trial'] + 1
    df = df.set_index(['sub','block','trial'])

    # Determine Hits, Misses, CRs, FAs
    cr_mask = (df['Rsp'] == 0) & (df['CRsp'] == 0)
    ms_mask = (df['Rsp'] == 0) & (df['CRsp'] == 1)
    fa_mask = (df['Rsp'] == 1) & (df['CRsp'] == 0)
    ht_mask = (df['Rsp'] == 1) & (df['CRsp'] == 1)
    df['CR']   = cr_mask.astype(int)
    df['MISS'] = ms_mask.astype(int)
    df['FA']   = fa_mask.astype(int)
    df['HIT']  = ht_mask.astype(int)

    # Convert RT 0 to RT NaN.
    # np.nan, not np.NaN: the capitalised alias was removed in NumPy 2.0.
    df['RT'] = df['RT'].replace(0,np.nan)

    # Output to new CSV datafile
    df.to_csv(outpath)
    print('Output file successfully created- ',outpath)

#### Read all the subject data

Run the N-back data cleaning on each subject's file and collect the cleaned trials for the subject-level analysis.

In [4]:
# One cleaned, trial-level frame per subject workbook.
ptb_frames=[]

# os.listdir returns entries in arbitrary order; sort both listings so the
# processing order (and the row order of the concatenated data) is the same
# on every run and every filesystem.
for s in sorted(os.listdir(source_dir)):
    sub_dir = os.path.join(source_dir,s)
    # Only subject folders named 'sub-4*' are processed; a stray file with
    # that prefix would otherwise make the inner listdir raise.
    if not (s.startswith('sub-4') and os.path.isdir(sub_dir)):
        continue
    for f in sorted(os.listdir(sub_dir)):
        # Only the behavioural workbooks ('..._beh.xlsx') are cleaned.
        if f.split('_')[-1] != 'beh.xlsx':
            continue
        print(f)
        fpath = os.path.join(sub_dir,f)
        # The cleaned CSV sits next to the workbook, same stem.
        outpath = os.path.join(sub_dir,f.split('.')[0]+'.csv')
        nstack_score_label(fpath,outpath)
        sub_frame = pd.read_csv(outpath)
        # Overwrite 'sub' with the ID string taken from the file name, so it
        # stays a string even if read_csv inferred a numeric dtype for it.
        sub_frame['sub'] = f.split('_')[0].split('-')[1]
        ptb_frames.append(sub_frame)
print("Done!")


sub-401_task-nback_beh.xlsx
Output file successfully created-  ..\sourcedata\sub-401\sub-401_task-nback_beh.csv
sub-402_task-nback_beh.xlsx
Output file successfully created-  ..\sourcedata\sub-402\sub-402_task-nback_beh.csv
Done!


###### Output N-back trial-level data

In [5]:
# to_csv does not create missing folders, so make sure the derivatives
# folder exists before writing (no-op when it is already there).
os.makedirs(derivs_dir, exist_ok=True)
# All subjects' trials in one long table; the default integer index is not written.
pd.concat(ptb_frames).to_csv(os.path.join(derivs_dir,'nback_trial_data.csv'),index=False)

### Group, expand, trim N-back data
Group by subjects, get the sum of all columns, the count of the trial column, and the mean of the RT column.

Compute Hit % as `number of Hits / number of targets` and FA % as `number of FAs / number of foils`.

Corrected Recognition is `HIT% - FA%`.

In [6]:
grouped = pd.concat(ptb_frames).groupby('sub')
# Per-subject column sums. numeric_only=True keeps a stray text column from
# breaking or polluting the aggregation.
nback_df = grouped.sum(numeric_only=True)
# Aggregate just the column that is needed instead of every column:
# number of (non-null) trials and mean RT (NaN RTs are skipped by mean).
nback_df['trial'] = grouped['trial'].count()
nback_df['RT'] = grouped['RT'].mean()
# The CRsp sum is used as the number of targets and (trials - that sum) as
# the number of foils -- assumes CRsp is coded 0/1 (TODO confirm).
nback_df['HIT%'] = nback_df['HIT'] / nback_df['CRsp']
nback_df['FA%'] = nback_df['FA'] / (nback_df['trial'] - nback_df['CRsp'])
# Corrected recognition: hit rate minus false-alarm rate.
nback_df['CoR'] = nback_df['HIT%'] - nback_df['FA%']
# Keep only the subject-level summary measures.
nback_df = nback_df[['RT','HIT%','FA%','CoR']]
nback_df.head()

Unnamed: 0_level_0,RT,HIT%,FA%,CoR
sub,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
401,642.0,0.75,0.0625,0.6875
402,963.866667,0.75,0.28125,0.46875


###### Output N-back subject-level data

In [7]:
# Write the subject-level summary; the index ('sub') is kept as the first column.
subject_csv = os.path.join(derivs_dir, 'nback_subject_data.csv')
nback_df.to_csv(subject_csv)