In [1]:
import os
import pandas as pd
import numpy as np
from convert_eprime import convert as ep

In [2]:
# Both folders are addressed relative to the notebook's parent directory:
# `source_dir` is where per-subject inputs are read (and cleaned CSVs written),
# `derivs_dir` is where the combined outputs are saved.
source_dir = os.path.join(os.pardir, 'sourcedata')
derivs_dir = os.path.join(os.pardir, 'derivatives')

#### Define a function to clean the N-back data

Stack the blocks vertically instead of horizontally, label the trial rows properly, and tag each trial as a HIT, MISS, FA, or CR. We also output a new, cleaned-up CSV data file in the sourcedata folder.

In [3]:
def nstack_score_label(fpath,outpath):
    """Reshape one wide N-back workbook to long format, score it, and save it as CSV.

    The workbook is read with ``pd.read_excel`` and must have one row per trial
    and columns named ``<block>.<measure>``. The measures ``Rsp``, ``CRsp`` and
    ``RT`` are used below. Any number of blocks is accepted.

    Parameters
    ----------
    fpath : str
        Path to the Excel file. The subject label is parsed from its basename,
        which must look like ``sub-<label>_...``.
    outpath : str
        Path of the CSV file to write. The index columns are ``sub``, ``block``
        and ``trial``; the scoring columns ``CR``, ``MISS``, ``FA`` and ``HIT``
        are added and an ``RT`` of 0 is stored as NaN.

    Raises
    ------
    ValueError
        If a column name does not contain a ``.`` separator.
    """
    df = pd.read_excel(fpath)

    # Hierarchicalize the column index: '<block>.<measure>' -> (block, measure).
    # Every column is converted, so the workbook is not limited to six columns.
    pairs = []
    for name in df.columns:
        parts = str(name).split('.')
        if len(parts) < 2:
            raise ValueError(
                "Expected '<block>.<measure>' column names, got %r in %s" % (name, fpath)
            )
        pairs.append((parts[0], parts[1]))
    df.columns = pd.MultiIndex.from_tuples(pairs)

    # Stack blocks, Reset trial row index, and Rename columns to be descriptive
    df = df.stack(0).reset_index().rename(
        columns={'level_0':'trial','level_1':'block'}
    ).sort_values(['block','trial'])
    df['sub'] = os.path.basename(fpath).split('_')[0].split('-')[1]
    # Keep only the second character of the block label
    # (presumably 'B1' -> '1'; confirm against the workbook headers).
    df['block'] = df['block'].str[1]
    # Row positions are 0-based; trials are numbered from 1.
    df['trial'] = df['trial'] + 1
    df = df.set_index(['sub','block','trial'])

    # Determine Hits, Misses, CRs, FAs from the Rsp / CRsp combination
    cr_mask = (df['Rsp'] == 0) & (df['CRsp'] == 0)
    ms_mask = (df['Rsp'] == 0) & (df['CRsp'] == 1)
    fa_mask = (df['Rsp'] == 1) & (df['CRsp'] == 0)
    ht_mask = (df['Rsp'] == 1) & (df['CRsp'] == 1)
    df['CR']   = cr_mask.astype(int)
    df['MISS'] = ms_mask.astype(int)
    df['FA']   = fa_mask.astype(int)
    df['HIT']  = ht_mask.astype(int)

    # Convert RT 0 to RT NaN (np.nan: the np.NaN alias no longer exists in NumPy 2.0)
    df['RT'] = df['RT'].replace(0,np.nan)

    # Output to new CSV datafile
    df.to_csv(outpath)
    print('Output file successfully created- ',outpath)

#### Read all the subject data

Reading only data for the full sample (100-series YA & 200-series OA). Executing N-back data cleaning & EPrime text-to-csv conversion. Setting up for subject-level analysis.

In [4]:
# Per-subject frames collected for the subject-level analysis below:
#   ep_frames  - converted E-Prime text files (*_beh.txt)
#   ptb_frames - cleaned N-back workbooks (*_beh.xlsx)
ep_frames=[]
ptb_frames=[]

# os.listdir returns entries in arbitrary order; sorting keeps the frame order
# (and so the row order of the concatenated outputs) reproducible across runs.
for s in sorted(os.listdir(source_dir)):
    sub_dir = os.path.join(source_dir,s)
    # Only the 100- and 200-series subject folders; skip stray files with a matching name.
    if not s.startswith(('sub-1','sub-2')) or not os.path.isdir(sub_dir):
        continue
    for f in sorted(os.listdir(sub_dir)):
        fpath = os.path.join(sub_dir,f)
        # The CSV is written next to its source file, with only the extension swapped.
        outpath = os.path.join(sub_dir,os.path.splitext(f)[0]+'.csv')
        suffix = f.split('_')[-1]
        if suffix == 'beh.txt':
            print(f)
            ep.text_to_csv(fpath,outpath)
            ep_frames.append(pd.read_csv(outpath))
        elif suffix == 'beh.xlsx':
            print(f)
            nstack_score_label(fpath,outpath)
            ptb_frames.append(pd.read_csv(outpath))
            # Overwrite 'sub' with the label text from the filename
            # (presumably because read_csv parses it back as a number).
            ptb_frames[-1]['sub'] = f.split('_')[0].split('-')[1]
print("Done!")


sub-101_task-nback_beh.xlsx
Output file successfully created-  ..\sourcedata\sub-101\sub-101_task-nback_beh.csv
sub-101_task-procspeed_run-1_beh.txt
Output file successfully created- ..\sourcedata\sub-101\sub-101_task-procspeed_run-1_beh.csv
sub-102_task-nback_beh.xlsx
Output file successfully created-  ..\sourcedata\sub-102\sub-102_task-nback_beh.csv
sub-102_task-procspeed_run-1_beh.txt
Output file successfully created- ..\sourcedata\sub-102\sub-102_task-procspeed_run-1_beh.csv
sub-103_task-nback_beh.xlsx
Output file successfully created-  ..\sourcedata\sub-103\sub-103_task-nback_beh.csv
sub-103_task-procspeed_run-1_beh.txt
Output file successfully created- ..\sourcedata\sub-103\sub-103_task-procspeed_run-1_beh.csv
sub-104_task-nback_beh.xlsx
Output file successfully created-  ..\sourcedata\sub-104\sub-104_task-nback_beh.csv
sub-104_task-procspeed_run-1_beh.txt
Output file successfully created- ..\sourcedata\sub-104\sub-104_task-procspeed_run-1_beh.csv
sub-105_task-nback_beh.xlsx
Outp

Output file successfully created-  ..\sourcedata\sub-204\sub-204_task-nback_beh.csv
sub-204_task-procspeed_run-1_beh.txt
Output file successfully created- ..\sourcedata\sub-204\sub-204_task-procspeed_run-1_beh.csv
sub-205_task-nback_beh.xlsx
Output file successfully created-  ..\sourcedata\sub-205\sub-205_task-nback_beh.csv
sub-205_task-procspeed_run-1_beh.txt
Output file successfully created- ..\sourcedata\sub-205\sub-205_task-procspeed_run-1_beh.csv
sub-206_task-nback_beh.xlsx
Output file successfully created-  ..\sourcedata\sub-206\sub-206_task-nback_beh.csv
sub-206_task-procspeed_run-1_beh.txt
Output file successfully created- ..\sourcedata\sub-206\sub-206_task-procspeed_run-1_beh.csv
sub-207_task-nback_beh.xlsx
Output file successfully created-  ..\sourcedata\sub-207\sub-207_task-nback_beh.csv
sub-207_task-procspeed_run-1_beh.txt
Output file successfully created- ..\sourcedata\sub-207\sub-207_task-procspeed_run-1_beh.csv
sub-210_task-nback_beh.xlsx
Output file successfully created

###### Output N-back trial-level data

In [5]:
# Stack every subject's cleaned N-back trials and save them without the row index.
nback_trial_csv = os.path.join(derivs_dir, 'nback_trial_data.csv')
pd.concat(ptb_frames).to_csv(nback_trial_csv, index=False)

### Group, expand, trim N-back data
Group by subjects, get the sum of all columns, the count of the trial column, and the mean of the RT column.

Establish Hit % `number of Hits / number of targets` and FA % `number of FAs / number of foils`. 

Corrected Recognition `HIT% - FA%`. 

In [6]:
# Aggregate only the columns each statistic needs, rather than summing,
# counting and averaging the whole frame just to pull out single columns.
grouped = pd.concat(ptb_frames).groupby('sub')
counts = grouped[['HIT','FA','CRsp']].sum()
n_trials = grouped['trial'].count()
n_targets = counts['CRsp']                  # trials with CRsp == 1 (targets)
n_foils = n_trials - n_targets              # all remaining trials

nback_df = pd.DataFrame({
    'RT': grouped['RT'].mean(),             # mean() skips the NaN RTs
    'HIT%': counts['HIT'] / n_targets,
    'FA%': counts['FA'] / n_foils,
})
# Corrected recognition: hit rate minus false-alarm rate
nback_df['CoR'] = nback_df['HIT%'] - nback_df['FA%']
nback_df.head()

Unnamed: 0_level_0,RT,HIT%,FA%,CoR
sub,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
101,779.888889,0.875,0.0625,0.8125
102,626.272727,0.75,0.15625,0.59375
103,618.571429,0.875,0.0,0.875
104,505.571429,0.875,0.0,0.875
105,742.913043,1.0,0.46875,0.53125


###### Output N-back subject-level data

In [7]:
# One row per subject; the 'sub' index is written as the first CSV column.
nback_subject_csv = os.path.join(derivs_dir, 'nback_subject_data.csv')
nback_df.to_csv(nback_subject_csv)

### ProcSpd data
Cleanup: Rehomogenize subject column, concatenate all frames, drop unneeded columns and rows

In [9]:
# Fill each frame's whole 'Subject' column with the value stored at row label 22.
# NOTE(review): 22 is hard-coded; presumably a row where the converted E-Prime
# file reports the subject number. Confirm against the converted CSVs.
for ep_frame in ep_frames:
    ep_frame['Subject'] = ep_frame.loc[22, 'Subject']

In [11]:
# Stack all converted E-Prime frames row-wise. Row labels are kept as they are,
# so index values repeat across subjects.
procspd_df = pd.concat(ep_frames, axis=0)

In [12]:
# Columns removed before analysis; the RT computation below only needs the
# TargetStimulus/Buffer RT and OnsetToOnsetTime fields plus 'Subject'.
drop = [
    'DataFile.Basename', 'RandomSeed', 'Group',
    'Display.RefreshRate', 'Clock.Information',
    'StudioVersion', 'RuntimeVersion', 'RuntimeVersionExpected', 'RuntimeCapabilities',
    'Experiment', 'SessionDate', 'SessionTime', 'SessionStartDateTimeUtc', 'Session',
    'TestingList', 'JitterList', 'TestingList.Cycle', 'TestingList.Sample',
    'BlockList', 'BlockList.Cycle', 'BlockList.Sample', 'Running',
    'TargetStimulus.OnsetTime', 'TargetStimulus.DurationError', 'TargetStimulus.RTTime',
    'TargetStimulus.RESP', 'TargetStimulus.CRESP', 'TargetStimulus.OnsetDelay',
    'Buffer.OnsetTime', 'Buffer.OnsetDelay', 'Buffer.DurationError', 'Buffer.RTTime',
    'Buffer.RESP', 'Buffer.CRESP',
]
# Keep only the rows logged by the 'TrialProc' procedure, then drop the unused columns.
procspd_df = (
    procspd_df
    .loc[procspd_df['Procedure'].eq('TrialProc')]
    .drop(columns=drop)
)

Unnamed: 0,Procedure,Jitter,Fixation.OnsetToOnsetTime,TargetStimulus.ACC,TargetStimulus.RT,TargetStimulus.OnsetToOnsetTime,Buffer.ACC,Buffer.RT,Buffer.OnsetToOnsetTime,Subject,ExperimentVersion
1,TrialProc,6500.0,6508.0,0.0,0.0,250.0,1.0,74.0,0.0,101.0,
2,TrialProc,7500.0,7509.0,0.0,0.0,250.0,1.0,104.0,0.0,101.0,
3,TrialProc,7500.0,7508.0,0.0,0.0,250.0,1.0,126.0,0.0,101.0,
4,TrialProc,5500.0,5506.0,0.0,0.0,250.0,1.0,102.0,0.0,101.0,
5,TrialProc,7000.0,7008.0,0.0,0.0,250.0,1.0,72.0,0.0,101.0,
6,TrialProc,7500.0,7509.0,0.0,0.0,250.0,1.0,69.0,0.0,101.0,
7,TrialProc,9000.0,9010.0,0.0,0.0,250.0,1.0,13.0,0.0,101.0,
8,TrialProc,5000.0,5006.0,0.0,0.0,250.0,1.0,138.0,0.0,101.0,
9,TrialProc,6000.0,6007.0,0.0,0.0,250.0,1.0,133.0,0.0,101.0,
10,TrialProc,6000.0,6007.0,0.0,0.0,250.0,1.0,17.0,0.0,101.0,


###### Combine response windows for final RT

In [20]:
def calculate_rt(row):
    """Return the final reaction time for one trial row.

    `row` is any mapping / Series providing 'TargetStimulus.RT', 'Buffer.RT'
    and 'TargetStimulus.OnsetToOnsetTime'.

    * A positive target RT is returned unchanged.
    * A target RT of 0 with a positive buffer RT gives the buffer RT plus
      the target's onset-to-onset time.
    * Anything else gives NaN.
    """
    target_rt = row['TargetStimulus.RT']
    buffer_rt = row['Buffer.RT']
    target_window = row['TargetStimulus.OnsetToOnsetTime']

    if target_rt > 0:
        return target_rt
    if target_rt == 0 and buffer_rt > 0:
        # Buffer RT is offset by the target's onset-to-onset time.
        return buffer_rt + target_window
    return np.nan

# Row-wise apply: calculate_rt receives each trial as a Series.
procspd_df['RT'] = procspd_df.apply(calculate_rt, axis='columns')

###### Output ProcSpd trial-level data

In [22]:
# Save the trimmed trial-level ProcSpd table; the row index is not written.
procspd_trial_csv = os.path.join(derivs_dir, 'procspd_trial_data.csv')
procspd_df.to_csv(procspd_trial_csv, index=False)

#### Group and output ProcSpd subject-level data

In [31]:
# Mean RT per subject, written with 'Subject' as the first CSV column.
# RT is selected *before* aggregating: procspd_df still holds the string column
# 'Procedure', and on pandas >= 2.0 a frame-wide grouped.mean() raises TypeError
# for non-numeric columns instead of silently dropping them.
grouped = procspd_df.groupby('Subject')
grouped[['RT']].mean().to_csv(os.path.join(derivs_dir,'procspd_subject_data.csv'))