In [1]:
import os
import pandas as pd
import numpy as np
from convert_eprime import convert as ep

In [2]:
# Raw per-subject inputs are read from ../sourcedata; this notebook's cleaned
# outputs are written under ../derivatives/0.3.cleaned.
source_dir = os.path.join('..', 'sourcedata')
derivs_dir = os.path.join('..', 'derivatives', '0.3.cleaned')

#### Define a function to clean the N-back data

Stack the blocks vertically instead of horizontally, label the trial rows properly, and tag each trial as a HIT, MISS, FA, or CR. We also output a new, cleaned-up CSV data file to the sourcedata folder.

In [3]:
def nstack_score_label(df, source_path=None):
    """Reshape one subject's wide N-back sheet to long format and score each trial.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per trial, with flat column labels of the form
        ``<block>.<measure>``. After splitting, the measures ``Rsp``, ``CRsp``
        and ``RT`` must be present. The second character of the block label is
        kept as the block id (assumes labels look like ``B1`` -- TODO confirm).
        The frame passed in is not modified.
    source_path : str, optional
        Path of the file ``df`` was read from; the subject id is parsed from a
        basename of the form ``sub-<id>_...``. When omitted, the notebook-global
        ``fpath`` is used, which is how the function originally worked.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``(sub, block, trial)`` (trial numbers start at 1), with the
        measure columns plus 0/1 indicator columns ``CR``, ``MISS``, ``FA`` and
        ``HIT``. ``RT`` values of 0 are replaced by NaN.
    """
    if source_path is None:
        # Legacy behaviour: rely on the `fpath` global set by the file loop.
        source_path = fpath

    # Work on a copy so the caller's frame keeps its original flat column labels
    df = df.copy()

    # Hierarchicalize the column index: 'B1.Rsp' -> ('B1', 'Rsp'),
    # for however many block/measure columns the sheet has
    df.columns = pd.MultiIndex.from_tuples(
        [(parts[0], parts[1]) for parts in (col.split('.') for col in df.columns)]
    )

    # Stack blocks, Reset trial row index, and Rename columns to be descriptive
    df = df.stack(0).reset_index().rename(
        columns={'level_0':'trial','level_1':'block'}
    ).sort_values(['block','trial'])
    df['sub'] = os.path.basename(source_path).split('_')[0].split('-')[1]
    df['block'] = df['block'].str[1]
    df['trial'] = df['trial'] + 1  # row positions are 0-based; trials are 1-based
    df = df.set_index(['sub','block','trial'])

    # Determine Hits, Misses, CRs, FAs from the response (Rsp) crossed with
    # the correct response (CRsp)
    cr_mask = (df['Rsp'] == 0) & (df['CRsp'] == 0)
    ms_mask = (df['Rsp'] == 0) & (df['CRsp'] == 1)
    fa_mask = (df['Rsp'] == 1) & (df['CRsp'] == 0)
    ht_mask = (df['Rsp'] == 1) & (df['CRsp'] == 1)
    df['CR']   = cr_mask.astype(int)
    df['MISS'] = ms_mask.astype(int)
    df['FA']   = fa_mask.astype(int)
    df['HIT']  = ht_mask.astype(int)

    # Convert RT 0 to RT NaN (np.nan: the np.NaN alias was removed in NumPy 2.0)
    df['RT'] = df['RT'].replace(0, np.nan)

    # The caller is responsible for writing the cleaned frame to disk
    return df

#### Read all the subject data

Reading only data for the full sample (100-series YA & 200-series OA). Executing N-back data cleaning & EPrime text-to-csv conversion. Setting up for subject-level analysis.

In [4]:
ep_frames=[]   # one frame per converted E-Prime (*_beh.txt) file
ptb_frames=[]  # one frame per cleaned N-back (*_beh.xlsx) file

# os.listdir returns entries in arbitrary order; sort so the frames (and the
# concatenated outputs built from them) come out in the same order every run.
for s in sorted(os.listdir(source_dir)):
    # Full sample only: 100-series and 200-series subject folders
    if not s.startswith(('sub-1','sub-2')):
        continue
    sub_dir = os.path.join(source_dir,s)
    if not os.path.isdir(sub_dir):
        # A stray file whose name starts like a subject folder; nothing to list
        continue
    for f in sorted(os.listdir(sub_dir)):
        o = f.split('.')[0]+'.csv'
        # NOTE: nstack_score_label reads the notebook-global `fpath` to recover
        # the subject id, so this name must stay `fpath`.
        fpath = os.path.join(sub_dir,f)
        outpath = os.path.join(sub_dir,o)
        if f.split('_')[-1] == 'beh.txt':
            print(f)
            # Conversion is cached: only convert when the CSV is missing
            if os.path.isfile(outpath):
                print(os.path.basename(outpath),'exists')
            else:
                ep.text_to_csv(fpath,outpath)
            ep_frames.append(pd.read_csv(outpath))
        elif f.split('_')[-1] == 'beh.xlsx':
            print(f)
            if os.path.isfile(outpath):
                print(os.path.basename(outpath),'exists')
            else:
                # NOTE(review): the input has an .xlsx extension but is parsed
                # with read_csv -- confirm these files are delimited text, or
                # switch to pd.read_excel if they are real workbooks.
                nstack_score_label(pd.read_csv(fpath)).to_csv(outpath)
                print('Output file successfully created- ',outpath)
            ptb_frames.append(pd.read_csv(outpath))
            # Store the subject id as the string taken from the file name
            ptb_frames[-1]['sub'] = f.split('_')[0].split('-')[1]
print("Done!")

sub-101_task-nback_beh.xlsx
sub-101_task-nback_beh.csv exists
sub-101_task-procspeed_run-1_beh.txt
sub-101_task-procspeed_run-1_beh.csv exists
sub-102_task-nback_beh.xlsx
sub-102_task-nback_beh.csv exists
sub-102_task-procspeed_run-1_beh.txt
sub-102_task-procspeed_run-1_beh.csv exists
sub-103_task-nback_beh.xlsx
sub-103_task-nback_beh.csv exists
sub-103_task-procspeed_run-1_beh.txt
sub-103_task-procspeed_run-1_beh.csv exists
sub-104_task-nback_beh.xlsx
sub-104_task-nback_beh.csv exists
sub-104_task-procspeed_run-1_beh.txt
sub-104_task-procspeed_run-1_beh.csv exists
sub-105_task-nback_beh.xlsx
sub-105_task-nback_beh.csv exists
sub-105_task-procspeed_run-1_beh.txt
sub-105_task-procspeed_run-1_beh.csv exists
sub-106_task-nback_beh.xlsx
sub-106_task-nback_beh.csv exists
sub-106_task-procspeed_run-1_beh.txt
sub-106_task-procspeed_run-1_beh.csv exists
sub-107_task-nback_beh.xlsx
sub-107_task-nback_beh.csv exists
sub-107_task-procspeed_run-1_beh.txt
sub-107_task-procspeed_run-1_beh.csv exists

sub-207_task-procspeed_run-1_beh.csv exists
sub-208_task-nback_beh.xlsx
sub-208_task-nback_beh.csv exists
sub-208_task-procspeed_run-1_beh.txt
sub-208_task-procspeed_run-1_beh.csv exists
sub-209_task-nback_beh.xlsx
sub-209_task-nback_beh.csv exists
sub-209_task-procspeed_run-1_beh.txt
sub-209_task-procspeed_run-1_beh.csv exists
sub-210_task-nback_beh.xlsx
sub-210_task-nback_beh.csv exists
sub-210_task-procspeed_run-1_beh.txt
sub-210_task-procspeed_run-1_beh.csv exists
sub-211_task-nback_beh.xlsx
sub-211_task-nback_beh.csv exists
sub-211_task-procspeed_run-1_beh.txt
sub-211_task-procspeed_run-1_beh.csv exists
sub-212_task-nback_beh.xlsx
sub-212_task-nback_beh.csv exists
sub-212_task-procspeed_run-1_beh.txt
sub-212_task-procspeed_run-1_beh.csv exists
sub-213_task-nback_beh.xlsx
sub-213_task-nback_beh.csv exists
sub-213_task-procspeed_run-1_beh.txt
sub-213_task-procspeed_run-1_beh.csv exists
sub-214_task-nback_beh.xlsx
sub-214_task-nback_beh.csv exists
sub-214_task-procspeed_run-1_beh.txt

###### Output N-back trial-level data

In [5]:
# Stack every subject's cleaned N-back frame into one long trial-level table
nback_trials = pd.concat(ptb_frames)

# Write it out; the row index is not saved (index=False)
fpath = os.path.join(derivs_dir, 'nback_trial_level.csv')
nback_trials.to_csv(path_or_buf=fpath, index=False)

### Group, expand, trim N-back data
Group by subject, then get the sum of each column, the count of the trial column, and the mean RT across hit trials.

Establish Hit % `number of Hits / number of targets` and FA % `number of FAs / number of foils`. 

Corrected Recognition `HIT% - FA%`. 

In [6]:
# Per-subject grouping over all trials
grouped_trials = nback_trials.groupby('sub')

# Per-subject grouping over hit trials only (rows where HIT equals 1)
nback_hits = nback_trials.loc[nback_trials['HIT'] == 1]
grouped_hits = nback_hits.groupby('sub')

In [7]:
# Aggregate only the columns the metrics need, rather than every column and
# then discarding most of them. This also keeps the cell working if the
# trial-level table ever gains a non-numeric column.
nback_subs = grouped_trials[['HIT','FA','CRsp']].sum()
nback_subs['trial'] = grouped_trials['trial'].count()

# Mean RT is taken over hit trials only
nback_subs['RT'] = grouped_hits['RT'].mean()

# Hit rate = hits / targets; the CRsp sum is the number of targets
nback_subs['HIT%'] = nback_subs['HIT'] / nback_subs['CRsp']
# FA rate = false alarms / foils; foils are the trials that are not targets
nback_subs['FA%'] = nback_subs['FA'] / (nback_subs['trial'] - nback_subs['CRsp'])
# Corrected recognition
nback_subs['CoR'] = nback_subs['HIT%'] - nback_subs['FA%']

nback_subs = nback_subs[['RT','HIT%','FA%','CoR']]
nback_subs.head()

Unnamed: 0_level_0,RT,HIT%,FA%,CoR
sub,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
101,685.285714,0.875,0.0625,0.8125
102,572.0,0.75,0.15625,0.59375
103,618.571429,0.875,0.0,0.875
104,505.571429,0.875,0.0,0.875
105,715.25,1.0,0.46875,0.53125


###### Output N-back subject-level data

In [8]:
# Subject-level N-back summary; the index is written out with the data
fpath = os.path.join(derivs_dir, 'nback_subject_level.csv')
nback_subs.to_csv(path_or_buf=fpath)

### ProcSpd data
Cleanup: Rehomogenize subject column, concatenate all frames, drop unneeded columns and rows

In [9]:
# Rehomogenize: overwrite each file's Subject column with the value found in
# that file's last row, so every row of a file carries the same subject id.
for ep_frame in ep_frames:
    last_subject = ep_frame['Subject'].iloc[-1]
    ep_frame['Subject'] = last_subject

# One long table holding every subject's processing-speed rows
procspd_trials = pd.concat(ep_frames)

In [10]:
# Keep only the 'TrialProc' rows and the columns needed to compute RT.
# A single .loc selection followed by .copy() yields an independent frame, so
# adding the RT column later does not raise SettingWithCopyWarning.
procspd_trials = procspd_trials.loc[
    procspd_trials['Procedure']=='TrialProc',
    ['Subject','TargetStimulus.RT','TargetStimulus.OnsetToOnsetTime','Buffer.RT'],
].copy()

###### Combine response windows for final RT

In [11]:
def calculate_rt(row):
    """Combine the two response windows of one trial into a single RT.

    ``row`` is indexed by column label and must provide 'TargetStimulus.RT',
    'Buffer.RT' and 'TargetStimulus.OnsetToOnsetTime'.

    Returns the stimulus-window RT when it is positive. When it is exactly 0
    and the buffer-window RT is positive, returns the buffer RT plus the
    stimulus onset-to-onset time. Returns NaN in every other case.
    """
    stim_rt = row['TargetStimulus.RT']
    buffer_rt = row['Buffer.RT']
    stim_duration = row['TargetStimulus.OnsetToOnsetTime']

    # A positive RT was recorded in the stimulus window itself
    if stim_rt > 0:
        return stim_rt

    # No stimulus-window RT, but one was recorded in the buffer window:
    # add the stimulus onset-to-onset time to the buffer RT
    if stim_rt == 0 and buffer_rt > 0:
        return buffer_rt + stim_duration

    # Neither window holds a usable RT
    return np.nan

# Row-wise: calculate_rt is called once per trial row to produce the final RT
procspd_trials['RT'] = procspd_trials.apply(calculate_rt, axis='columns')

###### Output Procspd trial-level data

In [12]:
# Trial-level processing-speed table; the row index is not saved (index=False)
fpath = os.path.join(derivs_dir, 'procspd_trial_level.csv')
procspd_trials.to_csv(path_or_buf=fpath, index=False)

#### Group and output ProcSpd subject-level data

In [13]:
# Mean RT per subject, kept as a one-column DataFrame indexed by Subject
grouped = procspd_trials.groupby('Subject')
procspd_subs = grouped[['RT']].mean()

In [14]:
# Subject-level processing-speed summary; the index is written out with the data
fpath = os.path.join(derivs_dir, 'procspd_subject_level.csv')
procspd_subs.to_csv(path_or_buf=fpath)

# Next step
## Join all subject-level data
Now that the subject-level data is cleaned & computed for ...
- Survey measures
- Comprehension
- N-back
- Processing speed

... we can combine all of that data into our final subject-level data set: [0.4.join_subject_level.ipynb](0.4.join_subject_level.ipynb)