In [35]:
import os
import pandas as pd
import numpy as np
from convert_eprime import convert as ep

In [2]:
# Project data folders, both located one level above the notebook's working directory.
source_dir = os.path.join(os.pardir, 'sourcedata')    # per-subject input folders are listed from here
derivs_dir = os.path.join(os.pardir, 'derivatives')   # sibling folder for derived outputs

#### Define a function to clean the N-back data

Stack the blocks vertically instead of horizontally, label the trial rows properly, and tag each trial as a HIT, MISS, FA (false alarm), or CR (correct rejection). We also output a new, cleaned-up CSV data file to the sourcedata folder.

In [390]:
def nstack_score_label(fpath,outpath):
    """Reshape one wide N-back spreadsheet to long format, score each trial, and save it as CSV.

    The Excel sheet at ``fpath`` is expected to have column names of the form
    ``<block>.<measure>`` (split on ``'.'``), with one row per trial and one
    group of columns per block. Any number of such columns is accepted. The
    ``Rsp`` and ``CRsp`` measures must be present, because scoring reads them.

    Parameters
    ----------
    fpath : str
        Path to the input ``.xlsx`` file. The subject label is taken from the
        file name: the text between the first ``'-'`` and the first ``'_'``
        (e.g. ``sub-101_task-nback_beh.xlsx`` -> ``'101'``).
    outpath : str
        Path of the CSV file to write.

    Returns
    -------
    None
        The result is written to ``outpath`` and a confirmation line is printed.

    Notes
    -----
    The CSV is indexed by ``sub``, ``block``, ``trial`` (trial is 1-based) and
    gains four 0/1 indicator columns: ``CR``, ``MISS``, ``FA``, ``HIT``. A trial
    whose ``Rsp``/``CRsp`` values are not exactly 0 or 1 (e.g. missing) gets 0
    in all four indicators.
    """
    df = pd.read_excel(fpath)

    # Hierarchicalize the column index: "<block>.<measure>" -> (block, measure).
    # Built from every column so the function is not tied to a fixed column count.
    split_names = (name.split('.') for name in df.columns)
    df.columns = pd.MultiIndex.from_tuples(
        [(parts[0], parts[1]) for parts in split_names]
    )

    # Stack blocks, Reset trial row index, and Rename columns to be descriptive.
    # After stacking, the unnamed index levels come back from reset_index() as
    # 'level_0' (original row number) and 'level_1' (block label).
    df = df.stack(0).reset_index().rename(
        columns={'level_0':'trial','level_1':'block'}
    ).sort_values(['block','trial'])
    df['sub'] = os.path.basename(fpath).split('_')[0].split('-')[1]
    # Keep only the second character of the block label (e.g. 'B1' -> '1').
    # NOTE(review): this assumes a one-letter prefix and a single-character
    # block id -- confirm before using with ten or more blocks.
    df['block'] = df['block'].str[1]
    df['trial'] = df['trial'] + 1   # convert the 0-based row number to a 1-based trial number
    df = df.set_index(['sub','block','trial'])

    # Determine Hits, Misses, CRs, FAs from the response given (Rsp) and the
    # response coded as correct (CRsp) -- presumably 1 = response / target; confirm.
    cr_mask = (df['Rsp'] == 0) & (df['CRsp'] == 0)
    ms_mask = (df['Rsp'] == 0) & (df['CRsp'] == 1)
    fa_mask = (df['Rsp'] == 1) & (df['CRsp'] == 0)
    ht_mask = (df['Rsp'] == 1) & (df['CRsp'] == 1)
    df['CR']   = cr_mask.astype(int)
    df['MISS'] = ms_mask.astype(int)
    df['FA']   = fa_mask.astype(int)
    df['HIT']  = ht_mask.astype(int)

    # Output to new CSV datafile
    df.to_csv(outpath)
    print('Output file successfully created- ',outpath)

#### Read all the subject data

Reading only data for the full sample (100-series YA & 200-series OA). Executing N-back data cleaning & EPrime txt-to-csv conversion. Setting up for subject-level analysis.

In [393]:
# One DataFrame per converted file: E-Prime text exports go to ep_frames,
# cleaned N-back spreadsheets go to ptb_frames.
ep_frames=[]
ptb_frames=[]

# sorted(): os.listdir() returns entries in arbitrary order, so sort to make the
# processing order (and the order of the collected frames) reproducible.
for s in sorted(os.listdir(source_dir)):
    sub_dir = os.path.join(source_dir,s)
    # Full sample only (sub-1xx and sub-2xx); skip stray files that share the
    # prefix, since listing a non-directory would raise.
    if not s.startswith(('sub-1','sub-2')) or not os.path.isdir(sub_dir):
        continue
    for f in sorted(os.listdir(sub_dir)):
        fpath = os.path.join(sub_dir,f)
        # Same name with the final extension swapped for .csv (splitext keeps
        # any earlier dots in the file name intact).
        outpath = os.path.join(sub_dir,os.path.splitext(f)[0]+'.csv')
        suffix = f.split('_')[-1]
        if suffix == 'beh.txt':
            # E-Prime text export: convert to CSV, then load the result.
            print(f)
            ep.text_to_csv(fpath,outpath)
            ep_frames.append(pd.read_csv(outpath))
        elif suffix == 'beh.xlsx':
            # N-back spreadsheet: clean/score to CSV, then load the result.
            print(f)
            nstack_score_label(fpath,outpath)
            ptb_frames.append(pd.read_csv(outpath))
            # Overwrite 'sub' with the label string taken from the file name
            # (read_csv would otherwise parse it as a number).
            ptb_frames[-1]['sub'] = f.split('_')[0].split('-')[1]
print("Done!")


sub-101_task-nback_beh.xlsx
Output file successfully created-  ..\sourcedata\sub-101\sub-101_task-nback_beh.csv
sub-101_task-procspeed_run-1_beh.txt
Output file successfully created- ..\sourcedata\sub-101\sub-101_task-procspeed_run-1_beh.csv
sub-102_task-nback_beh.xlsx
Output file successfully created-  ..\sourcedata\sub-102\sub-102_task-nback_beh.csv
sub-102_task-procspeed_run-1_beh.txt
Output file successfully created- ..\sourcedata\sub-102\sub-102_task-procspeed_run-1_beh.csv
sub-103_task-nback_beh.xlsx
Output file successfully created-  ..\sourcedata\sub-103\sub-103_task-nback_beh.csv
sub-103_task-procspeed_run-1_beh.txt
Output file successfully created- ..\sourcedata\sub-103\sub-103_task-procspeed_run-1_beh.csv
sub-104_task-nback_beh.xlsx
Output file successfully created-  ..\sourcedata\sub-104\sub-104_task-nback_beh.csv
sub-104_task-procspeed_run-1_beh.txt
Output file successfully created- ..\sourcedata\sub-104\sub-104_task-procspeed_run-1_beh.csv
sub-105_task-nback_beh.xlsx
Outp