In [1]:
import os
import pandas as pd
import numpy as np
from convert_eprime import convert as ep

In [2]:
# Project folders, located relative to the directory this notebook runs in.
# os.pardir is the parent-directory token ('..').
source_dir = os.path.join(os.pardir, 'sourcedata')    # per-subject input data
derivs_dir = os.path.join(os.pardir, 'derivatives')   # generated outputs

# Convert all N-back data to csv

#### Define a function to clean the N-back data

Stack the blocks vertically instead of horizontally, label the trial rows properly, and tag each trial as a HIT, MISS, FA, or CR. We also output a new, cleaned-up CSV data file in the sourcedata folder.

In [3]:
def nstack_score_label(fpath,outpath):
    """Reshape one wide N-back spreadsheet to long format, score it, and save it as CSV.

    The spreadsheet columns must be named ``<block>.<measure>``. Each block's
    columns are stacked vertically so that every output row is one trial of
    one block. The measures ``Rsp``, ``CRsp`` and ``RT`` must be present,
    because they are used for scoring.

    Four 0/1 indicator columns are added from the ``Rsp``/``CRsp`` values:

    * ``CR``   -- ``Rsp == 0`` and ``CRsp == 0``
    * ``MISS`` -- ``Rsp == 0`` and ``CRsp == 1``
    * ``FA``   -- ``Rsp == 1`` and ``CRsp == 0``
    * ``HIT``  -- ``Rsp == 1`` and ``CRsp == 1``

    ``RT`` values of 0 are replaced with NaN.

    Parameters
    ----------
    fpath : str
        Path of the Excel file to read. The subject label is taken from the
        file name: the text after the first ``-`` in the part preceding the
        first ``_`` (``sub-401_...`` gives ``401``).
    outpath : str
        Path of the CSV file to write. The frame is written with
        ``sub``, ``block`` and ``trial`` as its index columns.

    Raises
    ------
    ValueError
        If a column name does not contain a ``.`` separating block and measure.
    """
    df = pd.read_excel(fpath)

    # Hierarchicalize the column index: '<block>.<measure>' -> (block, measure).
    # Built from every column so any number of blocks/measures is accepted;
    # only the first two dot-separated fields of a name are used.
    column_pairs = [tuple(str(col).split('.')[:2]) for col in df.columns]
    malformed = [col for col, pair in zip(df.columns, column_pairs) if len(pair) != 2]
    if malformed:
        raise ValueError(
            "Expected column names of the form '<block>.<measure>', got: %r" % (malformed,)
        )
    df.columns = pd.MultiIndex.from_tuples(column_pairs)

    # Stack blocks, Reset trial row index, and Rename columns to be descriptive
    df = df.stack(0).reset_index().rename(
        columns={'level_0':'trial','level_1':'block'}
    ).sort_values(['block','trial'])
    df['sub'] = os.path.basename(fpath).split('_')[0].split('-')[1]
    # Keep only the second character of the block label.
    # NOTE(review): assumes labels look like one letter + one digit -- confirm.
    df['block'] = df['block'].str[1]
    # Row positions start at 0; trials are numbered from 1.
    df['trial'] = df['trial'] + 1
    df = df.set_index(['sub','block','trial'])

    # Determine Hits, Misses, CRs, FAs from the response / expected-response pair
    responded = df['Rsp'] == 1
    withheld = df['Rsp'] == 0
    expected = df['CRsp'] == 1
    not_expected = df['CRsp'] == 0
    df['CR']   = (withheld & not_expected).astype(int)
    df['MISS'] = (withheld & expected).astype(int)
    df['FA']   = (responded & not_expected).astype(int)
    df['HIT']  = (responded & expected).astype(int)

    # Convert RT 0 to RT NaN (np.nan: the np.NaN alias was removed in NumPy 2.0)
    df['RT'] = df['RT'].replace(0,np.nan)

    # Output to new CSV datafile
    df.to_csv(outpath)
    print('Output file successfully created- ',outpath)

#### Read all the subject data

Executing N-back data cleaning & Setting up for subject-level analysis.

In [4]:
# Convert each subject's N-back spreadsheet to a cleaned CSV. Subjects whose
# CSV already exists are skipped, so re-running this cell only does new work.
# Directory listings are sorted because os.listdir returns arbitrary order.
for s in sorted(os.listdir(source_dir)):
    sub_dir = os.path.join(source_dir,s)
    # Only subject *folders* are processed; a stray file named sub-4* is ignored.
    if not (s.startswith('sub-4') and os.path.isdir(sub_dir)):
        continue
    for f in sorted(os.listdir(sub_dir)):
        # The task label is the second-to-last underscore-separated field.
        # Names with fewer than two fields are skipped instead of raising IndexError.
        name_parts = f.split('_')
        if not (f.endswith('.xlsx') and len(name_parts) >= 2 and name_parts[-2] == 'task-nback'):
            continue
        o = f.split('.')[0]+'.csv'
        fpath = os.path.join(sub_dir,f)
        opath = os.path.join(sub_dir,o)
        if os.path.isfile(opath):
            print(opath,'exists')
        else:
            print(fpath,'=>',opath)
            nstack_score_label(fpath,opath)

..\sourcedata\sub-401\sub-401_task-nback_beh.csv exists
..\sourcedata\sub-402\sub-402_task-nback_beh.csv exists
..\sourcedata\sub-403\sub-403_task-nback_beh.csv exists
..\sourcedata\sub-404\sub-404_task-nback_beh.csv exists
..\sourcedata\sub-405\sub-405_task-nback_beh.csv exists
..\sourcedata\sub-406\sub-406_task-nback_beh.csv exists
..\sourcedata\sub-407\sub-407_task-nback_beh.csv exists
..\sourcedata\sub-408\sub-408_task-nback_beh.csv exists
..\sourcedata\sub-409\sub-409_task-nback_beh.csv exists
..\sourcedata\sub-410\sub-410_task-nback_beh.csv exists
..\sourcedata\sub-411\sub-411_task-nback_beh.csv exists
..\sourcedata\sub-412\sub-412_task-nback_beh.csv exists
..\sourcedata\sub-413\sub-413_task-nback_beh.csv exists
..\sourcedata\sub-414\sub-414_task-nback_beh.csv exists
..\sourcedata\sub-415\sub-415_task-nback_beh.csv exists
..\sourcedata\sub-416\sub-416_task-nback_beh.csv exists
..\sourcedata\sub-419\sub-419_task-nback_beh.csv exists
..\sourcedata\sub-420\sub-420_task-nback_beh.csv

# Combine subjects and output trial-level

## Read in and concatenate subject datasheets

In [5]:
# Load every subject's cleaned N-back CSV into a list of frames.
cleaned_dataframes_list = []

# Sorted listings give a reproducible subject order (os.listdir order is
# arbitrary), and that order becomes the row order of the combined data.
for s in sorted(os.listdir(source_dir)):
    sub_dir = os.path.join(source_dir,s)
    # Only subject *folders* are read; a stray file named sub-4* is ignored.
    if not (s.startswith('sub-4') and os.path.isdir(sub_dir)):
        continue
    for f in sorted(os.listdir(sub_dir)):
        # The task label is the second-to-last underscore-separated field.
        # Names with fewer than two fields are skipped instead of raising IndexError.
        name_parts = f.split('_')
        if not (f.endswith('.csv') and len(name_parts) >= 2 and name_parts[-2] == 'task-nback'):
            continue
        fpath = os.path.join(sub_dir,f)
        df = pd.read_csv(fpath)
        cleaned_dataframes_list.append(df)
        print(df.head())

   sub  block  trial  CRsp  RT  Rsp  CR  MISS  FA  HIT
0  401      1      1     0 NaN    0   1     0   0    0
1  401      1      2     0 NaN    0   1     0   0    0
2  401      1      3     0 NaN    0   1     0   0    0
3  401      1      4     0 NaN    0   1     0   0    0
4  401      1      5     0 NaN    0   1     0   0    0
   sub  block  trial  CRsp  RT  Rsp  CR  MISS  FA  HIT
0  402      1      1     0 NaN    0   1     0   0    0
1  402      1      2     0 NaN    0   1     0   0    0
2  402      1      3     0 NaN    0   1     0   0    0
3  402      1      4     0 NaN    0   1     0   0    0
4  402      1      5     0 NaN    0   1     0   0    0
   sub  block  trial  CRsp  RT  Rsp  CR  MISS  FA  HIT
0  403      1      1     0 NaN    0   1     0   0    0
1  403      1      2     0 NaN    0   1     0   0    0
2  403      1      3     0 NaN    0   1     0   0    0
3  403      1      4     0 NaN    0   1     0   0    0
4  403      1      5     0 NaN    0   1     0   0    0
   sub  bl

   sub  block  trial  CRsp  RT  Rsp  CR  MISS  FA  HIT
0  431      1      1     0 NaN    0   1     0   0    0
1  431      1      2     0 NaN    0   1     0   0    0
2  431      1      3     0 NaN    0   1     0   0    0
3  431      1      4     0 NaN    0   1     0   0    0
4  431      1      5     0 NaN    0   1     0   0    0
   sub  block  trial  CRsp  RT  Rsp  CR  MISS  FA  HIT
0  432      1      1     0 NaN    0   1     0   0    0
1  432      1      2     0 NaN    0   1     0   0    0
2  432      1      3     0 NaN    0   1     0   0    0
3  432      1      4     0 NaN    0   1     0   0    0
4  432      1      5     0 NaN    0   1     0   0    0
   sub  block  trial  CRsp  RT  Rsp  CR  MISS  FA  HIT
0  433      1      1     0 NaN    0   1     0   0    0
1  433      1      2     0 NaN    0   1     0   0    0
2  433      1      3     0 NaN    0   1     0   0    0
3  433      1      4     0 NaN    0   1     0   0    0
4  433      1      5     0 NaN    0   1     0   0    0
   sub  bl

In [6]:
# Stack the per-subject frames row-wise into one trial-level table.
# Each frame keeps its own row labels, so index values repeat across subjects.
oa_nback_trial_level = pd.concat(
    cleaned_dataframes_list,
    axis=0,
    ignore_index=False,
)

###### Output N-back trial-level data

In [7]:
# Write the combined OA trial-level table to derivatives/trialwise.
# The row index is omitted from the file.
oa_nback_trial_level.to_csv(
    os.path.join(derivs_dir, 'trialwise', 'oa_nback_trial_level.csv'),
    index=False,
)

##### Input YA N-back trial-level data

In [8]:
# Load the YA trial-level table from derivatives/trialwise
# (this file is not created anywhere in this notebook).
ya_nback_trial_level = pd.read_csv(
    os.path.join(derivs_dir, 'trialwise', 'ya_nback_trial_level.csv')
)

###### Merge

In [9]:
# Combine the OA and YA trial-level tables with an outer join.
# No `on` is given, so the join keys are all column names the two frames share.
nback_trial_level = pd.merge(
    oa_nback_trial_level,
    ya_nback_trial_level,
    how='outer',
)

In [10]:
# Per-subject descriptive statistics for the HIT and FA indicator columns.
nback_trial_level.groupby('sub')[['HIT','FA']].describe()

Unnamed: 0_level_0,HIT,HIT,HIT,HIT,HIT,HIT,HIT,HIT,FA,FA,FA,FA,FA,FA,FA,FA
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
sub,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
145,21.0,0.142857,0.358569,0.0,0.0,0.0,0.0,1.0,21.0,0.190476,0.402374,0.0,0.0,0.0,0.00,1.0
146,27.0,0.148148,0.362014,0.0,0.0,0.0,0.0,1.0,27.0,0.222222,0.423659,0.0,0.0,0.0,0.00,1.0
147,27.0,0.037037,0.192450,0.0,0.0,0.0,0.0,1.0,27.0,0.000000,0.000000,0.0,0.0,0.0,0.00,0.0
148,27.0,0.111111,0.320256,0.0,0.0,0.0,0.0,1.0,27.0,0.074074,0.266880,0.0,0.0,0.0,0.00,1.0
149,27.0,0.259259,0.446576,0.0,0.0,0.0,0.5,1.0,27.0,0.074074,0.266880,0.0,0.0,0.0,0.00,1.0
150,27.0,0.037037,0.192450,0.0,0.0,0.0,0.0,1.0,27.0,0.222222,0.423659,0.0,0.0,0.0,0.00,1.0
152,27.0,0.111111,0.320256,0.0,0.0,0.0,0.0,1.0,27.0,0.000000,0.000000,0.0,0.0,0.0,0.00,0.0
153,27.0,0.185185,0.395847,0.0,0.0,0.0,0.0,1.0,27.0,0.185185,0.395847,0.0,0.0,0.0,0.00,1.0
154,26.0,0.230769,0.429669,0.0,0.0,0.0,0.0,1.0,26.0,0.038462,0.196116,0.0,0.0,0.0,0.00,1.0
155,26.0,0.230769,0.429669,0.0,0.0,0.0,0.0,1.0,26.0,0.115385,0.325813,0.0,0.0,0.0,0.00,1.0


### Group, expand, trim N-back data
Group by subjects, get the sum of all columns, the count of the trial column, and the mean of the RT column.

Establish Hit % `number of Hits / number of targets` and FA % `number of FAs / number of foils`. 

Corrected Recognition `HIT% - FA%`. 

In [11]:
# Subject-level summary. Only the columns that feed the final measures are
# aggregated, so unrelated or non-numeric columns in the trial-level table
# cannot break the sum/mean (pandas >= 2.0 no longer drops them silently).
grouped = nback_trial_level.groupby('sub')
nback_df = grouped[['HIT','FA','CRsp']].sum()     # per-subject sums of the 0/1 columns
nback_df['trial'] = grouped['trial'].count()      # number of trials per subject
nback_df['RT'] = grouped['RT'].mean()             # mean RT; NaN RTs are skipped

# Hit rate: hits over trials with CRsp == 1 (sum of CRsp).
nback_df['HIT%'] = nback_df['HIT'] / nback_df['CRsp']
# False-alarm rate: false alarms over the remaining trials.
nback_df['FA%'] = nback_df['FA'] / (nback_df['trial'] - nback_df['CRsp'])
# Corrected recognition: hit rate minus false-alarm rate.
nback_df['CoR'] = nback_df['HIT%'] - nback_df['FA%']

nback_df = nback_df[['RT','HIT%','FA%','CoR']]
nback_df.head()

Unnamed: 0_level_0,RT,HIT%,FA%,CoR
sub,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
145,1619.714286,0.75,0.153846,0.596154
146,1401.740741,1.0,0.230769,0.769231
147,557.777778,0.333333,0.0,0.333333
148,710.333333,1.0,0.074074,0.925926
149,819.962963,0.7,0.1,0.6


###### Output N-back subject-level data

In [12]:
# Write the subject-level summary to derivatives/subjectwise.
# The folder is created first so a fresh checkout does not fail on a missing directory.
os.makedirs(os.path.join(derivs_dir,'subjectwise'), exist_ok=True)
nback_df.to_csv(os.path.join(derivs_dir,'subjectwise','nback_subject_level.csv'))