In [1]:
from ezdiff import ezdiff
import pandas as pd
import numpy as np
from glob import glob
import os

In [1]:
# Root folder of the project; the two sub-folders below are derived from it.
# Every path keeps its trailing '/' because later cells append file names directly.
project_dir = '/Users/tonyb/Documents/Networking_Pilot/'
raw_dir = '{}all_data/'.format(project_dir)
explore_dir = '{}explore_n33/'.format(project_dir)

### Import data and remove duplicated columns, mean and STD rows, and prefix from colnames

In [2]:
# Load the concatenated per-condition RT table; the first CSV column becomes the row index.
rt_csv_path = explore_dir+'ezdiff/concat_separate_conditions_RTs_add0.5error_forACC=1.csv'
rt_df = pd.read_csv(rt_csv_path, index_col=0)

In [3]:
# pd.read_csv() de-duplicates repeated header names by appending '.1', '.2', ... to the
# later copies. Drop only those mangled copies: the name must end in '.<digits>' AND the
# name without that suffix must itself be a column. A plain "'.1' in name" test would
# also drop unrelated columns that merely contain '.1' and would keep '.2', '.3', ... copies.
rt_loaded_columns = set(rt_df.columns)
rt_duplicate_columns = [
    col for col in rt_df.columns
    if col.rpartition('.')[2].isdigit() and col.rpartition('.')[0] in rt_loaded_columns
]
rt_df = rt_df.drop(rt_duplicate_columns, axis=1)
# Discard the first two rows (the mean and STD rows, per the section heading above).
# NOTE: this is positional and reassigns rt_df, so re-running this cell without
# reloading the CSV removes two more rows each time.
rt_df = rt_df.iloc[2:, :]

In [4]:
# Strip everything up to and including the 'rt)_' marker from each column name so the RT
# and accuracy frames can be indexed with the same labels later on.
# A name that does not contain 'rt)_' ends up as the empty string.
rt_new_columns = ['_'.join(name.split('rt)_')[1:]) for name in rt_df.columns]
rt_df.columns = rt_new_columns

In [5]:
# Dimensions (rows, columns) of the RT table after dropping duplicate columns and the first two rows.
rt_df.shape

(33, 128)

In [6]:
# Load the concatenated per-condition accuracy table; the first CSV column becomes the row index.
acc_csv_path = explore_dir+'ezdiff/concat_separate_conditions_Accuracy_add0.5error_forACC=1.csv'
acc_df = pd.read_csv(acc_csv_path, index_col=0)

In [7]:
# pd.read_csv() de-duplicates repeated header names by appending '.1', '.2', ... to the
# later copies. Drop only those mangled copies: the name must end in '.<digits>' AND the
# name without that suffix must itself be a column. A plain "'.1' in name" test would
# also drop unrelated columns that merely contain '.1' and would keep '.2', '.3', ... copies.
acc_loaded_columns = set(acc_df.columns)
acc_duplicate_columns = [
    col for col in acc_df.columns
    if col.rpartition('.')[2].isdigit() and col.rpartition('.')[0] in acc_loaded_columns
]
acc_df = acc_df.drop(acc_duplicate_columns, axis=1)
# Discard the first two rows (the mean and STD rows, per the section heading above).
# NOTE: this is positional and reassigns acc_df, so re-running this cell without
# reloading the CSV removes two more rows each time.
acc_df = acc_df.iloc[2:, :]

In [8]:
# Strip everything up to and including the 'acc_' marker from each column name so the
# accuracy columns carry the same labels as the renamed RT columns.
# A name that does not contain 'acc_' ends up as the empty string.
acc_new_columns = ['_'.join(name.split('acc_')[1:]) for name in acc_df.columns]
acc_df.columns = acc_new_columns

In [9]:
# Dimensions (rows, columns) of the accuracy table after the same cleanup as the RT table.
# The EZ-diffusion loop below looks up every rt_df column name in acc_df, so the two should agree.
acc_df.shape

(33, 128)

### Wagenmakers et al.'s (2007) EZ-Diffusion

In [10]:
# Result table: one column per condition, one row per statistic (filled in below).
ez_df = pd.DataFrame(columns=rt_new_columns)

# Row labels for the six values read from ezdiff()'s return value, in positional order.
ez_output_rows = [
    'MRT',  # mean of RTs (in seconds)
    'VRT',  # variance of RTs (in squared seconds)
    'PC',   # correct responses, as returned by ezdiff() (proportion vs. percent: TODO confirm)
    'a',    # boundary separation a
    'v',    # drift rate v
    't',    # nondecision time Ter (in seconds)
]

for condition in rt_df.columns:
    # Number of missing entries for this condition in each input table.
    n_missing_rt = int(rt_df[condition].isnull().sum())
    n_missing_acc = int(acc_df[condition].isnull().sum())
    ez_df.loc['NaN_RTs', condition] = n_missing_rt
    ez_df.loc['NaN_ACCs', condition] = n_missing_acc

    # The column is located by name in each frame; rows are the first 33 by position
    # (33 is hard-coded and equals the row count shown in the shape checks above).
    rt_values = rt_df.iloc[:33, rt_df.columns.get_loc(condition)]
    acc_values = acc_df.iloc[:33, acc_df.columns.get_loc(condition)]

    # ezdiff() converts MRT to seconds and VRT to squared seconds
    ez_output = ezdiff(rt_values, acc_values)
    for position, row_label in enumerate(ez_output_rows):
        ez_df.loc[row_label, condition] = ez_output[position]

In [11]:
# One row per statistic written by the loop above: NaN_RTs, NaN_ACCs, MRT, VRT, PC, a, v, t.
ez_df.shape

(8, 128)

##### This writes the conditions/variables from all tasks into a single sheet, with each dual-task condition listed only once (i.e., 8 single tasks + the 8-choose-2 = 28 dual-task combinations)

In [12]:
# Excluded from the export:
#  - SS:stop, because stop-signal tasks use tracking mechanisms that keep the stop-fail rate at 0.5
#  - GNG:nogo, because there are many NaN_RTs in no-go conditions (i.e., too few no-go fails)
all_tasks_cols = [col for col in ez_df.columns if not ('SS:stop' in col or 'GNG:nogo' in col)]

# Default write mode: creates the workbook (or overwrites an existing one) with a single sheet.
with pd.ExcelWriter(explore_dir+'ezdiff/ezdiff_add0.5error_forACC=1_removeSS-stop_removeGNG-nogo.xlsx', engine="openpyxl") as writer:
    ez_df[all_tasks_cols].to_excel(writer, sheet_name='all_tasks')

##### This will write each task cluster (1 single task, followed by all 7 related dual tasks) into a separate worksheet

In [13]:
#remove SS:stop because stop-signal tasks use tracking mechanisms that keep stop-fail rate at 0.5
#remove GNG:nogo because there are many NaN_RTs in no-go conditions (i.e., too few no-go fails)
ez_df = ez_df[[col for col in ez_df.columns if ('SS:stop' not in col) and ('GNG:nogo' not in col)]]

# A sheet is named after its task tag without the trailing ':', except that the n-back
# columns are tagged 'DELAY:' while their sheet is called 'NBACK'.
sheet_name_overrides = {'DELAY:': 'NBACK'}

# Open the existing workbook once in append mode and add every task sheet through the same
# writer, instead of re-loading and re-saving the whole file on each loop iteration.
# The workbook must already exist (it is created by the 'all_tasks' export above).
with pd.ExcelWriter(explore_dir+'ezdiff/ezdiff_add0.5error_forACC=1_removeSS-stop_removeGNG-nogo.xlsx', engine="openpyxl", mode='a') as writer:
    for task in ['CUE:','DF:','FLANKER:','GNG:','DELAY:','PREDICT:','SHAPE:','SS:']:
        task_cols = [col for col in ez_df.columns if task in col]
        if task == 'CUE:':
            #cued_task_switching_single_task's columns are in format: TASK:..._&_CUE:...,
            #unlike other single tasks, whose columns are in format, e.g.: DF:...
            #so a CUE column counts as single-task when it has fewer than 3 '&'-separated parts
            single_cols = [col for col in task_cols if len(col.split('&')) < 3]
        else:
            single_cols = [col for col in task_cols if '&' not in col]
        dual_cols = [col for col in task_cols if col not in single_cols]

        # single-task columns first, followed by the related dual-task columns
        task_df = ez_df[single_cols+dual_cols]
        sheet_name = sheet_name_overrides.get(task, task[:-1])
        #write to separate sheets in same Excel workbook
        task_df.to_excel(writer, sheet_name=sheet_name)
        #write to separate .csv files
        #task_df.to_csv('/Users/tonyb/Desktop/temp/%s.csv' %sheet_name,index=True)

##### If the task clusters were written to separate .csv files, this checks them for completeness (e.g., whether the same number of columns was generated, and whether the single-task columns come first)

In [14]:
# Extend the module search path so the tb_utils package can be imported.
# NOTE(review): hard-coded, machine-specific directory; this cell fails on any other machine.
import sys
sys.path.append('/Users/tonyb/Documents/')
# NOTE(review): the star import hides which names are used. ordered_glob (called in the
# next cell) is presumably defined here; confirm and import it explicitly.
from tb_utils.tb_utils import *

In [15]:
# For every file in the temp folder, print its task name (file name up to the first '.')
# and its total column count, followed by the names of columns 1-4.
for csv_path in ordered_glob('/Users/tonyb/Desktop/temp/*'):
    csv_name = os.path.basename(csv_path)
    task = csv_name.split('.')[0]
    df = pd.read_csv(csv_path)
    n_columns = len(df.columns)
    print('\n%s; %s' % (task, n_columns))
    # Column 0 is skipped: presumably the saved row index, since the file is read without index_col.
    preview_columns = df.columns[1:5]
    for column_name in preview_columns:
        print(column_name)


CUE; 27
TASK:stay_&_CUE:stay
TASK:switch_&_CUE:switch
TASK:stay_&_CUE:stay_&_DF:con
TASK:stay_&_CUE:stay_&_DF:neg

DF; 27
DF:con
DF:neg
TASK:stay_&_CUE:stay_&_DF:con
TASK:stay_&_CUE:stay_&_DF:neg

Flanker; 27
FLANKER:congruent
FLANKER:incongruent
TASK:stay_&_CUE:stay_&_FLANKER:congruent
TASK:stay_&_CUE:stay_&_FLANKER:incongruent

GNG; 15
GNG:go
TASK:stay_&_CUE:stay_&_GNG:go
TASK:switch_&_CUE:switch_&_GNG:go
DF:con_&_GNG:go

N-back; 27
DELAY:1
DELAY:2
TASK:stay_&_CUE:stay_&_DELAY:1
TASK:stay_&_CUE:stay_&_DELAY:2

PREDICT; 27
PREDICT:stay
PREDICT:switch
TASK:stay_&_CUE:stay_&_PREDICT:stay
TASK:stay_&_CUE:stay_&_PREDICT:switch

SHAPE; 27
SHAPE:CONTROL
SHAPE:DISTRACTOR
TASK:stay_&_CUE:stay_&_SHAPE:CONTROL
TASK:stay_&_CUE:stay_&_SHAPE:DISTRACTOR

SS; 15
SS:go
TASK:stay_&_CUE:stay_&_SS:go
TASK:switch_&_CUE:switch_&_SS:go
DF:con_&_SS:go
