In [None]:
#This script creates a binary spreadsheet of all participants enrolled in EF and whether or not their
#variability data is complete
#Inputs:
    #enrollment sheets pulled from AXIS, stored at afp://saturn/Coordinators/Protocols/TED_PROTOCOLS/EXECUTIVE_829744/2022_data_freeze/inputs
    #raw task files, stored on Penn+Box
#Outputs:
    #a binary spreadsheet of variability data collected for T1 participants enrolled prior to April 1st, 2022
    #a binary spreadsheet of variability data collected for T2 participants enrolled prior to April 1st, 2022

In [2]:
import pandas as pd
import glob

In [3]:
# Load the Timepoint 1 enrollment sheet and discard the scan-date column,
# which this audit does not use.
axis_t1 = (
    pd.read_csv('/Volumes/Coordinators/Protocols/TED_PROTOCOLS/EXECUTIVE_829744/2022_data_freeze/inputs/axis_enroll_t1.csv')
    .drop(columns=['scan_1_date'])
)

In [4]:
# Flatten the 'bblid' column into a plain Python list of strings; later cells
# compare these strings against folder names taken from file paths.
t1_enroll = list(map(str, axis_t1['bblid'].tolist()))

In [5]:
# Collect every Timepoint 1 variability file path.
# root_dir is user-specific: point it at your own Box Sync location.
root_dir = '/Users/krmurtha/Box Sync/EXECUTIVE_FUNCTION/VARIABILITY/'
bids_dir = 'TIMEPOINT 1/'
# Two wildcard levels: each sub-folder of the timepoint folder, then every
# entry inside that sub-folder.
all_files1 = glob.glob(''.join((root_dir, bids_dir, '*/*')))

In [6]:
# Sort the Timepoint 1 files by task type.
# For each task, collect the name of the folder that directly contains a
# matching log file; later cells compare these names with enrolled BBLIDs.
has_dominant=[]
has_reaction=[]
has_cpt=[]
has_cpt_g=[]
has_simple_p=[]
has_complex_p=[]
has_complex_r=[]

# (substring identifying a task's log file, list that collects its folder names)
task_markers = [
    ('Dominant.log', has_dominant),
    ('Reaction.log', has_reaction),
    ('CPT.log', has_cpt),
    ('CPT_gordon.log', has_cpt_g),
    ('complex_reasoning.log', has_complex_r),
    ('complex_processing_speed.log', has_complex_p),
    ('simple_processing_speed.log', has_simple_p),
]

for f in all_files1:
    for marker, bucket in task_markers:
        if marker in f:
            # Take the parent folder counted from the END of the path. The
            # previous fixed index ([7]) was only correct for a root_dir with
            # exactly five path components, so it broke (IndexError or the
            # wrong component) as soon as root_dir was changed for another user.
            x = f.split('/')[-2]
            bucket.append(x)

In [8]:
# Start the T1 audit table with one row per enrolled participant.
audit = pd.DataFrame({'BBLID': t1_enroll})

In [9]:
# Build one 0/1 list per task, aligned row-for-row with t1_enroll:
# 1 if the participant's ID appears among the folders holding that task's log
# file, otherwise 0.
#
# Each has_* list is converted to a set once. The previous version ran a
# separate `in` and `not in` test per task, each rebuilding a full copy of the
# list for every participant.
found_dominant = set(has_dominant)
found_reaction = set(has_reaction)
found_cpt = set(has_cpt)
found_cpt_g = set(has_cpt_g)
found_simple_p = set(has_simple_p)
found_complex_p = set(has_complex_p)
found_complex_r = set(has_complex_r)

dominant = [1 if str(t) in found_dominant else 0 for t in t1_enroll]
reaction = [1 if str(t) in found_reaction else 0 for t in t1_enroll]
cpt = [1 if str(t) in found_cpt else 0 for t in t1_enroll]
cpt_g = [1 if str(t) in found_cpt_g else 0 for t in t1_enroll]
simple_p = [1 if str(t) in found_simple_p else 0 for t in t1_enroll]
complex_p = [1 if str(t) in found_complex_p else 0 for t in t1_enroll]
complex_r = [1 if str(t) in found_complex_r else 0 for t in t1_enroll]

In [10]:
# Attach each task's 0/1 list to the T1 audit table, in the order the columns
# should appear in the output.
for column_name, flags in [
    ('dominant', dominant),
    ('reaction', reaction),
    ('cpt', cpt),
    ('cpt_g', cpt_g),
    ('simple_p', simple_p),
    ('complex_p', complex_p),
    ('complex_r', complex_r),
]:
    audit[column_name] = flags

In [11]:
# Preview the first five rows of the T1 audit table.
audit.head(5)

Unnamed: 0,BBLID,dominant,reaction,cpt,cpt_g,simple_p,complex_p,complex_r
0,19861,1,1,1,1,1,1,1
1,20124,0,0,0,0,0,0,0
2,20125,1,1,1,1,0,0,0
3,20139,1,1,1,1,1,1,1
4,20141,1,1,1,1,1,1,1


In [12]:
# Write the T1 audit table as a comma-separated file, without the row index.
audit.to_csv(
    '/Volumes/Coordinators/Protocols/TED_PROTOCOLS/EXECUTIVE_829744/2022_data_freeze/audits/EF_T1_variability_audit.csv',
    sep=',',
    index=False,
)

In [50]:
#Repeat the same audit for Timepoint 2 (T2)

In [13]:
# Load the Timepoint 2 enrollment sheet and discard the scan-date column,
# which this audit does not use.
axis_t2 = (
    pd.read_csv('/Volumes/Coordinators/Protocols/TED_PROTOCOLS/EXECUTIVE_829744/2022_data_freeze/inputs/axis_enroll_t2.csv')
    .drop(columns=['scan_2_date'])
)

In [14]:
# Flatten the 'bblid' column into a plain Python list of strings; later cells
# compare these strings against IDs parsed from file paths.
t2_enroll = list(map(str, axis_t2['bblid'].tolist()))

In [15]:
# Collect every Timepoint 2 variability file path.
# root_dir is user-specific: point it at your own Box Sync location.
root_dir = '/Users/krmurtha/Box Sync/EXECUTIVE_FUNCTION/VARIABILITY/'
bids_dir = 'TIMEPOINT 2/'
# Two wildcard levels: each sub-folder of the timepoint folder, then every
# entry inside that sub-folder.
all_files2 = glob.glob(''.join((root_dir, bids_dir, '*/*')))

In [16]:
# Sort the Timepoint 2 files by task type.
# For each task, collect the ID parsed from the folder that directly contains
# a matching log file; later cells compare these IDs with enrolled BBLIDs.
has_dominant=[]
has_reaction=[]
has_cpt=[]
has_cpt_g=[]
has_simple_p=[]
has_complex_p=[]
has_complex_r=[]

# (substring identifying a task's log file, list that collects its IDs)
task_markers = [
    ('Dominant.log', has_dominant),
    ('Reaction.log', has_reaction),
    ('CPT.log', has_cpt),
    ('CPT_gordon.log', has_cpt_g),
    ('complex_reasoning.log', has_complex_r),
    ('complex_processing_speed.log', has_complex_p),
    ('simple_processing_speed.log', has_simple_p),
]

for f in all_files2:
    for marker, bucket in task_markers:
        if marker in f:
            # Take the parent folder counted from the END of the path. The
            # previous fixed index ([7]) was only correct for a root_dir with
            # exactly five path components, so it broke (IndexError or the
            # wrong component) as soon as root_dir was changed for another user.
            x = f.split('/')[-2]
            # Keep only the part of the folder name before the first underscore.
            y = x.split('_')[0]
            bucket.append(y)

In [17]:
# Start the T2 audit table with one row per enrolled participant.
audit2 = pd.DataFrame({'BBLID': t2_enroll})

In [18]:
# Build one 0/1 list per task, aligned row-for-row with t2_enroll:
# 1 if the participant's ID appears among the IDs found for that task's log
# file, otherwise 0.
#
# Each has_* list is converted to a set once. The previous version ran a
# separate `in` and `not in` test per task, each rebuilding a full copy of the
# list for every participant.
found_dominant = set(has_dominant)
found_reaction = set(has_reaction)
found_cpt = set(has_cpt)
found_cpt_g = set(has_cpt_g)
found_simple_p = set(has_simple_p)
found_complex_p = set(has_complex_p)
found_complex_r = set(has_complex_r)

dominant = [1 if str(t) in found_dominant else 0 for t in t2_enroll]
reaction = [1 if str(t) in found_reaction else 0 for t in t2_enroll]
cpt = [1 if str(t) in found_cpt else 0 for t in t2_enroll]
cpt_g = [1 if str(t) in found_cpt_g else 0 for t in t2_enroll]
simple_p = [1 if str(t) in found_simple_p else 0 for t in t2_enroll]
complex_p = [1 if str(t) in found_complex_p else 0 for t in t2_enroll]
complex_r = [1 if str(t) in found_complex_r else 0 for t in t2_enroll]

In [19]:
# Attach each task's 0/1 list to the T2 audit table, in the order the columns
# should appear in the output.
for column_name, flags in [
    ('dominant', dominant),
    ('reaction', reaction),
    ('cpt', cpt),
    ('cpt_g', cpt_g),
    ('simple_p', simple_p),
    ('complex_p', complex_p),
    ('complex_r', complex_r),
]:
    audit2[column_name] = flags

In [20]:
# Preview the first five rows of the T2 audit table.
audit2.head(5)

Unnamed: 0,BBLID,dominant,reaction,cpt,cpt_g,simple_p,complex_p,complex_r
0,20139,1,1,0,1,0,0,1
1,20149,0,0,0,0,0,0,0
2,20188,0,0,0,0,0,0,0
3,20214,1,1,1,1,0,0,1
4,20238,1,1,0,1,0,0,1


In [21]:
# Write the T2 audit table as a comma-separated file, without the row index.
audit2.to_csv(
    '/Volumes/Coordinators/Protocols/TED_PROTOCOLS/EXECUTIVE_829744/2022_data_freeze/audits/EF_T2_variability_audit.csv',
    sep=',',
    index=False,
)