In [None]:
#This script creates a binary (1 = complete, 0 = not complete) spreadsheet of all participants enrolled in EF,
#showing whether or not their pre/post scan SR data (pre-scan STAI state and trait, post-scan STAI state) is complete
#Inputs:
    #enrollment sheets pulled from AXIS, stored at afp://saturn/Coordinators/Protocols/TED_PROTOCOLS/EXECUTIVE_829744/2022_data_freeze/inputs
    #merged sheets of all pre-scan and post-scan scales (one file each), stored at afp://saturn/Coordinators/Protocols/TED_PROTOCOLS/EXECUTIVE_829744/2022_data_freeze/outputs
#Outputs (written to the 2022_data_freeze/audits folder):
    #a binary spreadsheet of pre/post scan SR data collected for T1 participants enrolled prior to April 1st, 2022 
    #a binary spreadsheet of pre/post scan SR data collected for T2 participants enrolled prior to April 1st, 2022 

In [1]:
import pandas as pd

In [2]:
#load the T1 enrollment sheet with every column kept as text, then discard the scan date column
axis_t1=pd.read_csv(
    '/Volumes/Coordinators/Protocols/TED_PROTOCOLS/EXECUTIVE_829744/2022_data_freeze/inputs/axis_enroll_t1.csv',
    dtype=str,
).drop(columns=['scan_1_date'])

In [3]:
#turn the enrollment 'bblid' column into a plain Python list of strings
t1_enroll=[str(bblid) for bblid in axis_t1['bblid'].tolist()]

In [7]:
#load the merged pre-scan SR scales; every column is read as text
pre_scan=pd.read_csv(
    '/Volumes/Coordinators/Protocols/TED_PROTOCOLS/EXECUTIVE_829744/2022_data_freeze/outputs/EF_all_prescan_scales.csv',
    dtype=str,
)

In [9]:
#keep only the pre-scan rows recorded at timepoint '1'
pre_scan_t1=pre_scan.loc[pre_scan['timepoint']=='1']

In [10]:
#list ID's that have completed scales
#a scale counts as complete only when its *_complete flag equals 2; the flag is parsed as a number
#so '2' and '2.0' both count, while blanks (NaN) and values such as '12' or '20' do not
#(the previous substring test "'2' in value" accepted those and raised TypeError on NaN)
pre_t1_ids=pre_scan_t1['bblid'].astype(str).str.strip()
pre_t1_state_done=pd.to_numeric(pre_scan_t1['stai_state_complete'],errors='coerce').eq(2)
pre_t1_trait_done=pd.to_numeric(pre_scan_t1['stai_trait_complete'],errors='coerce').eq(2)

#IDs are matched exactly: the previous "t in row['bblid']" test was a substring match, so a
#shorter ID could be credited with the scales of a longer ID that merely contains it
pre_t1_state_ids=set(pre_t1_ids[pre_t1_state_done])
pre_t1_trait_ids=set(pre_t1_ids[pre_t1_trait_done])

#kept as lists of enrolled IDs because later cells test membership in hasSTATE/hasTRAIT
hasSTATE=[t for t in t1_enroll if t.strip() in pre_t1_state_ids]
hasTRAIT=[t for t in t1_enroll if t.strip() in pre_t1_trait_ids]

In [11]:
#build the audit table: a single 'BBLID' column holding every enrolled T1 ID
audit=pd.DataFrame({'BBLID':t1_enroll})
#show how many enrolled IDs the table contains
print(audit.shape[0])

173


In [12]:
#flag each enrolled ID: 1 if it is in the completed list, 0 otherwise (one flag per audit row, in row order)
#the BBLID column is tested directly instead of looping over audit.values: that loop yields
#whole-row arrays, which raise "truth value is ambiguous" in the membership test as soon as
#audit has more than one column (e.g. when this cell is re-run after the flag columns are added)
state=audit['BBLID'].isin(hasSTATE).astype(int).tolist()
trait=audit['BBLID'].isin(hasTRAIT).astype(int).tolist()

In [95]:
#repeat the same completion check for the post-scan scales

In [14]:
#load the merged post-scan SR scales; every column is read as text
post_scan=pd.read_csv(
    '/Volumes/Coordinators/Protocols/TED_PROTOCOLS/EXECUTIVE_829744/2022_data_freeze/outputs/EF_all_postscan_scales.csv',
    dtype=str,
)

In [15]:
#keep only the post-scan rows recorded at timepoint '1'
post_scan_t1=post_scan.loc[post_scan['timepoint']=='1']

In [16]:
#list ID's that have completed scales
#complete means the stai_state_complete flag equals 2; parsing it as a number accepts '2' and
#'2.0' but rejects blanks (NaN) and values like '12', which the previous substring test
#"'2' in value" either accepted or crashed on (TypeError for NaN)
post_t1_ids=post_scan_t1['bblid'].astype(str).str.strip()
post_t1_state_done=pd.to_numeric(post_scan_t1['stai_state_complete'],errors='coerce').eq(2)

#exact ID match: the previous "t in row['bblid']" test also matched IDs that merely contain t
post_t1_state_ids=set(post_t1_ids[post_t1_state_done])

#kept as a list of enrolled IDs because a later cell tests membership in hasSTATE2
hasSTATE2=[t for t in t1_enroll if t.strip() in post_t1_state_ids]

In [17]:
#flag each enrolled ID: 1 if its post-scan state scale is complete, 0 otherwise (one flag per audit row)
#the BBLID column is tested directly instead of looping over audit.values, whose whole-row
#arrays make the membership test raise once audit holds more than the BBLID column
state2=audit['BBLID'].isin(hasSTATE2).astype(int).tolist()

In [18]:
#attach the 1/0 completion flags to the T1 audit table, one column per scale
for column_name,flags in (
    ('pre_stai_state',state),
    ('pre_stai_trait',trait),
    ('post_stai_state',state2),
):
    audit[column_name]=flags

In [19]:
#preview the first five rows of the T1 audit table
audit.head(5)

Unnamed: 0,BBLID,pre_stai_state,pre_stai_trait,post_stai_state
0,19861,1,1,1
1,20124,1,1,0
2,20125,1,1,0
3,20139,1,1,0
4,20141,1,1,1


In [27]:
#write the T1 audit table as a comma-separated file, leaving out the row index
audit.to_csv(
    '/Volumes/Coordinators/Protocols/TED_PROTOCOLS/EXECUTIVE_829744/2022_data_freeze/audits/EF_T1_scan_scales_audit.csv',
    sep=',',
    index=False,
)

In [None]:
#repeat the whole audit for the T2 (timepoint 2) enrollment

In [28]:
#load the T2 enrollment sheet with every column kept as text, then discard the scan date column
#NOTE(review): this path is relative to the notebook's working directory, while the T1 sheet is
#read from the shared /Volumes/.../2022_data_freeze/inputs folder -- confirm which copy is intended
axis_t2=pd.read_csv(
    'data/axis_enroll_t2.csv',
    dtype=str,
).drop(columns=['scan_2_date'])

In [29]:
#turn the T2 enrollment 'bblid' column into a plain Python list of strings
t2_enroll=[str(bblid) for bblid in axis_t2['bblid'].tolist()]

In [30]:
#keep only the pre-scan rows recorded at timepoint '2'
pre_scan_t2=pre_scan.loc[pre_scan['timepoint']=='2']

In [31]:
#list ID's that have completed scales
#a scale counts as complete only when its *_complete flag equals 2; the flag is parsed as a number
#so '2' and '2.0' both count, while blanks (NaN) and values such as '12' or '20' do not
#(the previous substring test "'2' in value" accepted those and raised TypeError on NaN)
pre_t2_ids=pre_scan_t2['bblid'].astype(str).str.strip()
pre_t2_state_done=pd.to_numeric(pre_scan_t2['stai_state_complete'],errors='coerce').eq(2)
pre_t2_trait_done=pd.to_numeric(pre_scan_t2['stai_trait_complete'],errors='coerce').eq(2)

#IDs are matched exactly: the previous "t in row['bblid']" test was a substring match, so a
#shorter ID could be credited with the scales of a longer ID that merely contains it
pre_t2_state_ids=set(pre_t2_ids[pre_t2_state_done])
pre_t2_trait_ids=set(pre_t2_ids[pre_t2_trait_done])

#kept as lists of enrolled IDs because later cells test membership in hasSTATE/hasTRAIT
hasSTATE=[t for t in t2_enroll if t.strip() in pre_t2_state_ids]
hasTRAIT=[t for t in t2_enroll if t.strip() in pre_t2_trait_ids]

In [32]:
#create a dataframe with all enrolled T2 ID's
audit2 = pd.DataFrame (t2_enroll, columns = ['BBLID'])
#report the T2 row count (this used to print len(audit), i.e. the T1 table's size)
print(len(audit2))

37


In [33]:
#flag each enrolled T2 ID: 1 if it is in the completed list, 0 otherwise (one flag per audit2 row)
#this used to loop over audit.values (the T1 table), which by this point has four columns and
#a different number of rows, so the flags could not line up with audit2; use audit2's BBLID column
state=audit2['BBLID'].isin(hasSTATE).astype(int).tolist()
trait=audit2['BBLID'].isin(hasTRAIT).astype(int).tolist()

In [34]:
#repeat the same completion check for the post-scan scales

In [36]:
#keep only the post-scan rows recorded at timepoint '2'
post_scan_t2=post_scan.loc[post_scan['timepoint']=='2']

In [37]:
#list ID's that have completed scales
#complete means the stai_state_complete flag equals 2; parsing it as a number accepts '2' and
#'2.0' but rejects blanks (NaN) and values like '12', which the previous substring test
#"'2' in value" either accepted or crashed on (TypeError for NaN)
post_t2_ids=post_scan_t2['bblid'].astype(str).str.strip()
post_t2_state_done=pd.to_numeric(post_scan_t2['stai_state_complete'],errors='coerce').eq(2)

#exact ID match: the previous "t in row['bblid']" test also matched IDs that merely contain t
post_t2_state_ids=set(post_t2_ids[post_t2_state_done])

#kept as a list of enrolled IDs because a later cell tests membership in hasSTATE2
hasSTATE2=[t for t in t2_enroll if t.strip() in post_t2_state_ids]

In [38]:
#flag each enrolled T2 ID: 1 if its post-scan state scale is complete, 0 otherwise (one flag per audit2 row)
#this used to loop over audit.values (the T1 table) instead of the T2 table, so the flags
#could not line up with audit2; use audit2's BBLID column
state2=audit2['BBLID'].isin(hasSTATE2).astype(int).tolist()

In [39]:
#attach the 1/0 completion flags to the T2 audit table, one column per scale
for column_name,flags in (
    ('pre_stai_state',state),
    ('pre_stai_trait',trait),
    ('post_stai_state',state2),
):
    audit2[column_name]=flags

In [40]:
#preview the first five rows of the T2 audit table
audit2.head(5)

Unnamed: 0,BBLID,pre_stai_state,pre_stai_trait,post_stai_state
0,20139,1,1,1
1,20149,1,1,1
2,20188,1,1,1
3,20214,1,1,0
4,20238,1,1,0


In [41]:
#write the T2 audit table as a comma-separated file, leaving out the row index
audit2.to_csv(
    '/Volumes/Coordinators/Protocols/TED_PROTOCOLS/EXECUTIVE_829744/2022_data_freeze/audits/EF_T2_scan_scales_audit.csv',
    sep=',',
    index=False,
)