In [None]:
#This script creates a binary spreadsheet of all participants enrolled in EF, indicating whether or not each of
#their imaging sequences is complete.
#Inputs:
    #enrollment sheets pulled from AXIS, stored at afp://saturn/Coordinators/Protocols/TED_PROTOCOLS/EXECUTIVE_829744/2022_data_freeze/inputs
    #outputs of Flaudit gear (bids.csv, seqinfo.csv) stored at afp://saturn/Coordinators/Protocols/TED_PROTOCOLS/EXECUTIVE_829744/2022_data_freeze/inputs
#Outputs:
    #a binary spreadsheet of imaging data collected for T1 participants enrolled prior to April 1st, 2022 
    #a binary spreadsheet of imaging data collected for T2 participants enrolled prior to April 1st, 2022 

In [2]:
import pandas as pd
import re

In [3]:
# Load the timepoint-1 enrollment export and drop its 'scan_1_date' column in one step.
axis_t1 = pd.read_csv(
    '/Volumes/Coordinators/Protocols/TED_PROTOCOLS/EXECUTIVE_829744/2022_data_freeze/inputs/axis_enroll_t1.csv'
).drop(columns=['scan_1_date'])

In [4]:
# BBLIDs of every T1-enrolled participant, as a plain list of strings.
t1_enroll = list(map(str, axis_t1['bblid'].tolist()))

In [5]:
# Timepoint-1 scan IDs as strings, one per enrollment row (missing values become 'nan').
scan_id = list(map(str, axis_t1['scan_id_timepoint_1'].tolist()))

In [6]:
# Build the list of T1 scan IDs: skip entries containing 'nan' and keep only the
# text before the first '.' (e.g. '11012.0' -> '11012').
t1_scan = [raw_id.split('.')[0] for raw_id in scan_id if 'nan' not in raw_id]
print(t1_scan)

['11012', '10960', '10959', '11138', '11000', '11003', '11140', '11115', '11209', '11126', '11112', '11316', '11143', '11142', '11249', '11127', '11290', '11291', '11208', '11206', '11159', '11242', '11207', '11160', '11161', '11146', '11132', '11184', '11147', '11221', '11180', '11210', '11211', '11205', '11233', '11217', '11238', '11276', '11302', '11289', '11264', '11324', '11320', '11370', '11321', '11319', '11332', '11334', '11365', '11375', '11351', '11359', '11404', '11405', '11366', '11392', '11396', '11376', '11381', '11399', '11397', '11385', '11386', '11388', '11387', '11417', '11440', '11441', '11416', '11433', '11443', '11419', '11436', '11452', '11453', '11438', '11448', '11451', '11455', '11460', '11475', '11468', '11148', '11577', '11473', '11465', '11640', '11574', '11488', '11720', '11492', '11541', '11576', '11798', '11625', '11651', '11702', '11664', '11685', '11794', '11829', '11723', '11780', '11824', '11825', '11750', '11779', '11755', '11751', '11793', '11781', 

In [7]:
# Load the BIDS file listing exported by the Flaudit gear.
flaudit_bids_csv = '/Volumes/Coordinators/Protocols/TED_PROTOCOLS/EXECUTIVE_829744/2022_data_freeze/inputs/flaudit_bids.csv'
bids = pd.read_csv(flaudit_bids_csv)

In [8]:
# Derive two helper columns from each BIDS filename.
# Everything after the first '-' in the filename is the starting point for both.
after_first_dash = bids["Filename"].str.split("-", n=1, expand=True)[1]

# BBLID: the text between that first '-' and the next '_'.
bids["BBLID"] = after_first_dash.str.split("_", n=1, expand=True)[0]

# File: whatever remains after the second '_' (the trailing part of the filename
# that the later cells match acquisition names against).
bids["File"] = after_first_dash.str.split("_", n=2, expand=True)[2]

In [9]:
# Discard rows that have no Filename (this removes rows, not columns).
bids = bids.dropna(subset=['Filename'])

# Keep only the files whose session ID is one of the timepoint-1 scan IDs.
t1_session_mask = bids['session_id'].isin(t1_scan)
bids_t1 = bids[t1_session_mask]

In [10]:
# Renumber rows 0..n-1 after filtering. drop=True discards the old index instead of
# inserting it as an extra 'index' column, which keeps this cell idempotent: without it,
# re-running the cell adds 'index', then 'level_0', then raises on the third run.
bids_t1 = bids_t1.reset_index(drop=True)

In [11]:
# For each acquisition of interest, collect the BBLIDs of T1-enrolled participants that
# have at least one matching file in the T1 BIDS listing. The resulting lists are only
# used for membership tests later on, so order and duplicates are irrelevant.
hast1=[]
hast2=[]
hasdwi=[]
hasrest1=[]
hasrest2=[]
hasfrac=[]
hasasl=[]

# (filename fragment, list that receives the BBLIDs owning such a file)
t1_fragment_hits = [
    ('T1w.nii.gz', hast1),
    ('T2w.nii.gz', hast2),
    ('multiband_dwi.nii.gz', hasdwi),
    ('task-restbold_run-1_bold.nii.gz', hasrest1),
    ('task-restbold_run-2_bold.nii.gz', hasrest2),
    ('task-fracback_acq-singleband_bold.nii.gz', hasfrac),
    ('asl.nii.gz', hasasl),
]

# A set gives constant-time lookups instead of rescanning the enrollment list for every row.
t1_enrolled_ids = set(t1_enroll)

for _, row in bids_t1.iterrows():
    bblid, fname = row['BBLID'], row['File']
    # Filenames lacking the expected '-'/'_' separators leave NaN/None in these columns;
    # skip them rather than raising TypeError on the `in` tests below.
    if not isinstance(bblid, str) or not isinstance(fname, str):
        continue
    # Exact ID comparison: a substring test would let an enrolled ID such as '2013'
    # claim files that belong to '20139'.
    if bblid not in t1_enrolled_ids:
        continue
    for fragment, hits in t1_fragment_hits:
        if fragment in fname:
            hits.append(bblid)

In [12]:
# Start the T1 audit table: one row per enrolled BBLID.
audit = pd.DataFrame({'BBLID': t1_enroll})
print(audit.shape[0])

173


In [13]:
# QSM is not yet present in the BIDS listing, so it is checked against the SEQINFO
# output from Flaudit instead.
# NOTE(review): this path is relative to the working directory, whereas the header
# comment says the Flaudit outputs live in the data-freeze inputs folder - confirm.
seqinfo = pd.read_csv('data/seqinfo.csv')
qsm_names = ['qsm_acq-1.5mm_GRE', 'QSM_SWI_1.5mm']

# Keep the series that are QSM acquisitions AND belong to a timepoint-1 session.
is_qsm_series = seqinfo['series_description'].isin(qsm_names)
is_t1_session = seqinfo['session_id'].isin(t1_scan)
hasqsm = seqinfo[is_qsm_series & is_t1_session]

In [14]:
# Cast patient_id to string so it can be compared against the string BBLIDs in the audit table.
hasqsm = hasqsm.assign(patient_id=hasqsm['patient_id'].astype(str))

In [15]:
# Turn each per-acquisition BBLID list into a 0/1 flag per enrolled participant,
# in the same row order as `audit`.
# The BBLID column is selected explicitly (rather than iterating `audit.values`) so the
# cell keeps working when it is re-run after more columns have been added to `audit`,
# and `isin` replaces a repeated linear scan of each list.
enrolled_bblids = audit['BBLID']

t1 = enrolled_bblids.isin(hast1).astype(int).tolist()
t2 = enrolled_bblids.isin(hast2).astype(int).tolist()
dwi = enrolled_bblids.isin(hasdwi).astype(int).tolist()
rest1 = enrolled_bblids.isin(hasrest1).astype(int).tolist()
rest2 = enrolled_bblids.isin(hasrest2).astype(int).tolist()
frac = enrolled_bblids.isin(hasfrac).astype(int).tolist()
asl = enrolled_bblids.isin(hasasl).astype(int).tolist()

In [16]:
# 0/1 QSM flag per enrolled participant, in the same row order as `audit`.
# The set of patient IDs is built once (not once per participant), and the BBLID column
# is selected explicitly so the cell still works after `audit` has gained more columns.
qsm_patient_ids = set(hasqsm['patient_id'].astype(str))
qsm = audit['BBLID'].isin(qsm_patient_ids).astype(int).tolist()

In [17]:
# One formatted scan ID per enrollment row (text before the first '.'); unlike t1_scan,
# 'nan' entries are kept so the list stays aligned with the audit rows.
scanid1 = [raw_id.split('.')[0] for raw_id in scan_id]

In [18]:
# Attach the scan ID and every per-acquisition flag to the audit table.
# Keyword order fixes the column order in the output spreadsheet.
audit = audit.assign(
    scanid=scanid1,
    t1=t1,
    t2=t2,
    dwi=dwi,
    rest1=rest1,
    frac=frac,
    rest2=rest2,
    asl=asl,
    qsm=qsm,
)

In [21]:
# Preview the first five rows of the T1 audit (head() defaults to 5).
audit.head()

Unnamed: 0,BBLID,scanid,t1,t2,dwi,rest1,frac,rest2,asl,qsm
0,19861,11012,1,1,1,1,1,1,1,1
1,20124,10960,1,1,1,1,0,0,0,0
2,20125,10959,1,1,1,1,0,0,0,0
3,20139,11138,1,1,1,1,1,1,1,1
4,20141,11000,1,1,1,1,1,1,1,1


In [22]:
# Write the T1 audit as a comma-separated file (the default separator), omitting the row index.
audit.to_csv(
    '/Volumes/Coordinators/Protocols/TED_PROTOCOLS/EXECUTIVE_829744/2022_data_freeze/audits/EF_T1_imaging_audit.csv',
    index=False,
)

In [None]:
#now for T2...

In [23]:
# Load the timepoint-2 enrollment export and drop its 'scan_2_date' column in one step.
axis_t2 = pd.read_csv(
    '/Volumes/Coordinators/Protocols/TED_PROTOCOLS/EXECUTIVE_829744/2022_data_freeze/inputs/axis_enroll_t2.csv'
).drop(columns=['scan_2_date'])

In [24]:
# BBLIDs of every T2-enrolled participant, as a plain list of strings.
t2_enroll = list(map(str, axis_t2['bblid'].tolist()))

In [25]:
# Timepoint-2 scan IDs as strings, one per enrollment row (missing values become 'nan').
scan_id2 = list(map(str, axis_t2['scan_id_t2'].tolist()))

In [26]:
# Build the list of T2 scan IDs: skip entries containing 'nan' and keep only the
# text before the first '.'.
t2_scan = [raw_id.split('.')[0] for raw_id in scan_id2 if 'nan' not in raw_id]
print(t2_scan)

['11657', '11831', '11594', '11659', '11612', '11608', '11608', '11584', '11823', '11603', '11604', '11635', '11678', '11660', '11631', '12098', '11679', '11738', '11771', '11694', '11474', '11692', '11719', '11881', '11882', '11835', '11946', '12032', '12004', '11897', '12063', '11873', '11891', '12060', '11931', '12044', '12143']


In [27]:
# Keep only the files whose session ID is one of the timepoint-2 scan IDs.
t2_session_mask = bids['session_id'].isin(t2_scan)
bids_t2 = bids.loc[t2_session_mask]

In [28]:
# For each acquisition of interest, collect the BBLIDs of T2-enrolled participants that
# have at least one matching file in the T2 BIDS listing. The resulting lists are only
# used for membership tests later on, so order and duplicates are irrelevant.
hast1=[]
hast2=[]
hasdwi=[]
hasrest1=[]
hasrest2=[]
hasfrac=[]
hasasl=[]

# (filename fragment, list that receives the BBLIDs owning such a file)
t2_fragment_hits = [
    ('T1w.nii.gz', hast1),
    ('T2w.nii.gz', hast2),
    ('multiband_dwi.nii.gz', hasdwi),
    ('task-restbold_run-1_bold.nii.gz', hasrest1),
    ('task-restbold_run-2_bold.nii.gz', hasrest2),
    ('task-fracback_acq-singleband_bold.nii.gz', hasfrac),
    ('asl.nii.gz', hasasl),
]

# Match against the T2 enrollment list (the previous loop iterated t1_enroll here).
# A set gives constant-time lookups instead of rescanning the list for every row.
t2_enrolled_ids = set(t2_enroll)

for _, row in bids_t2.iterrows():
    bblid, fname = row['BBLID'], row['File']
    # Filenames lacking the expected '-'/'_' separators leave NaN/None in these columns;
    # skip them rather than raising TypeError on the `in` tests below.
    if not isinstance(bblid, str) or not isinstance(fname, str):
        continue
    # Exact ID comparison: a substring test would let an enrolled ID such as '2013'
    # claim files that belong to '20139'.
    if bblid not in t2_enrolled_ids:
        continue
    for fragment, hits in t2_fragment_hits:
        if fragment in fname:
            hits.append(bblid)

In [29]:
# Start the T2 audit table: one row per T2-enrolled BBLID.
audit2 = pd.DataFrame({'BBLID': t2_enroll})
print(audit2.shape[0])

37


In [30]:
# 0/1 QSM flag per T2-enrolled participant, in the same row order as `audit2`.
# `hasqsm` was restricted to timepoint-1 session IDs, so reusing it here would report
# T1 QSM scans in the T2 audit. Re-filter the SEQINFO table by the T2 session IDs instead.
is_qsm_series = seqinfo['series_description'].isin(qsm_names)
is_t2_session = seqinfo['session_id'].isin(t2_scan)
hasqsm_t2 = seqinfo[is_qsm_series & is_t2_session]

# Compare as strings, because the audit BBLIDs are strings.
t2_qsm_patient_ids = set(hasqsm_t2['patient_id'].astype(str))
qsm = audit2['BBLID'].isin(t2_qsm_patient_ids).astype(int).tolist()

In [31]:
# Turn each per-acquisition BBLID list into a 0/1 flag per T2-enrolled participant,
# in the same row order as `audit2`.
# The BBLID column is selected explicitly (rather than iterating `audit2.values`) so the
# cell keeps working when it is re-run after more columns have been added to `audit2`,
# and `isin` replaces a repeated linear scan of each list.
t2_enrolled_bblids = audit2['BBLID']

t1 = t2_enrolled_bblids.isin(hast1).astype(int).tolist()
t2 = t2_enrolled_bblids.isin(hast2).astype(int).tolist()
dwi = t2_enrolled_bblids.isin(hasdwi).astype(int).tolist()
rest1 = t2_enrolled_bblids.isin(hasrest1).astype(int).tolist()
rest2 = t2_enrolled_bblids.isin(hasrest2).astype(int).tolist()
frac = t2_enrolled_bblids.isin(hasfrac).astype(int).tolist()
asl = t2_enrolled_bblids.isin(hasasl).astype(int).tolist()

In [32]:
# Attach the scan ID and every per-acquisition flag to the T2 audit table.
# The scan IDs come from the unfiltered enrollment column (one entry per row, 'nan' kept),
# as in the T1 audit. `t2_scan` has missing IDs removed, so using it would shift scan IDs
# onto the wrong participants or fail on a length mismatch as soon as one ID is missing.
audit2['scanid']=[raw_id.split('.')[0] for raw_id in scan_id2]
audit2['t1']=t1
audit2['t2']=t2
audit2['dwi']=dwi
audit2['rest1']=rest1
audit2['frac']=frac
audit2['rest2']=rest2
audit2['asl']=asl
audit2['qsm']=qsm

In [33]:
# Preview the first five rows of the T2 audit (head() defaults to 5).
audit2.head()

Unnamed: 0,BBLID,scanid,t1,t2,dwi,rest1,frac,rest2,asl,qsm
0,20139,11657,1,1,1,1,1,1,1,1
1,20149,11831,1,1,1,1,1,1,1,1
2,20188,11594,1,1,1,1,1,1,1,1
3,20214,11659,1,1,1,1,1,1,1,1
4,20238,11612,1,1,1,1,1,1,1,1


In [34]:
# Write the T2 audit as a comma-separated file (the default separator), omitting the row index.
audit2.to_csv(
    '/Volumes/Coordinators/Protocols/TED_PROTOCOLS/EXECUTIVE_829744/2022_data_freeze/audits/EF_T2_imaging_audit.csv',
    index=False,
)