In [45]:
#this script checks for missing and complete physio data and audio recordings for participants
#this is currently set up for the AOL study REDCap measures
#
#TO USE: 
#put all data in whatever directory you would like. make sure the directories below are pointed to the correct spot
#make sure the file you want will be the most recent version in that directory (all items will be sorted)
#update the participants you need to check in the current_subs file
#save the file to whatever directory works best for you
#
# BP 8/14/20 

#SET UP#

#calling libraries
import pandas as pd
import xlwt
from xlwt import Workbook
import os
import fnmatch
from os import listdir
from os.path import isfile, join
from datetime import datetime
import re

# --- USER SETTINGS ---

# Date the data was downloaded (not necessarily the day this script is run).
# It is appended to the name of every output workbook.
data_date = "2020-08-14"

# CSV listing the subjects to check (only its "subIDs" column is read).
# Every subject listed must exist on REDCap, otherwise the checks stop with an error.
subs_filename = "current_subs.csv"

# Every directory below is written with escaped backslashes and must end with one.
# Folder holding the current_subs file.
sub_dir = 'F:\\Data Backup\\data checks\\'
# Root folder holding the collected data.
main_data_dir = 'F:\\Data Backup\\'
# Folder that receives the check output workbooks.
save_dir = 'F:\\Data Backup\\data checks\\output\\'
# Folder holding the REDCap exports.
redcap_dir = 'F:\\Data Backup\\REDCap Data\\'

# Audio recording folders (no trailing separator; the audio checks add it themselves).
audio_safer_dir = f'{main_data_dir}Audio Recordings\\SAFER Recordings'
audio_debrief_dir = f'{main_data_dir}Audio Recordings\\Debriefing Recordings'
audio_craft_story_dir = f'{main_data_dir}Audio Recordings\\CRAFT STORY RECALL RECORDING'

# Task data folders.
face_dir = f'{main_data_dir}FACE Task Data\\'
fract_dir = f'{main_data_dir}FrACT Data\\'
interoception_dir = f'{main_data_dir}Interoception Data\\'
ospan_dir = f'{main_data_dir}OSPAN Data\\'
video_dir = f'{main_data_dir}Lie Video Task Data\\'
sigt_dir = f'{main_data_dir}SIGT\\'

# MRI data folders.
mri_scan_dir = f'{main_data_dir}MRI Data\\scan data\\'
mri_behav_dir = f'{main_data_dir}MRI Data\\behavioral data\\'

# --- REDCAP EXPORT ---

# Plain files in redcap_dir, sorted in descending name order; the first one is used.
# NOTE(review): this picks the most recent export only if file names sort by date -- confirm.
all_redcap_files = sorted(
    (entry for entry in listdir(redcap_dir) if isfile(join(redcap_dir, entry))),
    reverse=True,
)
redcap_file = all_redcap_files[0]

# REDCap checklist columns, one per session. The checks compare them against 2
# to decide whether a session is complete before looking for its data.
sv = "screening_visit_remote_experimenter_checklist_complete"
follow_up_1 = "followup_visit_1_remote_experimenter_checklist_complete"
follow_up_2 = "followup_visit_2_remote_experimenter_checklist_complete"
physio = "physio_visit_on_campus_experimenter_checklist_complete"
MRI = "mri_visit_on_campus_experimenter_checklist_complete"
scd_participant = "scd_visit_participant_remote_experimenter_checklis_complete"
scd_informant = "scd_visit_informant_remote_experimenter_checklist_complete"

# --- OUTPUT WORKBOOKS ---

# Two workbooks per data family.
# NOTE(review): presumably 1 = present and 2 = missing, judging by how the audio pair is saved -- confirm.
wb1_audio, wb2_audio = Workbook(), Workbook()
wb1_physio, wb2_physio = Workbook(), Workbook()
wb1_mri, wb2_mri = Workbook(), Workbook()

# --- SUBJECTS TO CHECK ---

# Read the subject IDs and turn each one into a string so it can be used in file names.
col_list = ["subIDs"]
df = pd.read_csv(sub_dir + subs_filename, usecols=col_list)
df["subIDs"] = df["subIDs"].map(str)

# Plain Python list of the subject IDs, looped over by every check.
df_updating_subs = df["subIDs"].tolist()

In [46]:
# Audio recording -- SAFER
# Looks for <subID>_SAFER.m4a for every subject whose follow-up visit 2 checklist is complete.

measure_dir = audio_safer_dir
session = follow_up_2
file_type = '_SAFER.m4a'
save_name_complete = f'audio_present_{data_date}.xls'
save_name_missing = f'audio_missing_{data_date}.xls'

# One sheet for the subjects whose recording was found, one for those without it.
sheet1 = wb1_audio.add_sheet('SAFER recording complete')
sheet2 = wb2_audio.add_sheet('SAFER recording missing')

# Header row of each sheet.
for sheet in (sheet1, sheet2):
    sheet.write(0, 0, 'subID')

# Row of the last entry written to each sheet (row 0 holds the header).
place_counter_missing = 0
place_counter_complete = 0

# REDCap export: the session checklist column plus the record IDs.
col_list_redcap = [session, "record_id"]
df_redcap = pd.read_csv(redcap_dir + redcap_file, usecols=col_list_redcap)

# Parallel lists: position i holds the checklist value and the ID of the same record.
df_red_ses_check = df_redcap[session].tolist()
df_red_subids = df_redcap["record_id"].tolist()

for x in df_updating_subs:
    # Position of this subject in the REDCap export (raises ValueError if the subject is not there).
    loc = df_red_subids.index(x)
    file_to_find = f'{measure_dir}\\{x}{file_type}'
    if df_red_ses_check[loc] != 2:
        # Session is not marked complete (2), so the recording is not expected yet.
        continue
    if os.path.exists(file_to_find):
        place_counter_complete += 1
        sheet1.write(place_counter_complete, 0, x)
    else:
        place_counter_missing += 1
        sheet2.write(place_counter_missing, 0, x)

# Write both workbooks out as .xls files.
wb1_audio.save(save_dir + save_name_complete)
wb2_audio.save(save_dir + save_name_missing)

In [47]:
# Audio recording -- Debriefing
# Looks for <subID>_Debrief.m4a for every subject whose follow-up visit 2 checklist is complete.

measure_dir = audio_debrief_dir
session = follow_up_2
file_type = '_Debrief.m4a'

save_name_complete = f'audio_present_{data_date}.xls'
save_name_missing = f'audio_missing_{data_date}.xls'

# New sheets in the audio workbooks: found recordings and missing recordings.
sheet1 = wb1_audio.add_sheet('Debrief recording complete')
sheet2 = wb2_audio.add_sheet('Debrief recording missing')

# Header row of each sheet.
for sheet in (sheet1, sheet2):
    sheet.write(0, 0, 'subID')

# Row of the last entry written to each sheet (row 0 holds the header).
place_counter_missing = 0
place_counter_complete = 0

# REDCap export: the session checklist column plus the record IDs.
col_list_redcap = [session, "record_id"]
df_redcap = pd.read_csv(redcap_dir + redcap_file, usecols=col_list_redcap)

# Parallel lists: position i holds the checklist value and the ID of the same record.
df_red_ses_check = df_redcap[session].tolist()
df_red_subids = df_redcap["record_id"].tolist()

for x in df_updating_subs:
    # Position of this subject in the REDCap export (raises ValueError if the subject is not there).
    loc = df_red_subids.index(x)
    file_to_find = f'{measure_dir}\\{x}{file_type}'
    session_complete = df_red_ses_check[loc] == 2
    if session_complete:
        # Only look for the recording once the session is marked complete (2).
        if os.path.exists(file_to_find):
            place_counter_complete += 1
            sheet1.write(place_counter_complete, 0, x)
        else:
            place_counter_missing += 1
            sheet2.write(place_counter_missing, 0, x)

# Write both workbooks out as .xls files.
wb1_audio.save(save_dir + save_name_complete)
wb2_audio.save(save_dir + save_name_missing)

In [48]:
# Audio recording -- NACC Craft Story Recall
# Looks for <subID>_story.m4a for every subject whose SCD participant visit checklist is complete.

measure_dir = audio_craft_story_dir
session = scd_participant
file_type = '_story.m4a'

save_name_complete = f'audio_present_{data_date}.xls'
save_name_missing = f'audio_missing_{data_date}.xls'

# New sheets in the audio workbooks: found recordings and missing recordings.
sheet1 = wb1_audio.add_sheet('NACC story recording complete')
sheet2 = wb2_audio.add_sheet('NACC story recording missing')

# Header row of each sheet.
sheet1.write(0, 0, 'subID')
sheet2.write(0, 0, 'subID')

# Row of the last entry written to each sheet (row 0 holds the header).
place_counter_missing = 0
place_counter_complete = 0

# REDCap export: the session checklist column plus the record IDs.
col_list_redcap = [session, "record_id"]
df_redcap = pd.read_csv(redcap_dir + redcap_file, usecols=col_list_redcap)

# Parallel lists: position i holds the checklist value and the ID of the same record.
df_red_ses_check = df_redcap[session].tolist()
df_red_subids = df_redcap["record_id"].tolist()

for x in df_updating_subs:
    # Position of this subject in the REDCap export (raises ValueError if the subject is not there).
    loc = df_red_subids.index(x)
    file_to_find = measure_dir + '\\' + x + file_type
    if df_red_ses_check[loc] != 2:
        # Session is not marked complete (2): nothing to check for this subject.
        continue
    if not os.path.exists(file_to_find):
        place_counter_missing += 1
        sheet2.write(place_counter_missing, 0, x)
    else:
        place_counter_complete += 1
        sheet1.write(place_counter_complete, 0, x)

# Write both workbooks out as .xls files.
wb1_audio.save(save_dir + save_name_complete)
wb2_audio.save(save_dir + save_name_missing)

In [49]:
#FACE task
#behavioral and eyetracking
#Looks for the entries <subID>_1 and <subID>_2 in the FACE folder for every
#subject whose physio visit checklist is complete.

measure_dir = face_dir
session = physio
#file_type = '' #not needed for FACE task

save_name_complete = 'physio_present_' + data_date + '.xls'
save_name_missing = 'physio_missing_' + data_date + '.xls'

# NOTE(review): wb1_physio/wb2_physio are created in the setup cell but never used;
# the physio sheets are added to the audio workbooks, so the physio_* files also
# contain the audio sheets added by earlier cells. Confirm whether that is intended.
# Every physio cell would have to switch workbooks together, because each cell
# re-saves the same output file.
sheet1 = wb1_audio.add_sheet('FACE data complete')
sheet2 = wb2_audio.add_sheet('FACE data missing')

#putting headers of subID in each excel sheet
sheet1.write(0,0,'subID')
sheet2.write(0,0,'subID')

#row of the last entry written to each sheet (row 0 holds the header)
place_counter_missing=0
place_counter_complete=0

#READING IN REDCAP FILE
#session checklist column plus the record IDs
col_list_redcap = [session, "record_id"]
df_redcap = pd.read_csv(r'' + redcap_dir + redcap_file,  usecols=col_list_redcap)

#parallel lists: position i holds the checklist value and the ID of the same record
df_red_ses_check = df_redcap[session].tolist()
df_red_subids = df_redcap["record_id"].tolist()

for x in df_updating_subs:
    loc = df_red_subids.index(x) #position of the subID in the redcap export (ValueError if absent)
    file_to_find_run1 = measure_dir + '\\' + x + '_1'
    file_to_find_run2 = measure_dir + '\\' + x + '_2'
    if df_red_ses_check[loc] != 2: #session not marked complete (2): don't check for this item
        continue
    # Check each run once and combine with `and`. The original
    # `exists(a)==True & exists(b)==True` only worked by accident, because `&`
    # binds tighter than `==` and turned the test into a chained comparison.
    run1_present = os.path.exists(file_to_find_run1)
    run2_present = os.path.exists(file_to_find_run2)
    if run1_present and run2_present: #both runs present: mark as complete
        place_counter_complete += 1
        sheet1.write(place_counter_complete, 0, x)
    if not run1_present: #run 1 missing: one row in the missing sheet
        place_counter_missing += 1
        sheet2.write(place_counter_missing, 0, x)
        sheet2.write(place_counter_missing, 1, 'run 1')
    if not run2_present: #run 2 missing: its own row in the missing sheet
        place_counter_missing += 1
        sheet2.write(place_counter_missing, 0, x)
        sheet2.write(place_counter_missing, 1, 'run 2')

#saving the workbooks as .xls files
wb1_audio.save(save_dir + save_name_complete)
wb2_audio.save(save_dir + save_name_missing)

In [50]:
# FrACT
# Looks for <subID>.txt in the FrACT folder for every subject whose physio visit checklist is complete.

measure_dir = fract_dir
session = physio
file_type = '.txt'

save_name_complete = f'physio_present_{data_date}.xls'
save_name_missing = f'physio_missing_{data_date}.xls'

# New sheets: subjects whose file was found, and subjects without it.
sheet1 = wb1_audio.add_sheet('FrACT complete')
sheet2 = wb2_audio.add_sheet('FrACT missing')

# Header row of each sheet.
for sheet in (sheet1, sheet2):
    sheet.write(0, 0, 'subID')

# Row of the last entry written to each sheet (row 0 holds the header).
place_counter_missing = 0
place_counter_complete = 0

# REDCap export: the session checklist column plus the record IDs.
col_list_redcap = [session, "record_id"]
df_redcap = pd.read_csv(redcap_dir + redcap_file, usecols=col_list_redcap)

# Parallel lists: position i holds the checklist value and the ID of the same record.
df_red_ses_check = df_redcap[session].tolist()
df_red_subids = df_redcap["record_id"].tolist()

for x in df_updating_subs:
    # Position of this subject in the REDCap export (raises ValueError if the subject is not there).
    loc = df_red_subids.index(x)
    file_to_find = f'{measure_dir}\\{x}{file_type}'
    if df_red_ses_check[loc] == 2:
        # Session marked complete (2): the file is expected to exist.
        if os.path.exists(file_to_find):
            place_counter_complete += 1
            sheet1.write(place_counter_complete, 0, x)
        else:
            place_counter_missing += 1
            sheet2.write(place_counter_missing, 0, x)


# Write both workbooks out as .xls files.
wb1_audio.save(save_dir + save_name_complete)
wb2_audio.save(save_dir + save_name_missing)

In [51]:
#Interoception task
#main task behavioral, volume calibration behavioral, and BioPac
#For every subject whose physio visit checklist is complete, checks three things
#and reports each in its own column of the complete/missing sheets:
#  column 0: main task behavioral files
#  column 1: volume calibration behavioral files
#  column 2: BioPac .acq file

measure_dir = interoception_dir
session = physio
file_type_edat = 'dat3'  #last 4 characters of the edat file name
file_type_txt = '.txt'   #last 4 characters of the txt file name
file_type_biopac = 'SOS_Interoception_BioPac_'

save_name_complete = 'physio_present_' + data_date + '.xls'
save_name_missing = 'physio_missing_' + data_date + '.xls'

# NOTE(review): the physio sheets are added to the audio workbooks while
# wb1_physio/wb2_physio from the setup cell stay unused -- confirm this is intended.
sheet1 = wb1_audio.add_sheet('Interoception complete')
sheet2 = wb2_audio.add_sheet('Interoception missing')

#one header per measure in each excel sheet
sheet1.write(0,0,'main task behavioral')
sheet1.write(0,1,'volume calibration')
sheet1.write(0,2,'biopac')
sheet2.write(0,0,'main task behavioral')
sheet2.write(0,1,'volume calibration')
sheet2.write(0,2,'biopac')

#READING IN REDCAP FILE
#session checklist column plus the record IDs
col_list_redcap = [session, "record_id"]
df_redcap = pd.read_csv(r'' + redcap_dir + redcap_file,  usecols=col_list_redcap)

#parallel lists: position i holds the checklist value and the ID of the same record
df_red_ses_check = df_redcap[session].tolist()
df_red_subids = df_redcap["record_id"].tolist()

###
### MAIN TASK BEHAVIORAL and VOLUME CALIBRATION ###
###

# Both behavioral folders are checked the same way; only the folder and the
# output column differ.
behavioral_checks = [
    ('\\main task behavioral', 0),
    ('\\volume calibration behavioral', 1),
]

for subfolder, column in behavioral_checks:
    #listing all files within the directory
    all_dir_list = os.listdir(measure_dir + subfolder)

    # Map subID (first 6 characters of the file name) to the set of 4-character
    # file-name endings found for it. Built in plain Python because
    # DataFrame.append, used previously, no longer exists in pandas 2.x.
    endings_by_sub = {}
    for current_file in all_dir_list:
        endings_by_sub.setdefault(current_file[0:6], set()).add(current_file[-4:])

    #row of the last entry written in this column (row 0 holds the header)
    place_counter_missing=0
    place_counter_complete=0

    for x in df_updating_subs:
        loc = df_red_subids.index(x) #position of the subID in the redcap export (ValueError if absent)
        if df_red_ses_check[loc] != 2: #session not marked complete (2): don't check for this item
            continue
        endings = endings_by_sub.get(x, set())
        if file_type_edat in endings and file_type_txt in endings: #both files are there
            place_counter_complete += 1
            sheet1.write(place_counter_complete, column, x)
        else:
            # Missing means no files at all OR only one of the two. Previously a
            # subject with just one file was written to neither sheet.
            place_counter_missing += 1
            sheet2.write(place_counter_missing, column, x)

###
### BIOPAC ###
###

#row of the last entry written in the biopac column (row 0 holds the header)
place_counter_missing=0
place_counter_complete=0

for x in df_updating_subs:
    loc = df_red_subids.index(x) #position of the subID in the redcap export (ValueError if absent)
    file_to_find = measure_dir + '\\BioPac\\' + file_type_biopac + x + '.acq'
    if df_red_ses_check[loc] != 2: #session not marked complete (2): don't check for this item
        continue
    if os.path.exists(file_to_find):
        place_counter_complete += 1
        sheet1.write(place_counter_complete, 2, x)
    else:
        place_counter_missing += 1
        sheet2.write(place_counter_missing, 2, x)


#saving the workbooks as .xls files
wb1_audio.save(save_dir + save_name_complete)
wb2_audio.save(save_dir + save_name_missing)

In [52]:
#Lie video task
#behavioral and BioPac
#For every subject whose physio visit checklist is complete, checks the two
#behavioral runs (<subID>_1 and <subID>_2) and the single BioPac .acq file.

measure_dir = video_dir
session = physio
file_type_biopac = 'SOS_video_BioPac_'

save_name_complete = 'physio_present_' + data_date + '.xls'
save_name_missing = 'physio_missing_' + data_date + '.xls'

# NOTE(review): the physio sheets are added to the audio workbooks while
# wb1_physio/wb2_physio from the setup cell stay unused -- confirm this is intended.
sheet1 = wb1_audio.add_sheet('Video data complete')
sheet2 = wb2_audio.add_sheet('Video data missing')

#headers: one column per measure; the missing sheet also names the missing behavioral run
sheet1.write(0,0,'behavioral')
sheet1.write(0,1,'biopac')
sheet2.write(0,0,'behavioral')
sheet2.write(0,1,'which beh run missing')
sheet2.write(0,2,'biopac (both runs saved in one file)')

#READING IN REDCAP FILE
#session checklist column plus the record IDs
col_list_redcap = [session, "record_id"]
df_redcap = pd.read_csv(r'' + redcap_dir + redcap_file,  usecols=col_list_redcap)

#parallel lists: position i holds the checklist value and the ID of the same record
df_red_ses_check = df_redcap[session].tolist()
df_red_subids = df_redcap["record_id"].tolist()


###
### BEHAVIORAL ###
###

#row of the last entry written in the behavioral columns (row 0 holds the header)
place_counter_missing=0
place_counter_complete=0


for x in df_updating_subs:
    loc = df_red_subids.index(x) #position of the subID in the redcap export (ValueError if absent)
    file_to_find_run1 = measure_dir + '\\behavioral\\' + x + '_1'
    file_to_find_run2 = measure_dir + '\\behavioral\\' + x + '_2'
    if df_red_ses_check[loc] != 2: #session not marked complete (2): don't check for this item
        continue
    # Check each run once and combine with `and`. The original
    # `exists(a)==True & exists(b)==True` only worked by accident, because `&`
    # binds tighter than `==` and turned the test into a chained comparison.
    run1_present = os.path.exists(file_to_find_run1)
    run2_present = os.path.exists(file_to_find_run2)
    if run1_present and run2_present: #both runs present: mark as complete
        place_counter_complete += 1
        sheet1.write(place_counter_complete, 0, x)
    if not run1_present: #run 1 missing: one row in the missing sheet
        place_counter_missing += 1
        sheet2.write(place_counter_missing, 0, x)
        sheet2.write(place_counter_missing, 1, 'run 1')
    if not run2_present: #run 2 missing: its own row in the missing sheet
        place_counter_missing += 1
        sheet2.write(place_counter_missing, 0, x)
        sheet2.write(place_counter_missing, 1, 'run 2')


###
### BIOPAC ###
###

#row of the last entry written in the biopac column (row 0 holds the header)
place_counter_missing=0
place_counter_complete=0

for x in df_updating_subs:
    loc = df_red_subids.index(x) #position of the subID in the redcap export (ValueError if absent)
    file_to_find = measure_dir + '\\BioPac\\' + file_type_biopac + x + '.acq'
    if df_red_ses_check[loc] != 2: #session not marked complete (2): don't check for this item
        continue
    if os.path.exists(file_to_find):
        place_counter_complete += 1
        sheet1.write(place_counter_complete, 1, x)
    else:
        place_counter_missing += 1
        sheet2.write(place_counter_missing, 2, x)



#saving the workbooks as .xls files
wb1_audio.save(save_dir + save_name_complete)
wb2_audio.save(save_dir + save_name_missing)

In [53]:
#OSPAN task
#edat and txt files
#For every subject whose physio visit checklist is complete, checks that both an
#edat file and a txt file exist in the OSPAN folder.

measure_dir = ospan_dir
session = physio
file_type_edat = 'dat3'  #last 4 characters of the edat file name
file_type_txt = '.txt'   #last 4 characters of the txt file name

save_name_complete = 'physio_present_' + data_date + '.xls'
save_name_missing = 'physio_missing_' + data_date + '.xls'

# NOTE(review): the physio sheets are added to the audio workbooks while
# wb1_physio/wb2_physio from the setup cell stay unused -- confirm this is intended.
sheet1 = wb1_audio.add_sheet('OSPAN complete')
sheet2 = wb2_audio.add_sheet('OSPAN missing')

#headers: the complete sheet has one column per file type;
#the missing sheet names the subject and which file(s) are absent
sheet1.write(0,0,'edat file')
sheet1.write(0,1,'txt file')
sheet2.write(0,0,'subID file')
sheet2.write(0,1,'missing file(s)')

#READING IN REDCAP FILE
#session checklist column plus the record IDs
col_list_redcap = [session, "record_id"]
df_redcap = pd.read_csv(r'' + redcap_dir + redcap_file,  usecols=col_list_redcap)

#parallel lists: position i holds the checklist value and the ID of the same record
df_red_ses_check = df_redcap[session].tolist()
df_red_subids = df_redcap["record_id"].tolist()

#listing all files within the directory
all_dir_list = os.listdir(measure_dir)

#row of the last entry written to each sheet (row 0 holds the header)
place_counter_missing=0
place_counter_complete=0

# Map the subID found in each file name to the set of 4-character file-name
# endings seen for it. Characters 19-25 of the file name (7 characters) are
# compared against the subID written as 'xxx.xxx' below.
# NOTE(review): presumably a fixed task-name prefix fills the first 19 characters -- confirm.
# Built in plain Python because DataFrame.append, used previously, no longer
# exists in pandas 2.x.
endings_by_sub = {}
for current_file in all_dir_list:
    endings_by_sub.setdefault(current_file[19:26], set()).add(current_file[-4:])

for x in df_updating_subs:
    loc = df_red_subids.index(x) #position of the subID in the redcap export (ValueError if absent)
    if df_red_ses_check[loc] != 2: #session not marked complete (2): don't check for this item
        continue
    temp_x = (x[0:3] + '.' + x[3:6]) #subID in the 'xxx.xxx' form used in the file names
    endings = endings_by_sub.get(temp_x, set())
    has_edat = file_type_edat in endings
    has_txt = file_type_txt in endings
    if has_edat:
        place_counter_complete += 1
        sheet1.write(place_counter_complete, 0, x) #edat found
        if has_txt: #both files are there
            sheet1.write(place_counter_complete, 1, x)
    if not (has_edat and has_txt):
        # Report every subject lacking at least one file. Previously a subject
        # with only a txt file was written nowhere, and one with only an edat
        # file was never flagged as missing its txt file.
        if has_txt:
            missing_files = 'edat'
        elif has_edat:
            missing_files = 'txt'
        else:
            missing_files = 'edat and txt'
        place_counter_missing += 1
        sheet2.write(place_counter_missing, 0, x)
        sheet2.write(place_counter_missing, 1, missing_files)

#saving the workbooks as .xls files
wb1_audio.save(save_dir + save_name_complete)
wb2_audio.save(save_dir + save_name_missing)

In [54]:
#SIGT behavioral data
#(SIGT survey data in qualtrics, so data check for survey data in qualtrics data check)
#For every subject whose physio visit checklist is complete:
#  v5_sigt_alreadydone == 0 -> a file starting with the subID must exist in the SIGT folder
#  v5_sigt_alreadydone == 1 -> the subject is listed as having data from the Decisions study

measure_dir = sigt_dir
session = physio

save_name_complete = 'physio_present_' + data_date + '.xls'
save_name_missing = 'physio_missing_' + data_date + '.xls'

# NOTE(review): the physio sheets are added to the audio workbooks while
# wb1_physio/wb2_physio from the setup cell stay unused -- confirm this is intended.
sheet1 = wb1_audio.add_sheet('SIGT behavioral complete')
sheet2 = wb2_audio.add_sheet('SIGT behavioral missing')

#headers of each excel sheet
sheet1.write(0,0,'data exists from AOL session')
sheet1.write(0,1,'data should exist from Decisions study')
sheet2.write(0,0,'subID')

#READING IN REDCAP FILE
#session checklist column, record IDs, and the flag saying whether SIGT was already done
col_list_redcap = [session, "record_id", "v5_sigt_alreadydone"]
df_redcap = pd.read_csv(r'' + redcap_dir + redcap_file,  usecols=col_list_redcap)

#parallel lists: position i holds the values of the same record
df_red_ses_check = df_redcap[session].tolist()
df_red_subids = df_redcap["record_id"].tolist()
df_red_sigt_done = df_redcap["v5_sigt_alreadydone"].tolist()

#listing all files within the directory
all_dir_list = os.listdir(measure_dir)

#row of the last entry written in each output column (row 0 holds the header)
place_counter_missing=0
place_counter_decisions=0
place_counter_complete=0

# subIDs present in the data backup folder: the first 6 characters of each file name.
# Built with a comprehension because DataFrame.append, used previously, no
# longer exists in pandas 2.x.
temp_subs_list = [current_file[0:6] for current_file in all_dir_list]

for x in df_updating_subs:
    loc = df_red_subids.index(x) #position of the subID in the redcap export (ValueError if absent)
    if df_red_ses_check[loc] != 2: #session not marked complete (2): don't check for this item
        continue
    if df_red_sigt_done[loc]==0: #SIGT not done before, so the data must come from this session
        if x in temp_subs_list:
            place_counter_complete += 1
            sheet1.write(place_counter_complete, 0, x) #writing to complete excel
        else: #no file found for this subject
            place_counter_missing += 1
            sheet2.write(place_counter_missing, 0, x) #writing to missing excel
    elif df_red_sigt_done[loc]==1: #SIGT already done: data is expected from the 'decisions study'
        place_counter_decisions += 1
        sheet1.write(place_counter_decisions, 1, x) #writing to complete excel

#saving the workbooks as .xls files
wb1_audio.save(save_dir + save_name_complete)
wb2_audio.save(save_dir + save_name_missing)