In [76]:
# This script scans each site's fMRIPrep output for preprocessed functional runs. Runs that have
# no matching confounds file are appended to "MISSING_FMRIPREP.csv".
# For every run that does have a confounds file, the script extracts the
# Framewise Displacement (FD) column and thresholds the values at both 0.2 (conservative)
# and 0.4 (lenient) to flag motion-outlier runs for the final analyses.
# It generates the INCLUDE.csv file, which contains one row of information per run
# (FD outlier counts, binary exclusion flags based on the 0.2 and 0.4 thresholds, and mean FD).

In [77]:
import numpy as np
import csv
from glob import glob
import re
import os
from statistics import mean

In [78]:
def remove_junk(main_list, junk):
    """Return the entries of ``main_list`` that are not listed in ``junk``.

    Falsy entries (e.g. empty strings) are dropped as well, and the relative
    order of the surviving entries is preserved.

    Unlike the previous implementation, the caller's list is left untouched:
    a new list is built instead of blanking entries in place.

    Parameters
    ----------
    main_list : iterable
        Candidate entries (in this notebook: directory listings).
    junk : container
        Entries to discard; membership is tested with ``in``.

    Returns
    -------
    list
        A new list holding the kept entries.
    """
    return [entry for entry in main_list if entry and entry not in junk]

In [79]:
# subject look-up table conversion (IGNORING underscores)
def Convert_Subname(Oldname, table_path='/om/user/rezzo/Subject_Conversion_Table.csv'):
    """Translate an underscore-free subject ID into the name stored in the conversion table.

    Each row of the comma-separated table is read as ``new_name,old_name``:
    the second field, with underscores stripped, is compared against
    ``Oldname`` and the first field is returned. If several rows match, the
    last one wins (same as the original loop, which never stopped early).

    The table may no longer be a dynamic file. If it needs updating, it can be
    rebuilt from the vertical concatenation of the first two columns of:
        /om/user/rezzo/TOMLOC_info/tomloc_subject_info_internal.csv
        /om/group/saxelab/OpenAutism/data/Subject_Task_Info_Dima/subject_info_internal.csv

    Parameters
    ----------
    Oldname : str
        Subject ID with underscores already removed.
    table_path : str, optional
        Location of the conversion table; defaults to the original hard-coded path.

    Returns
    -------
    str
        First field of the (last) matching row.

    Raises
    ------
    UnboundLocalError
        When no row matches. The original code failed this way implicitly
        (``Newname`` was never assigned) and callers catch this exception
        type, so it is kept but now raised explicitly with a clear message.
    """
    Newname = None
    found = False

    # newline="" is the csv-module recommended way to open files for csv.reader.
    with open(table_path, "r", newline="") as tsv:
        for line in csv.reader(tsv, delimiter=","):
            # Blank or single-field rows cannot hold a mapping; skip them
            # instead of failing with IndexError on line[1].
            if len(line) < 2:
                continue
            if Oldname == line[1].replace("_", ""):
                Newname = line[0]
                found = True

    if not found:
        raise UnboundLocalError(
            "subject '%s' not found in conversion table %s" % (Oldname, table_path))
    return Newname

In [80]:
def remove_underscores(subname):
    """Return ``subname`` with every underscore character removed."""
    # Splitting on "_" and re-joining with nothing drops all underscores.
    return "".join(subname.split("_"))

In [81]:
def find_between( s, first, last ):
    """Return the part of ``s`` that lies between ``first`` and the next ``last``.

    The search for ``last`` begins right after the first occurrence of
    ``first``. An empty string is returned when either marker is absent.
    """
    head = s.find(first)
    if head == -1:
        return ""
    begin = head + len(first)
    stop = s.find(last, begin)
    if stop == -1:
        return ""
    return s[begin:stop]

In [82]:
def substring_after(s, delim):
    """Return everything in ``s`` after the first ``delim`` ('' if ``delim`` is absent)."""
    _before, _separator, remainder = s.partition(delim)
    return remainder

In [83]:
# Walk every site folder and every subject inside it, and summarise head motion per run.
#
# For each preprocessed functional run found under
#   <root>/<site>/BIDS/derivatives/fmriprep/sub-<subject>/func/
# the matching *_bold_confounds.tsv file is read and its Framewise Displacement (FD)
# values are thresholded. One tab-separated row per run is appended to INCLUDE.csv:
#   converted subject name, site, sub-<ID>, task label, run label,
#   number of volumes with FD >= 0.2, exclusion flag for 0.2,
#   number of volumes with FD >= 0.4, exclusion flag for 0.4, mean FD
# (the file is tab-delimited despite its .csv extension).
# Runs whose confounds file is absent, or contains no numeric FD value, are appended
# to MISSING_FMRIPREP.csv instead.

# ---- configuration -------------------------------------------------------------------
root = '/om/group/saxelab/OpenAutism/data/'

# Suffix that identifies a preprocessed BOLD file; the text between 'sub-<ID>_' and this
# suffix is used as the run identifier (e.g. 'task-sholo_run-001').
PREPROC_SUFFIX = '_bold_space-MNI152NLin2009cAsym_variant-smoothAROMAnonaggr_preproc.nii.gz'

# Header labels under which the FD column is looked up.
# NOTE(review): these are the CamelCase / snake_case names believed to be used by
# different fMRIPrep releases -- confirm against the version that produced these files.
FD_COLUMN_NAMES = ('FramewiseDisplacement', 'framewise_displacement')
# Zero-based column used when no known header label is present. This is the index the
# original code always used (i.e. the SEVENTH column, not the sixth).
FD_FALLBACK_INDEX = 6

FD_THRESHOLD_CONSERVATIVE = 0.2
FD_THRESHOLD_LENIENT = 0.4
# A run is excluded when at least this fraction of its volumes exceed the FD threshold.
MAX_OUTLIER_FRACTION = 0.25

site_other_folders = ['subjectlists', 'TextFiles','Subject_Task_Info_Dima','.DS_Store', '._.DS_Store']
subject_other_folders = ['mriqc_output', 'BIDS', '_OLD_', 'SPM_firstlevel','.DS_Store', '._.DS_Store','SPOverview.xlsx']

# Start from an empty INCLUDE.csv on every execution of this cell.
# NOTE(review): MISSING_FMRIPREP.csv is only ever appended to, so entries accumulate
# across re-runs -- confirm whether it should be truncated here as well.
with open('INCLUDE.csv', 'w'):
    pass

# ---- read in site folders --------------------------------------------------------------
all_sites = os.listdir(root)
all_sites = remove_junk(all_sites, site_other_folders)
all_sites.sort()

# for each site, go through its subjects
for site in all_sites:

    all_subjects = os.listdir(root+site)
    all_subjects = remove_junk(all_subjects, subject_other_folders)
    all_subjects.sort()

    for subject in all_subjects:
        tempsub = subject
        print(tempsub)
        # fMRIPrep output folders use the subject ID without underscores.
        subject = remove_underscores(subject)

        try:
            location = root+site+'/BIDS/derivatives/fmriprep/sub-'+subject+'/func/'
            if not os.path.isdir(location):
                continue

            # One entry per preprocessed BOLD file present in the func folder.
            task_list = []
            for elements in os.listdir(location):
                cur_task = find_between(elements, 'sub-'+subject+'_', PREPROC_SUFFIX)
                if cur_task != '':
                    task_list.append(cur_task)

            for tasks in task_list:
                print(subject + tasks)
                confound_path = location+'sub-'+subject+'_'+tasks+'_bold_confounds.tsv'

                if not os.path.isfile(confound_path):
                    with open("MISSING_FMRIPREP.csv", "a") as fo:
                        fo.write('sub-'+subject+tasks+'\t'+'no_confound_file'+'\n')
                    continue

                # Collect every FD value that parses as a number.
                fd_values = []
                fd_index = FD_FALLBACK_INDEX
                with open(confound_path) as tsv:
                    for row_number, line in enumerate(csv.reader(tsv, dialect="excel-tab")):
                        if row_number == 0:
                            # Prefer locating the FD column by its header label.
                            for column_name in FD_COLUMN_NAMES:
                                if column_name in line:
                                    fd_index = line.index(column_name)
                                    break
                        try:
                            fd_values.append(float(line[fd_index]))
                        except (ValueError, IndexError):
                            # Header text, non-numeric placeholders and rows that
                            # are too short carry no FD value; skip them.
                            continue

                if not fd_values:
                    # statistics.mean() raises on empty input, which previously
                    # aborted the whole scan; record the run as unusable instead.
                    with open("MISSING_FMRIPREP.csv", "a") as fo:
                        fo.write('sub-'+subject+tasks+'\t'+'no_valid_FD_values'+'\n')
                    continue

                n_over_conservative = sum(fd >= FD_THRESHOLD_CONSERVATIVE for fd in fd_values)
                n_over_lenient = sum(fd >= FD_THRESHOLD_LENIENT for fd in fd_values)
                motion_mean = mean(fd_values)

                # Runs can differ in length, so the cut-off is relative to this run.
                temp_len = len(fd_values)
                exclude = n_over_conservative >= (MAX_OUTLIER_FRACTION*temp_len)
                exclude2 = n_over_lenient >= (MAX_OUTLIER_FRACTION*temp_len)

                out0 = Convert_Subname(subject)
                out1 = site
                out2 = 'sub-'+subject
                out3 = find_between(tasks, "task-", "_run") # task label
                out4 = substring_after(tasks, "run-") # run label
                out5 = str(n_over_conservative)
                out6 = str(int(exclude))
                out7 = str(n_over_lenient)
                out8 = str(int(exclude2))
                out9 = str(motion_mean)

                # Append this run's summary row to the output file.
                with open("INCLUDE.csv", "a") as fo:
                    fo.write(out0 + "\t" + out1 + "\t" + out2 + "\t" + out3 + "\t" + out4 + "\t" + out5 + "\t"
                            + out6+ "\t" + out7+ "\t" + out8 + "\t" + out9 + "\n")
        except UnboundLocalError:
            # Raised by Convert_Subname when the subject is absent from the conversion
            # table; the remaining runs of this subject are skipped.
            print("Check for "+subject+" in subject conversion file and for no counfound file in bids-func.")


SAX_SCOTTFM_01
SAXSCOTTFM01task-sholo_run-001
SAX_SCOTTFM_02
SAXSCOTTFM02task-sholo_run-001
SAX_SCOTTFM_03
SAXSCOTTFM03task-sholo_run-001
SAX_SCOTTFM_04
SAXSCOTTFM04task-sholo_run-001
SAX_SCOTTFM_05
SAXSCOTTFM05task-sholo_run-001
SAX_SCOTTFM_06
SAXSCOTTFM06task-sholo_run-001
SAX_SCOTTFM_07
SAXSCOTTFM07task-sholo_run-001
SAX_SCOTTFM_08
SAXSCOTTFM08task-sholo_run-001
SAX_SCOTTFM_09
SAX_SCOTTFM_10
SAXSCOTTFM10task-sholo_run-001
SAX_SCOTTFM_11
SAXSCOTTFM11task-sholo_run-001
SAX_SCOTTFM_12
SAXSCOTTFM12task-sholo_run-001
SAX_SCOTTFM_13
SAXSCOTTFM13task-sholo_run-001
SAX_SCOTTFM_14
SAXSCOTTFM14task-sholo_run-001
SAX_SCOTTFM_15
SAXSCOTTFM15task-sholo_run-001
SAX_SCOTTFM_17
SAXSCOTTFM17task-sholo_run-001
SAX_SCOTTFM_18
SAXSCOTTFM18task-sholo_run-001
SAX_SCOTTFM_20
SAXSCOTTFM20task-sholo_run-001
SAX_SCOTTFM_21
SAXSCOTTFM21task-sholo_run-001
SAX_SCOTTFM_22
SAXSCOTTFM22task-sholo_run-001
SAX_SCOTTFM_23
SAXSCOTTFM23task-sholo_run-001
SAX_SCOTTFM_24
SAXSCOTTFM24task-sholo_run-001
SAX_SCOTTFM_25
SAX_

SAXCBTNT33task-morphing_run-003
SAXCBTNT33task-morphing_run-002
SAXCBTNT33task-morphing_run-004
SAX_CBTNT_34
SAXCBTNT34task-morphing_run-002
SAXCBTNT34task-morphing_run-003
SAXCBTNT34task-sholo_run-001
SAXCBTNT34task-morphing_run-004
SAXCBTNT34task-morphing_run-001
SAX_CBTNT_35
SAXCBTNT35task-morphing_run-001
SAXCBTNT35task-sholo_run-001
SAXCBTNT35task-morphing_run-004
SAXCBTNT35task-morphing_run-002
SAXCBTNT35task-morphing_run-003
SAX_CBTNT_36
SAXCBTNT36task-sholo_run-001
SAXCBTNT36task-morphing_run-001
SAXCBTNT36task-morphing_run-003
SAXCBTNT36task-morphing_run-004
SAXCBTNT36task-morphing_run-002
SAX_DOD_001
SAXDOD001task-morphing_run-001
SAXDOD001task-morphing_run-003
SAXDOD001task-morphing_run-004
SAXDOD001task-morphing_run-002
SAX_DOD_002
SAXDOD002task-morphing_run-001
SAXDOD002task-morphing_run-004
SAXDOD002task-morphing_run-002
SAXDOD002task-morphing_run-003
SAX_DOD_003
SAXDOD003task-morphing_run-002
SAXDOD003task-morphing_run-003
SAXDOD003task-morphing_run-004
SAXDOD003task-mor

SAXEIB41task-dyloc_run-001
SAXEIB41task-tomloc_run-002
SAX_EIB_42
SAXEIB42task-dyloc_run-002
SAXEIB42task-dyloc_run-001
SAXEIB42task-tomloc_run-001
SAXEIB42task-tomloc_run-002
SAX_EIB_43
SAXEIB43task-tomloc_run-001
SAXEIB43task-tomloc_run-002
SAX_EIB_44
SAXEIB44task-tomloc_run-002
SAXEIB44task-tomloc_run-001
SAX_EIB_45
SAXEIB45task-tomloc_run-002
SAXEIB45task-tomloc_run-001
SAX_EIB_46
SAXEIB46task-tomloc_run-001
SAXEIB46task-tomloc_run-002
SAX_EIB_47
SAX_EIB_48
SAXEIB48task-tomloc_run-001
SAXEIB48task-tomloc_run-002
SAX_EIB_49
SAXEIB49task-tomloc_run-001
SAXEIB49task-tomloc_run-002
SAX_EIB_50
SAXEIB50task-tomloc_run-001
SAXEIB50task-tomloc_run-002
SAX_EIB_51
SAXEIB51task-tomloc_run-002
SAXEIB51task-tomloc_run-001
SAX_EIB_52
SAXEIB52task-tomloc_run-001
SAXEIB52task-tomloc_run-002
SAX_EIB_53
SAXEIB53task-tomloc_run-002
SAXEIB53task-tomloc_run-001
SAX_EIB_54
SAXEIB54task-tomloc_run-001
SAXEIB54task-tomloc_run-002
SAX_EIB_55
SAXEIB55task-tomloc_run-001
SAXEIB55task-tomloc_run-002
SAX_EIB_5

Check for KANea021akin subject conversion file and for no counfound file in bids-func.
KAN_ea022ak
KANea022aktask-dyloc_run-004
Check for KANea022akin subject conversion file and for no counfound file in bids-func.
KAN_ea023ak
KANea023aktask-dyloc_run-003
Check for KANea023akin subject conversion file and for no counfound file in bids-func.
KAN_ea024ak
KANea024aktask-dyloc_run-001
Check for KANea024akin subject conversion file and for no counfound file in bids-func.
KAN_ea024bk
KANea024bktask-dyloc_run-002
Check for KANea024bkin subject conversion file and for no counfound file in bids-func.
KAN_ea025ak
KANea025aktask-dyloc_run-002
Check for KANea025akin subject conversion file and for no counfound file in bids-func.
KAN_ea026ak
KANea026aktask-dyloc_run-002
Check for KANea026akin subject conversion file and for no counfound file in bids-func.
KAN_ea030ak
KANea030aktask-dyloc_run-004
Check for KANea030akin subject conversion file and for no counfound file in bids-func.
KAN_ea035bk
KANea

SAXtasti233task-biomo_run-001
SAXtasti233task-tomloc_run-002
SAX_tasti2_34
SAXtasti234task-tomloc_run-002
SAXtasti234task-biomo_run-001
SAXtasti234task-tomloc_run-001
SAXtasti234task-sholo_run-001
SAX_tasti2_35
SAXtasti235task-tomloc_run-002
SAXtasti235task-biomo_run-001
SAXtasti235task-tomloc_run-001
SAXtasti235task-sholo_run-001
SAX_tasti2_36
SAXtasti236task-sholo_run-001
SAXtasti236task-tomloc_run-002
SAXtasti236task-tomloc_run-001
SAXtasti236task-biomo_run-001
SAX_tasti2_37
SAXtasti237task-biomo_run-001
SAXtasti237task-tomloc_run-002
SAXtasti237task-tomloc_run-001
SAXtasti237task-sholo_run-001
SAX_tasti2_40
SAXtasti240task-sholo_run-001
SAXtasti240task-tomloc_run-001
SAXtasti240task-tomloc_run-002
SAXtasti240task-biomo_run-001
SAX_tasti2_41
SAXtasti241task-sholo_run-001
SAXtasti241task-tomloc_run-002
SAXtasti241task-biomo_run-001
SAXtasti241task-tomloc_run-001
SAX_tasti2_42
SAXtasti242task-sholo_run-001
SAXtasti242task-tomloc_run-002
SAXtasti242task-tomloc_run-001
SAXtasti242task-b