In [1]:
# This script checks for FMRIPREP func files, identifies them and writes them to a file called "MISSING_FMRIPREP".
# This script also uses the confound derivatives from fmriprep output and extracts the
# Framewise Displacement column. Then it thresholds the values for both 0.2 (conservative) 
# and 0.4 (lenient) for determining motion outlier runs for final analyses. 
# It generates both the INCLUDE.csv file and the Runs_MotionFiltered.csv. 
# INCLUDE.csv contains all information about each run (# FD outliers, binary exclusion criteria
# based on 0.2 and 0.4 threshold). #Runs_MotionFiltered.csv contains names of all subjects that
# are to be included in the analysis.
# textfiles are also creates in each subject fmriprep folder with the motion criteria

In [2]:
import numpy as np
import csv
from glob import glob
import re
import os
from statistics import mean

In [3]:
def remove_junk(main_list, junk):
    counter = 0
    for elements in main_list:
        if elements in junk:
            main_list[counter] = ''
        counter = counter + 1
    main_list = list(filter(None, main_list))
    return main_list

In [4]:
# subject look up table conversion (IGNORING undescores)
def Convert_Subname(Oldname):
    tmp_root = '/om/user/rezzo/Subject_Conversion_Table.csv'

    with open(tmp_root, "r") as tsv:
        for line in csv.reader(tsv,  delimiter = ","):
            if Oldname == line[1].replace("_",""):
                Newname = line[0]
            else:
                continue
    return Newname  

In [5]:
def remove_underscores(subname):
    newname = subname.replace("_","")
    return newname  

In [6]:
def find_between( s, first, last ):
    try:
        start = s.index( first ) + len( first )
        end = s.index( last, start )
        return s[start:end]
    except ValueError:
        return ""

In [7]:
def substring_after(s, delim):
    return s.partition(delim)[2]

In [8]:
# read in site folder:
root = '/om/group/saxelab/OpenAutism/data/'

all_sites = os.listdir("/om/group/saxelab/OpenAutism/data/")

site_other_folders = ['subjectlists', 'TextFiles','.DS_Store', '._.DS_Store']
subject_other_folders = ['mriqc_output', 'BIDS', '_OLD_', 'SPM_firstlevel','.DS_Store', '._.DS_Store','SPOverview.xlsx']


all_sites = remove_junk(all_sites, site_other_folders)
all_sites.sort()

# for each site look at how many subjects;

for site in all_sites:

    all_subjects = os.listdir(root+site)
    all_subjects = remove_junk(all_subjects, subject_other_folders)
    all_subjects.sort()  
    
    # to look at a specific subject's tasks

    for subject in all_subjects:
        subject = remove_underscores(subject)
        #print(subject)
        
        fmriprep = root+site+'/BIDS/derivatives/fmriprep/sub-'+subject
        location = root+site+'/BIDS/derivatives/fmriprep/sub-'+subject+'/func/'

        if os.path.isdir(location):
            inner_dir = os.listdir(location)
            #print(inner_dir)

            # for the elements in directory list, make an exact list of tasks defined by preproc. file created:

            task_list = []

            for elements in inner_dir:
                cur_task = find_between(elements, 'sub-'+subject+'_', '_bold_space-MNI152NLin2009cAsym_variant-smoothAROMAnonaggr_preproc.nii.gz')
                if cur_task != '':
                    task_list.append(cur_task)

            for tasks in task_list:
                print(subject + tasks)
                if os.path.isfile(location+'sub-'+subject+'_'+tasks+'_bold_confounds.tsv') is False:
                    #print('missing confound file for '+subject+' '+cur_task)
                    fo = open("MISSING_FMRIPREP.csv", "a")          
                    line = fo.writelines('sub-'+subject+tasks+'\t'+'no_confound_file'+'\n')
                    fo.close()
                else:
                    #print("ready to go")
                    ls = [] #array of valid values
                    cor = [] #array with 0s and 1s (for >= 0.2)
                    cor2 = [] #array with 0s and 1s (for >= 0.4)
                    cor_int = []
                    cor2_int = []

                    # Only interested in the 6th column (FD)
                    with open(location+'sub-'+subject+"_"+tasks+'_bold_confounds.tsv') as tsv:
                        for line in csv.reader(tsv, dialect="excel-tab"):
                            array = line[6]
                            #print(array)
                            try:
                                array = float(array)
                                ls.append(array)
                                cor.append(array >= 0.2)
                                cor_int.append(array >= 0.2)
                                cor2.append(array >= 0.4)
                                cor2_int.append(array >= 0.4)
                            except ValueError:   # Catch the error if user input is not a number
                                #print('Could not read number for sub-' +subject+tasks) 
                                continue  

                    ## converting boolean true/false to zeros/ones
                    for element in range (0,len(cor)):
                        if cor[element] == 0:
                            cor[element] = int(0)
                            cor_int[element] = cor[element]
                            cor[element] = str(cor[element])
                        else:
                            cor[element] = int(1)
                            cor_int[element] = cor[element]
                            cor[element] = str(cor[element])

                    ## converting boolean true/false to zeros/ones
                    for element in range (0,len(cor2)):
                        if cor2[element] == 0:
                            cor2[element] = int(0)
                            cor2_int[element] = cor2[element]
                            cor2[element] = str(cor2[element])
                        else:
                            cor2[element] = int(1)
                            cor2_int[element] = cor2[element]
                            cor2[element] = str(cor2[element])

                    motion_mean = mean(ls)

                    # if 25% of the trial is motion, exclude
                    temp_len = len(cor) # in case certain runs have different lengths
                    exclude = (sum(cor_int) >= (0.25*temp_len)) #changed this so be careful it is right
                    exclude2 = (sum(cor2_int) >= (0.25*temp_len))


                    out0 = Convert_Subname(subject)
                    out1 = site
                    out2 = 'sub-'+subject
                    out3 = find_between(tasks, "task-", "_run") # task label
                    out4 = substring_after(tasks, "run-") # run lavel
                    out5 = str(sum(cor_int))
                    out6 = str(int(exclude))
                    out7 = str(sum(cor2_int))  
                    out8 = str(int(exclude2))
                    out9 = str(motion_mean)

                    # save to a text file
                    fo = open("INCLUDE.csv", "a")
                    for element in range (0,1):
                    # Write sequence of lines at the end of the file.
                        fo.write(out0 + "\t" + out1 + "\t" + out2 + "\t" + out3 + "\t" + out4 + "\t" + out5 + "\t"
                                + out6+ "\t" + out7+ "\t" + out8 + "\t" + out9 + "\n")
                        fo.close()
                        
#                      # save to a text file
#                     fo = open(location + 'sub-'+subject+tasks+ "_MotionThreshold_0.2.txt", "w")
#                     for element in range (0,len(cor)):
#                     # Write sequence of lines at the end of the file.
#                         line = fo.writelines(cor[element]+'\n')
#                     fo.close()

#                     # save to a text file
#                     fo = open(location + 'sub-'+subject+tasks+ "_MotionThreshold_0.4.txt", "w")
#                     for element in range (0,len(cor2)):
#                     # Write sequence of lines at the end of the file.
#                         line = fo.writelines(cor2[element]+'\n')
#                     fo.close()
        elif (os.path.isdir(fmriprep) is False):
            #print("Missing MAIN fmriprep FOLDER for subject "+subject)
            fo = open("MISSING_FMRIPREP.csv", "a")          
            line = fo.writelines(subject+'\t'+'no_fmriprep_folder'+'\n')
            fo.close()   
        elif (os.path.isdir(location) is False):
            #print("Missing FUNC fmriprep FOLDER for subject "+subject)
            fo = open("MISSING_FMRIPREP.csv", "a")          
            line = fo.writelines(subject+'\t'+'no_func_folder'+'\n')
            fo.close()         


        


SAXSCOTTFM01task-sholo_run-001
SAXSCOTTFM02task-sholo_run-001
SAXSCOTTFM03task-sholo_run-001
SAXSCOTTFM04task-sholo_run-001
SAXSCOTTFM05task-sholo_run-001
SAXSCOTTFM06task-sholo_run-001
SAXSCOTTFM07task-sholo_run-001
SAXSCOTTFM08task-sholo_run-001
SAXSCOTTFM10task-sholo_run-001
SAXSCOTTFM11task-sholo_run-001
SAXSCOTTFM12task-sholo_run-001
SAXSCOTTFM13task-sholo_run-001
SAXSCOTTFM14task-sholo_run-001
SAXSCOTTFM15task-sholo_run-001
SAXSCOTTFM17task-sholo_run-001
SAXSCOTTFM18task-sholo_run-001
SAXSCOTTFM20task-sholo_run-001
SAXSCOTTFM21task-sholo_run-001
SAXSCOTTFM22task-sholo_run-001
SAXSCOTTFM23task-sholo_run-001
SAXSCOTTFM24task-sholo_run-001
SAXSCOTTFM26task-sholo_run-001
SAXSCOTTFM27task-sholo_run-001
SAXSCOTTFM28task-sholo_run-001
SAXSCOTTFM31task-sholo_run-001
SAXSCOTTFM34task-sholo_run-001
SAXSCOTTFM36task-sholo_run-001
SAXSCOTTFM37task-sholo_run-001
SAXSCOTTFM38task-sholo_run-001
SAXSCOTTFM41task-sholo_run-001
SAXSCOTTFM42task-sholo_run-001
SAXSCOTTFM43task-sholo_run-001
SAXSCOTT

SAXDOD017task-morphing_run-001
SAXDOD019task-morphing_run-001
SAXDOD019task-morphing_run-002
SAXDOD019task-morphing_run-003
SAXDOD019task-morphing_run-004
SAXDOD022task-morphing_run-004
SAXDOD022task-morphing_run-002
SAXDOD022task-morphing_run-001
SAXDOD022task-morphing_run-003
SAXDOD023task-morphing_run-003
SAXDOD023task-morphing_run-001
SAXDOD023task-morphing_run-004
SAXDOD023task-morphing_run-002
SAXDOD024task-morphing_run-003
SAXDOD024task-morphing_run-002
SAXDOD024task-morphing_run-001
SAXDOD024task-morphing_run-004
SAXDOD025task-morphing_run-001
SAXDOD025task-morphing_run-003
SAXDOD025task-morphing_run-002
SAXDOD025task-morphing_run-004
SAXDOD026task-morphing_run-001
SAXDOD026task-morphing_run-004
SAXDOD026task-morphing_run-003
SAXDOD026task-morphing_run-002
SAXDOD027task-morphing_run-004
SAXDOD027task-morphing_run-002
SAXDOD027task-morphing_run-001
SAXDOD027task-morphing_run-003
SAXDOD028task-morphing_run-001
SAXDOD028task-morphing_run-002
SAXDOD028task-morphing_run-004
SAXDOD02

SAXtasti205task-tomloc_run-002
SAXtasti205task-tomloc_run-001
SAXtasti206task-tomloc_run-001
SAXtasti206task-sholo_run-001
SAXtasti206task-tomloc_run-002
SAXtasti206task-biomo_run-001
SAXtasti207task-tomloc_run-002
SAXtasti207task-sholo_run-001
SAXtasti207task-tomloc_run-001
SAXtasti207task-biomo_run-001
SAXtasti209task-sholo_run-001
SAXtasti209task-tomloc_run-001
SAXtasti209task-tomloc_run-002
SAXtasti209task-biomo_run-001
SAXtasti210task-sholo_run-001
SAXtasti210task-tomloc_run-001
SAXtasti210task-tomloc_run-002
SAXtasti210task-biomo_run-001
SAXtasti211task-tomloc_run-002
SAXtasti211task-sholo_run-001
SAXtasti211task-tomloc_run-001
SAXtasti211task-biomo_run-001
SAXtasti212task-biomo_run-001
SAXtasti212task-tomloc_run-001
SAXtasti212task-sholo_run-001
SAXtasti212task-tomloc_run-002
SAXtasti213task-tomloc_run-001
SAXtasti213task-tomloc_run-002
SAXtasti213task-biomo_run-001
SAXtasti213task-sholo_run-001
SAXtasti214task-biomo_run-001
SAXtasti214task-tomloc_run-002
SAXtasti214task-tomloc_

In [11]:
# create a final text file with only subjects that are NOT motion outliers (EXCLUDING TOMLOC)

pathway = '/om/user/rezzo/'

Motion_filt_lenient = []
Motion_filt_strict = []

with open(pathway+"INCLUDE.csv", "r") as infile:
    X = [list(map(str, line.split())) for line in infile]
    for line in X:
        #print(line)
        if line[8] == '0' and line[3] != 'tomloc':
            print(line[8])
            Motion_filt_lenient.append(line)
        if line[6] == '0' and line[3] != 'tomloc':
            Motion_filt_strict.append(line)
            
# save to a text file
fo = open("/om/user/rezzo/OpenAutism/analysis_data/MOTION_INFO/Runs_LenientMotionFiltered.tsv", "a")
for element in range (0,len(Motion_filt_lenient)):
#     print(Motion_filt_lenient[element])
    fo.write(Motion_filt_lenient[element][0]+Motion_filt_lenient[element][3]+Motion_filt_lenient[element][4]+ "\n")
fo.close()

fo = open("/om/user/rezzo/OpenAutism/analysis_data/MOTION_INFO/Runs_StrictMotionFiltered.tsv", "a")
for element in range (0,len(Motion_filt_strict)):
#     print(Motion_filt_strict[element])
    fo.write(Motion_filt_strict[element][0]+Motion_filt_strict[element][3]+Motion_filt_strict[element][4]+ "\n")
fo.close()



0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0


In [209]:
##also save INCLUDE file with all motion info, separating tomloc

f=pd.read_csv(pathway+"INCLUDE.csv", sep='\t', header=None)
f.columns = ["Sub_ID", "site", "old_id", "Task", "Run", "# FD > 0.2", "Exclude_strict", "# FD > 0.4", "Exclude_lenient", "ave_motion"]
f = f.drop(['site', 'old_id'], axis=1)
f = f[f.Task != 'tomloc']


f.to_csv("/om/user/rezzo/OpenAutism/analysis_data/MOTION_INFO/runs_motion_info.txt", sep='\t', 
                  header=True)
