# Preprocessing  
This notebook performs several preprocessing setup steps.  
The currently available options are:  
* Setup derivative subject directories  
* Move onsets from given directory path to subject derivative onset folder  
* Get motion parameters from fmriprep tsv
* Get confounds from fmriprep tsv  
* Run fsl_motion_outliers  
* Generate a report that counts the files in each subject's derivative folder


In [1]:
import os, glob
from IPython.core import display as ICD
import pandas as pd
import subprocess 
from shutil import copy2
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns',None)
import multiprocessing as mp


### Get the subject IDs from the derivatives directory:

In [2]:
# Collect every "sub-*" entry under the derivatives folder and keep only the
# last path component (the subject ID), sorted alphabetically.
subject_dirs = glob.glob("/projects/niblab/bids_projects/Experiments/bbx/bids/derivatives/sub-*")
subject_ids = sorted(os.path.basename(subject_dir) for subject_dir in subject_dirs)


In [3]:
# Root of the BIDS derivatives tree; the per-subject and fmriprep paths used
# in the cells below are all joined onto this directory.
deriv_path = "/projects/niblab/bids_projects/Experiments/bbx/bids/derivatives"

In [4]:
# Switches for the setup loop in the next cell; set one to True to run that step.
# move_onsets: copy staged onset files into each subject's func/onsets folder.
move_onsets = False          
# copy_imgs: gather fmriprep report figures (.svg) into the fmriprep_images folder.
copy_imgs = False
# make_folder: create the per-subject derivative directory tree.
make_folder = False

In [5]:


# Per-subject setup: optionally create the derivative folder tree, copy staged
# onset files, and collect fmriprep report figures.  Each step is controlled
# by the make_folder / move_onsets / copy_imgs flags.
for sub_id in subject_ids:

    # Paths are computed unconditionally so the steps below can run
    # independently (previously onset_path only existed when make_folder
    # was True, so move_onsets on its own raised a NameError).
    subject_folder = os.path.join(deriv_path, sub_id, "ses-1")
    anat_path = os.path.join(subject_folder, "anat")
    func_path = os.path.join(subject_folder, "func")
    onset_path = os.path.join(subject_folder, "func/onsets")
    motion_path = os.path.join(subject_folder, "func/motion_assessment")
    motion_param_path = os.path.join(motion_path, "motion_parameters")

    if make_folder:
        # exist_ok=True makes this safe to re-run; creating motion_param_path
        # also creates motion_path, and it is created even when
        # motion_assessment already exists without its sub-folder.
        for folder in (anat_path, func_path, onset_path, motion_param_path):
            os.makedirs(folder, exist_ok=True)

    # Copy onsets to folders by subject
    # may need to modify input path
    if move_onsets:
        orig_onsets = glob.glob(os.path.join("/projects/niblab/onesets_staging/bbx_onsets_pre_12_12", "{}_*".format(sub_id)))

        if orig_onsets:
            # copy2 into a missing directory would instead create a *file*
            # called "onsets", so make sure the destination folder exists.
            os.makedirs(onset_path, exist_ok=True)

        for onset in orig_onsets:
            print("Onset path: \t{} \nOnset: {} \n\n".format(onset_path, onset))
            copy2(onset, onset_path)

    # gather images for visual reports
    if copy_imgs:
        # get target images
        img_path = os.path.join(deriv_path, "fmriprep_images")
        fmriprep_imgs = glob.glob(os.path.join(img_path, "{}_*".format(sub_id)))

        # Only copy when no figure for this subject has been collected yet.
        if not fmriprep_imgs:
            fmriprep_sub = os.path.join(deriv_path, "fmriprep", sub_id)
            # The T1w figure is looked up in the subject-level figures
            # folder, the functional figures in the ses-1 figures folder.
            figure_patterns = [
                os.path.join(fmriprep_sub, "figures", "{}_*T1w.svg".format(sub_id)),
                os.path.join(fmriprep_sub, "ses-1/figures", "{}_*-brain_mask.svg".format(sub_id)),
                os.path.join(fmriprep_sub, "ses-1/figures", "{}_*-sdc_bold.svg".format(sub_id)),
                os.path.join(fmriprep_sub, "ses-1/figures", "{}_*-fieldmap_bold.svg".format(sub_id)),
                os.path.join(fmriprep_sub, "ses-1/figures", "{}_*-flirtbbr_bold.svg".format(sub_id)),
            ]

            os.makedirs(img_path, exist_ok=True)
            for pattern in figure_patterns:
                for img in glob.glob(pattern):
                    print("Copying {} to {} \n".format(img, img_path))
                    copy2(img, img_path)
     
    
    

### We want to see which subjects already have their motion parameter files, and create the ones that are missing.

In [6]:
# Switches for the per-subject motion/QC loop below; set one to True to run that step.
# fsl_motion_outlier: run run_fsl_motion_outliers() on the skull-stripped bold images.
fsl_motion_outlier = False
# prep_motion_outlier: read the fmriprep confound TSVs (required for fd_check / mot_outlier).
prep_motion_outlier = False
# get_mocos: write the six motion parameters from each confound TSV to *_moco0-5.txt.
get_mocos = False
# move_anat: copy the fmriprep preprocessed T1w into the subject's anat folder as highres.nii.gz.
move_anat = False
# fd_check: fill the framewise-displacement columns of the QC report.
fd_check=False
# mot_outlier: fill the motion-outlier count columns of the QC report.
mot_outlier = False

In [7]:
# Tab-separated QC report template, one row per subject.
report_template = '/projects/niblab/bids_projects/Experiments/bbx/bids/code/qc_report_s1.tsv'


# Load the template and use the "subject" column as the row index so that
# cells can be addressed as report_df.loc[<subject id>, <column>].
report_df = pd.read_csv(report_template, sep='\t').set_index("subject")

In [None]:
        
        
def skull_strip(sub,
                deriv_root="/projects/niblab/bids_projects/Experiments/bbx/bids/derivatives",
                pattern="*_preproc.nii.gz",
                frac="0.6"):
    """Run FSL ``bet`` on a subject's fmriprep functional images.

    Every image matching ``pattern`` in
    ``<deriv_root>/fmriprep/<sub>/ses-1/func`` is skull-stripped into
    ``<deriv_root>/<sub>/ses-1/func/<name>_brain``.  Images whose output
    already exists are skipped.

    Args:
        sub: Subject ID, e.g. ``"sub-001"``.
        deriv_root: Derivatives directory holding both the ``fmriprep``
            folder and the per-subject output folders.
        pattern: Glob pattern selecting the input images.
        frac: Value passed to bet's ``-f`` option.

    Returns:
        List of output paths (without extension) for which ``bet`` ran and
        exited with status 0.  Empty when nothing needed to be done.

    NOTE(review): the default pattern ``*_preproc.nii.gz`` does not match
    the ``*_desc-preproc_bold.nii.gz`` names used elsewhere in this
    notebook; confirm which fmriprep naming scheme is on disk.
    """
    print(">>>>---> starting bet on ", sub )
    input_dir = os.path.join(deriv_root, "fmriprep", sub, "ses-1", "func")
    output_dir = os.path.join(deriv_root, sub, "ses-1", "func")
    created = []

    for nifti in sorted(glob.glob(os.path.join(input_dir, pattern))):
        # make our variables
        bet_name = os.path.basename(nifti).split(".")[0] + "_brain"
        bet_output = os.path.join(output_dir, bet_name)
        print("SKULL STRIP, NEW FILE TO BE MADE: ", bet_name)

        # The rest of the notebook globs for "*_brain.nii.gz", so look for the
        # gzipped output as well; checking only ".nii" never found an existing
        # result and re-ran bet every time.
        if any(os.path.exists(bet_output + ext) for ext in (".nii.gz", ".nii")):
            print(bet_output + ' exists, skipping \n')
            continue

        print("Running bet on ", nifti)
        bet_cmd = ["bet", nifti, bet_output, "-F", "-m", "-f", str(frac)]
        print(">>>-----> BET COMMAND:", " ".join(bet_cmd))
        try:
            # Argument list instead of a shell string: no quoting problems.
            status = subprocess.run(bet_cmd).returncode
        except OSError as err:
            # e.g. bet is not on PATH; report and carry on with the next image
            print("bet could not be started for {}: {}".format(nifti, err))
            continue

        if status == 0:
            created.append(bet_output)
        else:
            print("bet exited with status {} for {}".format(status, nifti))

    return created
    
def run_fsl_motion_outliers(nifti):
    """Run ``fsl_motion_outliers`` on one image and record high-motion runs.

    Writes three files into ``motion_assessment_path``, named after the part
    of the image name before ``_bold_``: ``*_confound.txt`` (outlier
    matrix), ``*_fd_plot`` and ``*_outlier_output.txt`` (the command's
    verbose output).  If the number of "1" characters in the confound file
    exceeds 25% of the image's volume count, the image is appended to the
    file named by ``out_bad_bold_list``.

    The function reads these module-level names, which must be set before
    it is called: ``motion_assessment_path``, ``deriv_path`` and
    ``out_bad_bold_list``.
    NOTE(review): ``out_bad_bold_list`` is not defined in the visible part
    of this notebook — confirm it is set in another cell.

    Args:
        nifti: Path to the (skull-stripped) bold image, ``.nii`` or ``.nii.gz``.

    Raises:
        subprocess.CalledProcessError: if ``fslnvols`` fails on ``nifti``.
    """
    # Drop only the image extension (splitting on the first "." broke on any
    # path that contained a dot elsewhere).
    filename = nifti
    for ext in (".nii.gz", ".nii"):
        if filename.endswith(ext):
            filename = filename[:-len(ext)]
            break
    base_name = os.path.basename(filename)
    new_filename = base_name.split("_bold_")[0]

    os.makedirs(motion_assessment_path, exist_ok=True)
    outlier_path = "%s/%s_outlier_output.txt" % (motion_assessment_path, new_filename)
    plot_path = "%s/%s_fd_plot" % (motion_assessment_path, new_filename)
    confound_path = "%s/%s_confound.txt" % (motion_assessment_path, new_filename)

    # A run is flagged when more than a quarter of its volumes are outliers.
    volume = subprocess.check_output(["fslnvols", nifti], encoding="utf-8").strip()
    comparator = int(volume) * .25

    ## RUN 'fsl_motion_outliers' TO RETRIEVE MOTION CORRECTION ANALYSIS
    outlier_cmd = ["fsl_motion_outliers", "-i", filename, "-o", confound_path,
                   "--fd", "--thresh=0.9", "-p", plot_path, "-v"]
    try:
        # Verbose output goes to the log file, as the shell redirect did.
        with open(outlier_path, "w") as log_file:
            status = subprocess.run(outlier_cmd, stdout=log_file).returncode
    except OSError as err:
        status = None
        print("fsl_motion_outlier command could not be started: {}".format(err))
    print("OUTLIER CMD: ", " ".join(outlier_cmd))
    if status != 0:
        # os.system never raises, so the old try/except could not report this
        print("fsl_motion_outlier command failed: \n{}".format(" ".join(outlier_cmd)))

    # --sometimes you have a great subject who didn't move
    if not os.path.isfile(confound_path):
        print("No confounds found, writing blank file......")
        open(confound_path, "a").close()

    ## CHECK FOR BAD SUBJECTS: ABOVE OUR THRESHOLD
    # Same count as `grep -o 1 <file> | wc -l`: every "1" in the file.
    with open(confound_path) as confound_file:
        num_scrub = confound_file.read().count("1")
    print("NUM SCRUB: ", str(num_scrub), "\n")

    if num_scrub > comparator:
        # making a file that lists all the bad ones
        with open(out_bad_bold_list, "a") as myfile:
            myfile.write("%s/%s\n" % (deriv_path, base_name))
            print("wrote bad file")


In [9]:
## mocos


                
# The original cell tested `skull_strip == True`, which compares the
# skull_strip *function* with True and is therefore never true.  The BET step
# now has its own switch, off by default like the other steps.
run_skull_strip = False

# fmriprep confound columns written to *_moco0.txt ... *_moco5.txt, in order.
MOCO_COLUMNS = ("trans_x", "trans_y", "trans_z", "rot_x", "rot_y", "rot_z")

# Suffix of the QC-report columns belonging to each run label.
RUN_SUFFIX = {"run-1": "r1", "run-2": "r2", "run-3": "r3", "run-4": "r4"}

if prep_motion_outlier:
    # Load the template ONCE.  It used to be re-read inside the subject loop,
    # which threw away the values written for every previous subject.
    report_df = pd.read_csv(report_template, sep="\t")
    report_df.set_index("subject", inplace=True)

for sub_id in sorted(subject_ids):
    # Read as a global by run_fsl_motion_outliers(); must be set before the
    # worker pool is created so the workers see this subject's folder.
    motion_assessment_path = os.path.join(deriv_path, "{}/ses-1/func/motion_assessment".format(sub_id))
    confound_glob = os.path.join(
        deriv_path,
        "fmriprep/{}/ses-1/func/{}_ses-1_task-*-confounds_regressors.tsv".format(sub_id, sub_id))

    if fsl_motion_outlier:
        print("Starting fsl_motion_outlier command")
        niftis = glob.glob(os.path.join(deriv_path, "{}/ses-1/func/{}*_bold_brain.nii.gz".format(sub_id, sub_id)))

        # map() blocks until every image is done; the context manager then
        # shuts the workers down (the pool used to be closed but never joined).
        with mp.Pool(16) as pool:
            pool.map(run_fsl_motion_outliers, niftis)

    if run_skull_strip:
        skull_strip(sub_id)

    if get_mocos:
        moco_dir = os.path.join(motion_assessment_path, "motion_parameters")

        for tsv in glob.glob(confound_glob):
            try:
                name_parts = os.path.basename(tsv).split("_")
                task = name_parts[2]
                # Resting-state files carry no run label in their name.
                if "resting" not in task:
                    stem = "{}_ses-1_{}_{}".format(sub_id, task, name_parts[3])
                else:
                    stem = "{}_ses-1_{}".format(sub_id, task)

                df = pd.read_csv(tsv, sep="\t")
                for idx, column in enumerate(MOCO_COLUMNS):
                    moco_file = os.path.join(moco_dir, "{}_moco{}.txt".format(stem, idx))
                    df[column].to_csv(moco_file, index=False)
                print(">>> Writing motion correction files.......")

            except Exception as err:
                # Best effort: report the file that failed and keep going.
                print("CANT MAKE MOCOS ", sub_id, "({}: {})".format(tsv, err))

    if prep_motion_outlier:
        for tsv in glob.glob(confound_glob):
            confound_df = pd.read_csv(tsv, sep='\t')

            name_parts = os.path.basename(tsv).split("_")
            task = name_parts[2]
            run = None if task == "task-resting" else name_parts[3]

            # set names
            if "resting" not in task:
                confound_path = os.path.join(deriv_path, "{}/ses-1/func/motion_assessment/{}_ses-1_{}_{}_confound.txt".format(sub_id, sub_id, task, run))
            else:
                confound_path = os.path.join(deriv_path, "{}/ses-1/func/motion_assessment/{}_ses-1_{}_confound.txt".format(sub_id, sub_id, task))

            if fd_check and run is not None:
                # Volume count of the matching preprocessed image; a run fails
                # the FD check when 25% or more of its volumes exceed 0.5.
                nifti = os.path.join(
                    deriv_path,
                    'fmriprep/{}/ses-1/func/{}_ses-1_{}_{}_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz'.format(
                        sub_id, sub_id, task, run))
                volume = subprocess.check_output(["fslnvols", nifti], encoding="utf-8").strip()
                comparator = int(volume) * .25

                # analyze FD values (NaN entries never count as > 0.5)
                fd_values = confound_df['framewise_displacement']
                max_fd = fd_values.max()
                min_fd = fd_values.min()
                fd_total_ct = int((fd_values > 0.5).sum())
                print("FD TOTAL: ", fd_total_ct)

                # Any run label other than run-1..3 is reported as run 4,
                # matching the previous if/elif/else chain.
                suffix = RUN_SUFFIX.get(run, "r4")
                report_df.loc[sub_id, "min_" + suffix] = min_fd
                report_df.loc[sub_id, "max_" + suffix] = max_fd
                report_df.loc[sub_id, "mcflirt_3mm_" + suffix] = "pass" if max_fd < 3.0 else "fail"
                report_df.loc[sub_id, "fd_" + suffix] = "pass" if fd_total_ct < comparator else "fail"

            if mot_outlier:
                try:
                    # One motion_outlierNN column per flagged volume.
                    mot_outlier_ct = confound_df.filter(regex='motion_outlier[0-9]').shape[1]
                    print("MOT OUTLIERS: ", mot_outlier_ct)
                    if run in RUN_SUFFIX:
                        report_df.loc[sub_id, 'mot_outlier_ct_' + RUN_SUFFIX[run]] = mot_outlier_ct
                except Exception as err:
                    print("passing ({} {}: {})".format(sub_id, run, err))

    if move_anat:
        new_file = os.path.join(deriv_path, "{}/ses-1/anat/highres.nii.gz".format(sub_id))

        if not os.path.exists(new_file):
            t1_file = os.path.join(deriv_path, "fmriprep/{}/anat/{}_space-MNI152NLin2009cAsym_desc-preproc_T1w.nii.gz".format(sub_id, sub_id))

            try:
                # Copy straight to the target name.  The old copy-then-rename
                # renamed the *source*, moving the T1w out of the fmriprep tree.
                print("> copying {} to {} \n\n".format(t1_file, new_file))
                copy2(t1_file, new_file)
            except OSError:
                print("bad subject %s \n" % sub_id)




Starting fsl_motion_outlier command
OUTLIER CMD:  fsl_motion_outliers -i /projects/niblab/bids_projects/Experiments/bbx/bids/derivatives/sub-001/ses-1/func/sub-001_ses-1_task-resting_space-MNI152NLin2009cAsym_desc-preproc_bold_brain  -o /projects/niblab/bids_projects/Experiments/bbx/bids/derivatives/sub-001/ses-1/func/motion_assessment/sub-001_ses-1_task-resting_space-MNI152NLin2009cAsym_desc-preproc_confound.txt --fd --thresh=0.9 -p /projects/niblab/bids_projects/Experiments/bbx/bids/derivatives/sub-001/ses-1/func/motion_assessment/sub-001_ses-1_task-resting_space-MNI152NLin2009cAsym_desc-preproc_fd_plot -v > /projects/niblab/bids_projects/Experiments/bbx/bids/derivatives/sub-001/ses-1/func/motion_assessment/sub-001_ses-1_task-resting_space-MNI152NLin2009cAsym_desc-preproc_outlier_output.txt
No confounds found, writing blank file......
NUM SCRUB:  0 

OUTLIER CMD:  fsl_motion_outliers -i /projects/niblab/bids_projects/Experiments/bbx/bids/derivatives/sub-001/ses-1/func/sub-001_ses-1_t

In [20]:
# Show the first rows of the QC report as a quick sanity check.
report_df.head()

Unnamed: 0_level_0,BIDS check,fmriprep check,Anat,epi_r1,sdc_r1,epi_r2,sdc_r2,epi_r3,sdc_r3,epi_r4,sdc_r4,From TSV Files,fd_r1,min_r1,max_r1,mcflirt_3mm_r1,fd_r2,min_r2,max_r2,mcflirt_3mm_r2,fd_r3,min_r3,max_r3,mcflirt_3mm_r3,fd_r4,min_r4,max_r4,mcflirt_3mm_r4,FSL Level 1,activation_r1,activation_r2,activation_r3,activation_r4,FSL Level 2,mot_outlier_ct_r1,mot_outlier_ct_r2,mot_outlier_ct_r3,mot_outlier_ct_r4
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1
sub-001,y,--,good,good,good,good,good,good,good,good,good,--,pass,0.038943,0.937265,pass,pass,0.031288,1.492913,pass,pass,0.021486,0.864922,pass,pass,0.036686,1.315115,pass,--,,,,,,,,,
sub-002,y,--,good,good,good,good,good,good,good,good,good,--,pass,0.029952,0.582735,pass,pass,0.033639,0.349757,pass,pass,0.015229,0.491711,pass,pass,0.041908,0.701858,pass,--,,,,,,,,,
sub-003,y,--,good,good,good,good,good,good,good,good,good,--,pass,0.028813,0.566795,pass,pass,0.009842,0.440724,pass,pass,0.029835,0.642389,pass,pass,0.035671,0.470508,pass,--,,,,,,,,,
sub-004,y,--,good,good,good,good,good,good,good,good,good,--,pass,0.021921,1.766394,pass,pass,0.028898,1.020489,pass,pass,0.026123,1.388662,pass,pass,0.024134,0.881343,pass,--,,,,,,,,,
sub-005,y,--,good,good,good,good,good,good,good,good,good,--,pass,0.008253,0.590167,pass,pass,0.01132,0.927735,pass,pass,0.005587,0.600573,pass,pass,0.021201,0.675988,pass,--,,,,,,,,,


In [64]:
# Save the QC report as a tab-separated file in the current working
# directory; index=True writes the row index as the first column.
report_df.to_csv("qc_report_s1_12.17.tsv", sep="\t", index=True)

In [13]:
# Read the saved report back in to check what was written; without index_col
# the subject IDs come back as an ordinary "subject" column.
df_check = pd.read_csv("qc_report_s1_12.17.tsv", sep="\t")
df_check.head()

Unnamed: 0,subject,BIDS check,fmriprep check,Anat,epi_r1,sdc_r1,epi_r2,sdc_r2,epi_r3,sdc_r3,...,FSL Level 1,activation_r1,activation_r2,activation_r3,activation_r4,FSL Level 2,mot_outlier_ct_r1,mot_outlier_ct_r2,mot_outlier_ct_r3,mot_outlier_ct_r4
0,sub-001,y,--,good,good,good,good,good,good,good,...,--,,,,,,48.0,61.0,29.0,38.0
1,sub-002,y,--,good,good,good,good,good,good,good,...,--,,,,,,22.0,24.0,7.0,15.0
2,sub-003,y,--,good,good,good,good,good,good,good,...,--,,,,,,8.0,33.0,37.0,24.0
3,sub-004,y,--,good,good,good,good,good,good,good,...,--,,,,,,42.0,72.0,59.0,79.0
4,sub-005,y,--,good,good,good,good,good,good,good,...,--,,,,,,45.0,66.0,59.0,63.0


## Quality Check

### Let's check our derivatives folder and see what's missing  
#### Currently we verify that the anat file exists and count the func, confound, moco, and onset files

In [18]:
# Build one QC record per subject: "good"/"missing" for the anatomical image
# and, for every other file group, the number of files found or "missing".
qc_dict = {}
for sub_id in subject_ids:
    folder_path = os.path.join(deriv_path, '{}/ses-1'.format(sub_id))

    # Files each subject is expected to have under its ses-1 folder.
    anat_file = os.path.join(folder_path, "anat", "highres.nii.gz")
    func_files = glob.glob(os.path.join(folder_path, "func", "*_brain.nii.gz"))
    confound_files = glob.glob(os.path.join(folder_path, "func", "motion_assessment/*_confound.txt"))
    moco_files = glob.glob(os.path.join(folder_path, "func", "motion_assessment/motion_parameters/*.txt"))
    onset_files = glob.glob(os.path.join(folder_path, "func/onsets/*.tsv"))

    subject_record = qc_dict.setdefault(sub_id, {})
    subject_record["anat"] = "good" if os.path.exists(anat_file) else "missing"
    subject_record["funcs"] = len(func_files) if func_files else "missing"
    subject_record["confounds"] = len(confound_files) if confound_files else "missing"
    subject_record["mocos"] = len(moco_files) if moco_files else "missing"
    subject_record["onsets"] = len(onset_files) if onset_files else "missing"
    

In [19]:
# qc_dict maps subject -> {check: value}; transpose so that each subject is a
# row and each check is a column, then render the full table.
qc_by_check = pd.DataFrame(qc_dict)
qc_df = qc_by_check.transpose()
ICD.display(qc_df)

Unnamed: 0,anat,funcs,confounds,mocos,onsets
sub-001,good,5,10,30,28
sub-002,good,5,10,30,28
sub-003,good,5,10,30,28
sub-004,good,5,10,30,21
sub-005,good,5,5,30,28
sub-006,good,5,5,30,28
sub-007,good,5,5,30,28
sub-008,good,5,5,30,28
sub-009,good,5,5,30,28
sub-010,good,5,5,30,28


In [22]:
# Subjects for which every check is "missing".  The comparison has to happen
# per cell BEFORE reducing across columns: `qc_df.all(axis=1) == "missing"`
# compared a boolean Series with a string and so always selected nothing.
# Use .any(axis=1) instead to list subjects with at least one missing item.
qc_df.loc[(qc_df == "missing").all(axis=1)]


Unnamed: 0,anat,funcs,confounds,mocos,onsets


In [12]:
# Save the folder QC table next to the derivatives.  The quality_check folder
# is created first because to_csv fails if the target directory is missing.
qc_report_path = os.path.join(deriv_path, "quality_check/analysis_folder_report.tsv")
os.makedirs(os.path.dirname(qc_report_path), exist_ok=True)
qc_df.to_csv(qc_report_path, sep="\t")