In [None]:
import os
import pandas as pd
import numpy as np
import glob
import json

In [None]:
def create_tsv(ses,group,task):
    """Collect MRIQC metrics for one session/group/task and save them as a TSV.

    Looks for per-run MRIQC JSON files under ``../../../derivatives/mriqc``
    (relative to the current working directory), reads the metrics listed
    below from each file and stores them in one row per subject/run.

    Parameters
    ----------
    ses : str
        Session label used in the ``ses-<ses>`` part of the path.
    group : str
        Prefix of the subject label (``sub-<group>*``).
    task : str
        Task label used in the ``task-<task>`` part of the file name.

    Returns
    -------
    pandas.DataFrame or None
        One row per JSON file with the columns ``subs_runs``, ``tsnr``,
        ``snr``, ``gsr_x``, ``gsr_y``, ``fd_mean`` and ``fd_perc``; ``None``
        (after printing a message) when no JSON file matches.

    Raises
    ------
    KeyError
        If a JSON file lacks one of the metrics.
    """
    #outlier types: tsnr, snr, gsr_x, gsr_y, fd_mean, fd_perc
    metric_names = ['tsnr','snr','gsr_x','gsr_y','fd_mean','fd_perc']

    #grab json files of all subjects in ses, group, task
    #(glob returns paths in arbitrary order; sort so the rows of the tsv are reproducible)
    mriqc_json_files = sorted(glob.glob(f'../../../derivatives/mriqc/sub-{group}*/ses-{ses}/func/sub-{group}*_ses-{ses}_task-{task}_rec-unco_run-*_bold.json'))

    if len(mriqc_json_files)==0:
        print(f'No mriqc output found for group {group} in session {ses} during task {task}.')
        return None

    #set up empty dictionary
    outlier_values_dict = {'subs_runs':[]}
    for metric_name in metric_names:
        outlier_values_dict[metric_name] = []

    for mriqc_json_file in mriqc_json_files:
        #context manager closes the file handle even if parsing fails
        with open(mriqc_json_file) as json_handle:
            mriqc_json = json.load(json_handle)

        #file name looks like sub-<sub>_ses-<ses>_task-<task>_rec-unco_run-<run>_bold.json
        file_name_parts = os.path.basename(mriqc_json_file).split('_')
        sub = file_name_parts[0].split('-')[-1]
        run = file_name_parts[-2].split('-')[-1]

        outlier_values_dict['subs_runs'].append(f'{sub}_{run}')
        for metric_name in metric_names:
            outlier_values_dict[metric_name].append(mriqc_json[metric_name])

        #report files with a negative fd_perc (presumably an implausible value - TODO confirm)
        if mriqc_json['fd_perc'] <0:
            print(mriqc_json_file)

    #turn dict into df
    outlier_values_df = pd.DataFrame.from_dict(outlier_values_dict)

    #save df as tsv
    save_task_tsv(outlier_values_df,ses,group,task)

    return outlier_values_df
    

In [None]:
def save_task_tsv(outlier_values_df,ses,group,task):
    """Write the metrics table of one group/session/task to a TSV file.

    The file is written to
    ``../../../derivatives/mriqc_summaries/group-<group>_ses-<ses>_task-<task>_rec-unco.tsv``
    (relative to the current working directory); the directory is created
    when it does not exist yet. An existing file with that name is
    overwritten.

    Parameters
    ----------
    outlier_values_df : pandas.DataFrame
        Table to save; written tab-separated without the index.
    ses, group, task : str
        Labels used to build the output file name.

    Returns
    -------
    None
    """
    #create path to output dir if not exist
    #(exist_ok avoids the race between checking for the folder and creating it)
    derivatives_path = '../../../derivatives'
    summaries_path = os.path.join(derivatives_path, 'mriqc_summaries')
    os.makedirs(summaries_path, exist_ok=True)

    #save outlier_values_df to file inside the folder that was just ensured
    output_file = os.path.join(summaries_path, f'group-{group}_ses-{ses}_task-{task}_rec-unco.tsv')
    outlier_values_df.to_csv(output_file, sep='\t',index=False)

    return

        

In [None]:
def create_iqr_cutoffs(ses, mriqc_summary_dfs):
    """Compute IQR-based outlier cutoffs per metric and save them for a session.

    All summary tables are stacked, and for every metric column the lower
    cutoff ``q25 - 1.5*IQR`` and the upper cutoff ``q75 + 1.5*IQR`` are
    calculated (in that order). The result is handed to
    ``save_iqr_cutoffs_tsv``.

    Parameters
    ----------
    ses : str
        Session label, passed on to ``save_iqr_cutoffs_tsv``.
    mriqc_summary_dfs : list of pandas.DataFrame
        Tables with a ``subs_runs`` column plus the metric columns
        ``tsnr``, ``snr``, ``gsr_x``, ``gsr_y``, ``fd_mean`` and ``fd_perc``.

    Returns
    -------
    None
        Nothing is calculated or saved (a message is printed instead) when
        ``mriqc_summary_dfs`` is empty.
    """
    #pd.concat raises on an empty list, so bail out early when there is nothing to summarise
    mriqc_summary_dfs = list(mriqc_summary_dfs)
    if len(mriqc_summary_dfs)==0:
        print(f'No mriqc summaries available for session {ses}; IQR cutoffs were not calculated.')
        return

    #set up dictionary to store cutoff values
    iqr_cutoffs_dict={'tsnr':[],'snr':[],'gsr_x':[],'gsr_y':[],'fd_mean':[],'fd_perc':[]}

    #stack QM metric dfs as one df and keep only the metric columns
    combined_mriqc_summary_df = pd.concat(mriqc_summary_dfs, ignore_index=True, sort=False)
    metrics_df = combined_mriqc_summary_df.drop(columns=['subs_runs'])

    #calculate IQR for columns; first entry is the lower cutoff, second the upper cutoff
    for col in metrics_df.columns:
        q25, q75 = np.percentile(metrics_df[col], [25 ,75])
        iqr = q75 - q25
        iqr_cutoffs_dict[col]+=[q25-1.5*iqr,q75+1.5*iqr]

    #save iqr-based cutoffs
    save_iqr_cutoffs_tsv(ses, iqr_cutoffs_dict)

    return

In [None]:
def save_iqr_cutoffs_tsv(ses, iqr_cutoffs_dict):
    """Write the IQR-based cutoffs of one session to a TSV file.

    The file is written to
    ``../../../derivatives/mriqc_summaries/iqr_cutoffs_ses-<ses>_rec-unco.tsv``
    (relative to the current working directory); the directory is created
    when it does not exist yet.

    Parameters
    ----------
    ses : str
        Session label used in the output file name.
    iqr_cutoffs_dict : dict
        Maps each metric name to a list of cutoff values; every key becomes
        a column and every list position a row of the saved table.

    Returns
    -------
    None
    """
    #make sure the output dir exists so this function does not depend on an earlier save
    summaries_path = os.path.join('../../../derivatives', 'mriqc_summaries')
    os.makedirs(summaries_path, exist_ok=True)

    #create df to save
    iqr_cutoffs_df = pd.DataFrame.from_dict(iqr_cutoffs_dict)

    #save iqr_cutoffs_df to files
    output_file = os.path.join(summaries_path, f'iqr_cutoffs_ses-{ses}_rec-unco.tsv')
    iqr_cutoffs_df.to_csv(output_file, sep='\t',index=False)

    return

        

In [None]:
ses_list=['baseline','1year']

#these all need to be present for the IQR summaries per session to be calculated properly!
group_list=['HC','MM']
task_list=['mid','sst','nback']

#refuse to run when per-task summary files are already present;
#the pattern has to match the names written by save_task_tsv (..._rec-unco.tsv)
existing_summary_files = glob.glob('../../../derivatives/mriqc_summaries/group-*_ses-*_task-*_rec-unco.tsv')

if len(existing_summary_files)!=0:
    print('At least some summary files exist and need to be deleted before the folder can be recreated.')

else:
    for ses in ses_list:
        #collect the metric tables of every group/task combination that has mriqc output
        outlier_dfs = []
        for group in group_list:
            for task in task_list:
                outlier_df = create_tsv(ses,group,task)
                if outlier_df is not None:
                    outlier_dfs.append(outlier_df)

        #cutoffs can only be calculated when at least one table was found for this session
        if len(outlier_dfs)!=0:
            create_iqr_cutoffs(ses,outlier_dfs)
        else:
            print(f'No mriqc output found for session {ses}; IQR cutoffs were not calculated.')
