# FSL level1 on Bro data

In [34]:
import os, glob
from IPython.core import display as ICD
from subprocess import check_output
import pandas as pd
import re

pd.set_option('display.max_rows', 200)


## Check available data in preprocessed/

Example subject directory holding preprocessed data:  

`bids/derivatives/preprocessed/sub-001
├── anat
├── ses-1
│   └── func
│       └── motion_assessment
│           └── motion_parameters
└── ses-2
    └── func
        └── motion_assessment
            └── motion_parameters`

In [35]:
def check_files(dir_path, sub_id,session, data_dict):
    
    funcs = glob.glob(os.path.join(dir_path, sub_id, session, "func/*brain.nii.gz"))
    func_ct = len(funcs)
    data_dict[sub_id][session]["func_ct"] = func_ct

    anat = glob.glob(os.path.join(dir_path, sub_id, "anat/*"))
    anat_ct = len(anat) 
    data_dict[sub_id][session]["anat_ct"] = anat_ct


    exp = "{}_{}*.txt".format(sub_id, session)
    onsets = glob.glob('/projects/niblab/bids_projects/Experiments/bro/bids/derivatives/onsets/output_onsets/%s'%exp)
    onset_ct = len(onsets)
    data_dict[sub_id][session]["onset_ct"] = onset_ct

    
    confounds = glob.glob(os.path.join(dir_path, sub_id, session, "func/motion_assessment/*.txt"))
    confound_ct = len(confounds) 
    data_dict[sub_id][session]["confound_ct"] = confound_ct

    
    mocos = glob.glob(os.path.join(dir_path, sub_id, session, "func/motion_assessment/motion_parameters/*.txt"))
    moco_ct = len(mocos) 
    data_dict[sub_id][session]["moco_ct"] = moco_ct
        
        

In [53]:
print("\nBRO PREPROCESSED DATA CHECK: \n")

preproc_path="/projects/niblab/bids_projects/Experiments/bro/bids/derivatives/preprocessed"
sessions = ['ses-1', 'ses-2']
data_dict = {}


for session in sessions:
    subjects = sorted(glob.glob(os.path.join(preproc_path, 'sub-*', session)))
    sub_ids = [x.split("/")[-2] for x in subjects]
    subjects_ct = len(subjects)
    print("Subject {} count: {}".format(session, subjects_ct))
    #print("{} subjects: \n{} \n".format(session, sub_ids))

    for sub_id in sub_ids:
        if sub_id not in data_dict:
            data_dict[sub_id] = {}
            
        if session not in data_dict[sub_id]:
            data_dict[sub_id][session] = {}
            
        
        check_files(preproc_path, sub_id, session, data_dict)

df = pd.concat({k: pd.DataFrame(v).T for k, v in data_dict.items()}, axis=0)
print("\nPREPROCESSED DIRECTORY COUNT: ")
display(df)


ses1_df = df.xs('ses-1', level=1)
ses1_good_df = ses1_df[(ses1_df.T != 0).all()]
ses1_empty_df = ses1_df[(ses1_df.T == 0).any()]



ses2_df = df.xs('ses-2', level=1)
ses2_good_df = ses2_df[(ses2_df.T != 0).all()]
ses2_empty_df = ses2_df[(ses2_df.T == 0).any()]


print("\nsession 1 good subjects: ")
print(ses1_good_df.index.values)
ICD.display(ses1_good_df.head())

print("\nsession 1 subjects with empty files: ")
ICD.display(ses1_empty_df.head())

print("\nsession 2 good subjects: ")
print(ses2_good_df.index.values)
ICD.display(ses2_good_df.head())

print("\nsession 2 subjects with empty files: ")
ICD.display(ses2_empty_df.head())




BRO PREPROCESSED DATA CHECK: 

Subject ses-1 count: 54
Subject ses-2 count: 54

PREPROCESSED DIRECTORY COUNT: 


Unnamed: 0,Unnamed: 1,func_ct,anat_ct,onset_ct,confound_ct,moco_ct
sub-001,ses-1,0,1,0,0,0
sub-001,ses-2,5,1,0,10,30
sub-002,ses-1,0,1,0,0,0
sub-002,ses-2,5,1,0,10,30
sub-003,ses-1,5,1,0,10,31
sub-003,ses-2,4,1,0,8,24
sub-004,ses-1,5,1,0,10,31
sub-004,ses-2,5,1,32,10,30
sub-006,ses-1,0,0,0,0,0
sub-006,ses-2,0,0,0,0,0



session 1 good subjects: 
['sub-020' 'sub-022' 'sub-025' 'sub-026' 'sub-028' 'sub-029' 'sub-030'
 'sub-032' 'sub-033' 'sub-035' 'sub-036' 'sub-037' 'sub-038' 'sub-039'
 'sub-040' 'sub-041' 'sub-043' 'sub-044' 'sub-045' 'sub-046' 'sub-047'
 'sub-052' 'sub-053']


Unnamed: 0,func_ct,anat_ct,onset_ct,confound_ct,moco_ct
sub-020,3,1,16,6,19
sub-022,5,1,32,10,31
sub-025,5,1,32,10,31
sub-026,5,1,32,10,31
sub-028,5,1,32,10,31



session 1 subjects with empty files: 


Unnamed: 0,func_ct,anat_ct,onset_ct,confound_ct,moco_ct
sub-001,0,1,0,0,0
sub-002,0,1,0,0,0
sub-003,5,1,0,10,31
sub-004,5,1,0,10,31
sub-006,0,0,0,0,0



session 2 good subjects: 
['sub-004' 'sub-022' 'sub-025' 'sub-026' 'sub-027' 'sub-028' 'sub-029'
 'sub-030' 'sub-032' 'sub-033' 'sub-036' 'sub-038' 'sub-039' 'sub-040'
 'sub-041' 'sub-044' 'sub-045' 'sub-046' 'sub-047' 'sub-052' 'sub-053']


Unnamed: 0,func_ct,anat_ct,onset_ct,confound_ct,moco_ct
sub-004,5,1,32,10,30
sub-022,5,1,32,10,30
sub-025,5,1,32,10,30
sub-026,5,1,32,10,30
sub-027,5,1,24,10,30



session 2 subjects with empty files: 


Unnamed: 0,func_ct,anat_ct,onset_ct,confound_ct,moco_ct
sub-001,5,1,0,10,30
sub-002,5,1,0,10,30
sub-003,4,1,0,8,24
sub-006,0,0,0,0,0
sub-007,5,1,0,10,30


## Setup FSL level 1 models  
#### model design: [link to model](https://docs.google.com/spreadsheets/d/1bj3it16jW8lASIGgL9TIAsg0x2XuOT1JL71rTaEJ3aw/edit#gid=1860969306)   
**design files on RENCI:** /projects/niblab/bids_projects/Experiments/bro/bids/derivatives/design_files

In [None]:
def make_file(sub, sub_path, sess, main_dict, task, deriv_dir, fsf_template):
    print("making file for subject ", sub)
    #for sess_id in sessions:


    if task == "resting":
        # case - no runs, only single task (i.e. resting)
        pass

    else:
        for key in main_dict[sub]:
            if key != "ANAT":
                run = key
                #print(run)
                outpath = os.path.join(deriv_dir, sub, 'func', 'Analysis', "feat1")
                if not os.path.exists(outpath):
                    os.makedirs(outpath)


                with open(fsf_template, 'r') as infile:
                    #print("Opening template file {}".format(fsf_template))
                    tempfsf = infile.read()

                    #  fill in tempfsf file with parameters
                    tempfsf = tempfsf.replace("OUTPUT", main_dict[sub][run]["OUTPUT"])
                    tempfsf = tempfsf.replace("FUNCTIONAL", main_dict[sub][run]["FUNC"])
                    tempfsf = tempfsf.replace("CONFOUND", main_dict[sub][run]['CONFOUND'])
                    tempfsf = tempfsf.replace("VOL", main_dict[sub][run]['VOL'])


                    # loop through keys in dict to find EVs and MOCOs
                    for key in main_dict[sub][run]:


                        # Fill in EVS
                        if re.match(r'EV', key):
                            ev_name= "{}_file".format(key.replace("EV_", ""))
                            ev = main_dict[sub][run][key]
                            tempfsf = tempfsf.replace(ev_name, ev)
                            #print(ev_name)
                        if re.match(r'moco', key):
                            moco_file = main_dict[sub][run][key]
                            moco_id = moco_file.split("/")[-1].split("_")[3].split(".")[0].upper()
                            tempfsf = tempfsf.replace(moco_id, moco_file)
                            #print(moco_id)

                    fsf_outfile = 'task-%s_run-%s_expanded2.0.fsf' % (task, run)
                    print(fsf_outfile)
                    #print(outpath)
                    #print(tempfsf)
                    with open(os.path.join(outpath, fsf_outfile), 'w') as outfile: #os.path.join(outpath,
                        outfile.write(tempfsf)
                    outfile.close()
                infile.close()

In [99]:
def fill_dict(sub, sub_path, main_dict, task, sess, evs, all_runs):
    
    
    #print("SUBJECT: %s \t TASK: %s \nPATH: %s"% (sub, task, sub_path))

    # only specified sessions
    #for sess_id in sessions:

    if task == 'resting':
        # case for no runs, only task (i.e. resting)
        pass
    else:
    # 2 cases: individual/given runs or all runs found

        # case 1: if flag false, grab all available runs found
        if all_runs == True:
            funcs_found = glob.glob(os.path.join(sub_path, 'func',
                                         "%s_%s_task-%s_run-*preproc*brain.nii.gz" % (sub,sess,task)))
            #print(funcs_found)
            runs=[x.split("/")[-1].split("_")[3].split("-")[1] for x in funcs_found]
            #print(runs)
            for run in runs:
                main_dict[sub][run] = {}
            #print("Dictionary initialized as: {}".format(main_dict[sub]))

            for func in funcs_found:
                x = int(run)
                run=func.split("/")[-1].split("_")[3].split("-")[1]
                #print(run)
                
                # SET OUTPUT PATH FOR FEAT DIRECTORY
                output_path=os.path.join(sub_path, 'func',
                                         'Analysis', 'feat1', 'task-%s_run-%s' %(task, run))


                # SET CONFOUND
                # sub-001_task-prob_run-1_bold_space-MNI152NLin2009cAsym_preproc_brain_confound.txt

                confound = os.path.join(sub_path, 'func', 'motion_assessment',
                                 '%s_task-%s_run-%s_bold_space-MNI152NLin2009cAsym_preproc_brain_confound.txt'%(sub, task, run))

                # SET ANAT
                anat = os.path.join(deriv_path, "preprocessed", sub, 'anat', 'highres.nii.gz')





                # FILL DICTIONARY
                main_dict[sub]['ANAT'] = anat
                main_dict[sub][run]['OUTPUT'] = output_path
                scan = func.split(".")[0]
                main_dict[sub][run]['FUNC'] = scan
                vol = check_output(['fslnvols', scan])
                vol = vol.decode('utf-8')
                vol = vol.strip('\n')
                main_dict[sub][run]['VOL'] = vol
                main_dict[sub][run]['CONFOUND'] = confound



                # TRS FROM NIFTI -- this value will always be 2, therefore we only run the check once
                trs = check_output(['fslval', '%s' % (scan), 'pixdim4', scan])
                trs = trs.decode('utf-8')
                trs = trs.strip('\n')
                # print("TRs: ", trs)

                main_dict[sub][run]['TR'] = trs


                # SET MOTION PARAMETERS
                for i in range(6):
                    motcor = os.path.join(sub_path, 'func', 'motion_assessment', 'motion_parameters',
                                      '%s_task-%s_run-%s_moco%s.txt' % (sub, task, run, i))
                    main_dict[sub][run]['moco%i' % i] = motcor


                # SET EVS
                # Loop through the given EVs and add the corresponding file to the dictionary

                ctr = 0
                onset_path = "/projects/niblab/bids_projects/Experiments/bro/bids/derivatives/onsets/output_onsets"
                for ev_name in evs:
                    # print(item)
                    ctr = ctr + 1
                    #sub-037_ses-1_training_run-1_h2o.txt
                    ev = os.path.join(onset_path,  '%s_%s_%s_run-%s_%s.txt' % (sub, sess, task, run, ev_name))
                    
                    #print(ev)
                    # print("EV: ", ev)
                    main_dict[sub][run]['EV_%s' % ev_name] = ev
                    
                    
                    

In [106]:
def setup_lvl1_design_files(sub_ids):
    #set_paths()
    
    # removed path function for now
    print("Starting program....")
    
    deriv_dir = "/projects/niblab/bids_projects/Experiments/bro/bids/derivatives"
    main_dict = {}
    run_bash = False
    write_file = True
    sess = "ses-2"
    if write_file == True:
        ## case: Get all subjects available --add flag for individual subjects or passed list option
        for sub in sub_ids:
            sub_path = os.path.join(deriv_dir,"preprocessed", sub, sess)
            
            # set variables
            task = "training"
            evs = ['milkshake_cue', 'milkshake_delivery', 'h2O_cue', 'h2O_delivery', 'rinse']
            all_runs = True
            fsf_template = os.path.join(deriv_dir,'design_files/training_design1.fsf')


            #set_dict(sub)

            if sub not in main_dict:
                main_dict[sub] = {}


            fill_dict(sub,sub_path, main_dict, task, sess, evs, all_runs)

            def make_file(sub, sub_path, sess, main_dict, run, task, deriv_dir, fsf_template)
            #make_file(sub, main_dict, task, deriv_dir, fsf_template)
    

    if run_bash == True:
        
        subject_set = sorted([x.split("/")[-1].split("-")[1].lstrip("0") for x in subject_folders])

        bash_file = os.path.join('/projects/niblab/bids_projects/Experiments/Bevel/derivatives/code', 'feat1_exp2.0.job')
        start = subject_set[0]
        end = subject_set[-1]
        #print(start, end)
        #for sub_num in subject_set:
        shell_cmd = "sbatch --array={}-{}%{} {}".format(start, end, len(subject_set), bash_file)
        os.system(shell_cmd)
        print(shell_cmd)

            


### Run files

In [107]:
sub_ids = ses2_good_df.index.values
setup_lvl1_design_files(sub_ids)

Starting program....
{'2': {'OUTPUT': '/projects/niblab/bids_projects/Experiments/bro/bids/derivatives/preprocessed/sub-004/ses-2/func/Analysis/feat1/task-training_run-2', 'FUNC': '/projects/niblab/bids_projects/Experiments/bro/bids/derivatives/preprocessed/sub-004/ses-2/func/sub-004_ses-2_task-training_run-2_space-MNI152NLin2009cAsym_desc-preproc_bold_brain', 'VOL': '243', 'CONFOUND': '/projects/niblab/bids_projects/Experiments/bro/bids/derivatives/preprocessed/sub-004/ses-2/func/motion_assessment/sub-004_task-training_run-2_bold_space-MNI152NLin2009cAsym_preproc_brain_confound.txt', 'TR': '2.000000 ', 'moco0': '/projects/niblab/bids_projects/Experiments/bro/bids/derivatives/preprocessed/sub-004/ses-2/func/motion_assessment/motion_parameters/sub-004_task-training_run-2_moco0.txt', 'moco1': '/projects/niblab/bids_projects/Experiments/bro/bids/derivatives/preprocessed/sub-004/ses-2/func/motion_assessment/motion_parameters/sub-004_task-training_run-2_moco1.txt', 'moco2': '/projects/nibla

## QC level 1 models