# FSL FEAT nipype

In [1]:
import os
import glob

import nipype
import nipype.interfaces.io as nio
import nipype.interfaces.fsl as fsl
import nipype.interfaces.ants as ants
import nipype.pipeline.engine as pe
import nipype.interfaces.utility as util
import nipype.algorithms.modelgen as model

## pipeline setup

In [2]:
# Which dataset are we looking at?
# this_dataset = 'Leipzig_7T_SM'
# this_dataset = 'Leipzig_7T_GdH'
# this_dataset = 'NTNU_7T_SJSI'
# this_dataset = 'aron_3T'

In [3]:
# # general set-up
# Project root; every input and output path built in this notebook's top-level cells hangs off it.
base_dir = '/home/scotti/projects/3t_7t_sst_comparison'
# Parent folder for nipype working directories (the workflow cell builds its own
# base_dir from base_dir + 'processing/nipype_workflow_folders/all_datasets').
work_dir = os.path.join(base_dir, 'processing', 'nipype_workflow_folders')
# roi_hp_dir = os.path.join(base_dir, 'derivatives', 'fsl_feat_roi_func', this_dataset)

# High-pass filter cutoff handed to SpecifyModel.high_pass_filter_cutoff in the workflow cell.
hpcutoff = 10000.   # in seconds
# Model indices to iterate over (get_session_info: 0 = fs/ss/go, 1 = response_left/response_right).
model_n = [0]
# Output spaces to iterate over.
spaces = ['MNI152NLin2009cAsym']   # shouldn't touch this but just in case we _do_ want to go back to MNI....

# if this_dataset == 'Leipzig_7T_SM':
#     t_r = 3.0
#     subject_ids = [x.split('/')[-2].split('-')[-1] for x in glob.glob(os.path.join(roi_hp_dir, 'sub-*/'))]
#     numsubs = len(subject_ids)
#     task = 'stop'
# elif this_dataset == 'Leipzig_7T_GdH':
#     t_r = 2.0
#     subject_ids = [x.split('/')[-1].split('-')[-1] for x in glob.glob(os.path.join(roi_hp_dir, 'sub-*T'))]
#     numsubs = len(subject_ids)
#     task = 'stop'
# elif this_dataset == 'aron_3T':
#     t_r = 2.0
#     subject_ids = [x.split('/')[-2].split('-')[-1] for x in glob.glob(os.path.join(roi_hp_dir, 'sub-*/'))]
#     numsubs = len(subject_ids)
#     task = 'stopsignal'
# elif this_dataset == 'NTNU_7T_SJSI':
#     t_r = 1.38
# #     numsubs = 20
#     subject_ids = [x.split('/')[-2].split('-')[-1] for x in glob.glob(os.path.join(roi_hp_dir, 'sub-*/'))]
#     numsubs = len(subject_ids)
#     task = 'sst'
# elif this_dataset == 'openfmri_3T':
#     t_r = 2.0
# #     numsubs = 97
#     subject_ids = [x.split('/')[-2].split('-')[-1] for x in glob.glob(os.path.join(roi_hp_dir, 'sub-*/'))]
#     numsubs = len(subject_ids)
#     task = 'stopsignal'
    
# # make sure all files required are found correctly    
# rois_ = glob.glob(os.path.join(base_dir, 'derivatives', 'fsl_feat_roi_func', this_dataset,
#                                         'sub-*', 'func',  f'sub-*_task-{task}_run-*_space-*_desc-preproc_bold.nii.gz'))
# masks_ = glob.glob(os.path.join(base_dir, 'derivatives', 'fmriprep_feat_hack', this_dataset, 
#                               'sub-*', 'func', f'sub-*_task-{task}_run-*_space-*_desc-brain_mask.nii.gz'))
# # comp_ = glob.glob(os.path.join(base_dir, 'derivatives', 'fmriprep_feat_hack', this_dataset, 
# #                                     'sub-*', 'anat', 'sub-*_from-T1w_to-MNI152NLin2009cAsym_mode-image_xfm.h5'))
# # xfm_ = glob.glob(os.path.join(base_dir, 'derivatives', 'fmriprep_feat_hack', this_dataset,
# #                               'sub-*', 'func',  f'sub-*_task-{task}_run-*_from-scanner_to-T1w_mode-image_xfm.txt'))
# events_ = glob.glob(os.path.join(base_dir, f'derivatives/event_files/{this_dataset}/sub-*/func/sub-*run-*_events.tsv'))
    
# template_brain = os.path.join(base_dir,'sourcedata/templates/mni_icbm152_t1_tal_nlin_asym_09c_brain.nii')

In [4]:
# print(f"""
# this dataset : {this_dataset}
# tr : {t_r}
# number of subjects : {numsubs}
# subject ids : {subject_ids}
# roi_hp_dir : {roi_hp_dir}
# model ns : {model_n}
# no. rois : {len(rois_)}
# no. masks : {len(masks_)}
# no.events : {len(events_)}

# """)

In [5]:
import re

import numpy as np

# Every preprocessed ROI time series, across all datasets / subjects / runs.
all_fns = sorted(glob.glob(os.path.join(
    base_dir, 'derivatives', 'fsl_feat_roi_func', '*', 'sub-*', 'func',
    'sub-*_task-*_run-*_space-*_desc-preproc_bold.nii.gz')))

# Raw string: '\S' and '\d' are regex escapes, not (invalid) Python string escapes.
regex = re.compile(r'.*fsl_feat_roi_func/(?P<dataset>\S+)/sub-.*/func/sub-(?P<subject>\S+)_task-.*_run-(?P<run>\d)_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz')

# The glob is broader than the regex (any space, any run label), so a file can be
# globbed without matching; skip those instead of crashing on `None.groupdict()`.
# Each file is matched once.
ds_subs = [m['dataset'] + '-' + m['subject']
           for m in map(regex.match, all_fns)
           if m is not None]

# Sorted, de-duplicated '<dataset>-<subject>' keys; these drive the workflow iterables.
all_subs = np.unique(ds_subs).tolist()

In [6]:
# Names of the datasets pooled in this analysis.
datasets = [
    'Leipzig_7T_SM',
    'Leipzig_7T_GdH',
    'openfmri_3T',
    'aron_3T',
    'NTNU_7T_SJSI',
]
# Subject label = whatever follows the last hyphen of each '<dataset>-<subject>' key.
all_subject_ids = [ds_sub.rsplit('-', 1)[-1] for ds_sub in all_subs]
# all_subject_ids

# first & second level models

In [7]:
def get_session_info(subject_id, run, task='stop', this_dataset='Leipzig_7T_SM', space='T1w', shift=-1.38/2, model_n=0,
                    root_dir='/home/scotti/projects/3t_7t_sst_comparison', include_physio=True):
    """Build the first-level model info for one run and write its confound regressors.

    Reads the run's event file and fMRIPrep confounds file below ``root_dir``,
    writes the selected confound columns to a headerless tab-separated text file
    under ``derivatives/confounds`` and returns the event timing as a ``Bunch``.

    Parameters
    ----------
    subject_id : str
        Subject label without the ``sub-`` prefix.
    run : int or str
        Run label used in the file names.
    task : str
        Task label used in the file names.
    this_dataset : str
        Dataset folder name below the ``derivatives`` sub-folders.
    space : str
        Accepted for interface compatibility; not used by this function.
    shift : float
        Added to every event onset (should be negative for slice-time correction).
    model_n : int
        0 -> conditions fs / ss / go; 1 -> response_left / response_right.
    root_dir : str
        Project root that contains ``derivatives``.
    include_physio : bool
        If True, append the first (up to) 20 ``a_comp_cor_XX`` columns of the
        confounds file to the written regressors.

    Returns
    -------
    info : nipype.interfaces.base.Bunch
        ``conditions``, ``onsets``, ``durations`` and ``amplitudes`` for the run.
    confounds_fn : str
        Path of the confound text file that was written.

    Raises
    ------
    ValueError
        If ``model_n`` is neither 0 nor 1.
    """
    # Imports stay inside the function: it is wrapped in a nipype Function node,
    # which runs it from its source outside this notebook's namespace.
    import os

    import pandas as pd
    from nipype.interfaces.base import Bunch

    max_acompcor = 20  # upper bound on the number of aCompCor components used

    ### conditions ###
    # Validate up front so nothing is written to disk for an unsupported model.
    if model_n == 0:
        conditions = ['fs', 'ss', 'go']
    elif model_n == 1:
        conditions = ['response_left', 'response_right']
    else:
        raise ValueError(f'Unknown model_n {model_n!r}; expected 0 or 1')

    ### files ###
    sub = subject_id
    event_fn = f'{root_dir}/derivatives/event_files_timelockstopsignal/{this_dataset}/sub-{sub}/func/sub-{sub}_task-{task}_run-{run}_events.tsv'
    confounds_fn = f'{root_dir}/derivatives/fmriprep/{this_dataset}/fmriprep/fmriprep/sub-{sub}/func/sub-{sub}_task-{task}_run-{run}_desc-confounds_timeseries.tsv'

    ### events ###
    events = pd.read_csv(event_fn, sep='\t', index_col=None)
    events['duration'] = 0.1  # stick functions. Set to 0.1 so we know baseline-to-max value (0.0211 as per Jeanette Mumford)
    # .copy() so the onset shift below modifies an independent frame, not a slice view
    events = events.loc[events.trial_type.isin(conditions), ['onset', 'trial_type', 'duration']].copy()

    # slice time correction, nb: shift should be a negative number for STC
    events['onset'] += shift

    ### confounds ###
    confounds_all = pd.read_csv(confounds_fn, sep='\t')
    include_confounds = ['trans_x', 'trans_y', 'trans_z', 'rot_x', 'rot_y', 'rot_z', 'dvars', 'framewise_displacement']
    # back-fill so the leading NaN of dvars / framewise_displacement is replaced
    confounds = confounds_all[include_confounds].bfill()

    if include_physio:
        # Take the first 20 aCompCor components; not all subjects have 20, so cap
        # at the number that is actually present in the confounds file.
        n_available = sum('a_comp_cor_' in col for col in confounds_all.columns)
        n_acompcor = min(n_available, max_acompcor)
        acompcor_cols = ['a_comp_cor_' + str(i).zfill(2) for i in range(n_acompcor)]
        confounds = pd.concat([confounds, confounds_all[acompcor_cols]], axis=1)

    # save the confounds that we actually include in the GLM to csv, no header or index
    confounds_out_fn = f'{root_dir}/derivatives/confounds/{this_dataset}/sub-{sub}/func/sub-{sub}_task-{task}_run-{run}_desc-confounds_timeseries.txt'
    os.makedirs(os.path.dirname(confounds_out_fn), exist_ok=True)
    confounds.to_csv(confounds_out_fn, sep='\t', header=False, index=False)

    ### Make bunch ###
    onsets, durations, amplitudes = [], [], []
    for trial_type in conditions:
        trials = events.loc[events.trial_type == trial_type]
        onsets.append(trials['onset'].tolist())
        durations.append(trials['duration'].tolist())
        amplitudes.append([1] * trials.shape[0])  # unmodulated regressors

    info = Bunch(conditions=conditions,
                 onsets=onsets,
                 durations=durations,
                 amplitudes=amplitudes)

    # always return info and the confounds_fn
    return info, confounds_out_fn

# Contrast definitions handed to Level1Design: (name, stat type, conditions, weights).
if model_n == [0]:
    # one contrast per condition against baseline ...
    contrasts = [(cond, 'T', [cond], [1.0]) for cond in ('fs', 'ss', 'go')]
    # ... followed by the pairwise differences
    contrasts += [(f'{first}-{second}', 'T', [first, second], [1, -1])
                  for first, second in (('fs', 'go'), ('fs', 'ss'), ('ss', 'go'))]

elif model_n == [1]:
    contrasts = [(side, 'T', [side], [1.0]) for side in ('response_left', 'response_right')]
    contrasts.append(('left-right', 'T', ['response_left', 'response_right'], [1, -1]))

In [9]:
def get_dataset_sub(ds_sub):
    """Split a '<dataset>-<subject>' key into ``(dataset, subject)``.

    The split happens at the LAST hyphen, so a dataset name that itself contains
    hyphens is kept intact; the subject label is whatever follows the final hyphen.

    Raises
    ------
    ValueError
        If ``ds_sub`` contains no hyphen.
    """
    ds, sub = ds_sub.rsplit('-', 1)
    return ds, sub
    
def get_tr(dataset):
    """Return ``(t_r, shift)`` for a dataset.

    ``t_r`` is 3 for 'Leipzig_7T_SM', 1.38 for 'NTNU_7T_SJSI' and 2 for any other
    dataset name; ``shift`` is minus half of ``t_r``.
    """
    repetition_time = (
        3 if dataset == 'Leipzig_7T_SM'
        else 1.38 if dataset == 'NTNU_7T_SJSI'
        else 2
    )
    return repetition_time, -repetition_time / 2

def get_task(dataset):
    """Return the task label used in the file names of ``dataset``.

    Raises
    ------
    ValueError
        If ``dataset`` is not one of the known datasets (previously this surfaced
        as an UnboundLocalError on the ``return``).
    """
    task_by_dataset = {
        'Leipzig_7T_SM': 'stop',
        'Leipzig_7T_GdH': 'stop',
        'NTNU_7T_SJSI': 'sst',
        'openfmri_3T': 'stopsignal',
        'aron_3T': 'stopsignal',
    }
    if dataset not in task_by_dataset:
        raise ValueError(f'Unknown dataset {dataset!r}; expected one of {sorted(task_by_dataset)}')
    return task_by_dataset[dataset]

def get_runs_per_sub(dataset, subject_id):
    """Return the list of run numbers available for ``subject_id`` in ``dataset``.

    Subjects that miss a run or two are listed explicitly per dataset; everyone
    else gets the dataset's default run list.

    Raises
    ------
    ValueError
        If ``dataset`` is not one of the known datasets (previously this surfaced
        as an UnboundLocalError on the ``return``).
    """
    # dataset -> (runs of a complete subject, {subject with missing runs: their runs})
    # here we can add the runs per subject if some subject miss a run or two
    run_table = {
        'aron_3T': ([1, 2, 3], {'11': [1, 2], '12': [1, 2]}),
        'openfmri_3T': ([1], {}),
        'Leipzig_7T_SM': ([1, 2, 3], {'17': [1, 2]}),
        'Leipzig_7T_GdH': ([1, 2, 3], {'DA9T': [1, 2]}),
        'NTNU_7T_SJSI': ([1, 2], {'040': [1]}),
    }
    if dataset not in run_table:
        raise ValueError(f'Unknown dataset {dataset!r}; expected one of {sorted(run_table)}')

    default_runs, incomplete_subjects = run_table[dataset]
    return list(incomplete_subjects.get(subject_id, default_runs))

In [10]:
# Workflow container; node working directories are created below base_dir.
workflow = pe.Workflow(
    name='feat_level12_sst_roi_timelockedstopsignal',
    base_dir=os.path.join(base_dir, 'processing', 'nipype_workflow_folders', 'all_datasets'),
)
workflow.config = {"execution": {"crashdump_dir": os.path.join(base_dir, 'processing', 'crashdumps')}}

# identity: fans the graph out over every (dataset-subject, space, model) combination
identity = pe.Node(
    util.IdentityInterface(fields=['ds_sub', 'space', 'model_n']),
    name='identity',
    iterables=[('ds_sub', all_subs),
               ('space', spaces),
               ('model_n', model_n)],
)

# selector
# if running only one run, copy the mask from run 1 to run 2, 'mask' variable below needs a list, it will crash if only one is found
#(terrible way to get around this.. fix at some point.. lazy..)
# NOTE(review): no 'mask' template is defined in this cell any more (only 'roi_funcs'),
# so the comment above looks stale -- confirm and remove.
# SelectFiles template: the {dataset}, {subject_id}, {task} and {space} fields are filled
# from the node inputs connected below; 'run-*' is left as a wildcard.
templates = {'roi_funcs': os.path.join(base_dir, 'derivatives', 'fsl_feat_roi_func', '{dataset}',
                                            'sub-{subject_id}', 'func',  
                                            f'sub-{{subject_id}}_task-{{task}}_run-*_space-{{space}}*_desc-preproc_bold.nii.gz'),
            }

# Splits the iterated '<dataset>-<subject>' key into its two parts.
ds_sub_getter = pe.Node(util.Function(function=get_dataset_sub,
                                        input_names=['ds_sub'],
                                        output_names=['dataset', 'subject_id']),
                         name='ds_sub_getter')
# Looks up the task label used in the file names of the dataset.
task_getter = pe.Node(util.Function(function=get_task,
                                        input_names=['dataset'],
                                        output_names=['task']),
                         name='task_getter')
# Looks up the repetition time of the dataset and the onset shift derived from it.
tr_getter = pe.Node(util.Function(function=get_tr,
                                        input_names=['dataset'],
                                        output_names=['t_r', 'shift']),
                         name='tr_getter')


selector = pe.Node(nio.SelectFiles(templates), name='selector')

workflow.connect(identity, 'ds_sub', ds_sub_getter, 'ds_sub')

#workflow.connect(identity, 'dataset', subject_getter, 'ds')
# Feed every template field of the selector.
workflow.connect(ds_sub_getter, 'subject_id', selector, 'subject_id')
workflow.connect(ds_sub_getter, 'dataset', selector, 'dataset')
workflow.connect(ds_sub_getter, 'dataset', task_getter, 'dataset')
workflow.connect(task_getter, 'task', selector, 'task')
workflow.connect(identity, 'space', selector, 'space')
workflow.connect(ds_sub_getter, 'dataset', tr_getter, 'dataset')

# get run info per sub
run_info_getter = pe.Node(util.Function(function=get_runs_per_sub,
                                        input_names=['dataset', 'subject_id'],
                                        output_names=['runs']), name='run_info_getter')
workflow.connect(ds_sub_getter, 'subject_id', run_info_getter, 'subject_id')
workflow.connect(ds_sub_getter, 'dataset', run_info_getter, 'dataset')

# session info getter
# MapNode over 'run': get_session_info is called once per run number from run_info_getter.
# 'root_dir' and 'include_physio' are not listed in input_names, so the function's
# defaults are used for them.
session_info_getter = pe.MapNode(util.Function(function=get_session_info,
                                     input_names=['subject_id', 'run', 'task', 'this_dataset', 'space', 'shift', 'model_n'],
                                     output_names=['session_info', 'confounds']),
                                 iterfield=['run'], 
                                 name='session_info_getter')
workflow.connect(task_getter, 'task', session_info_getter, 'task')
workflow.connect(ds_sub_getter, 'subject_id', session_info_getter, 'subject_id')
workflow.connect(tr_getter, 'shift', session_info_getter, 'shift')
#session_info_getter.inputs.task=task
#session_info_getter.inputs.this_dataset=this_dataset
# Fixed to MNI here rather than connected from the 'space' iterable.
session_info_getter.inputs.space='MNI152NLin2009cAsym'
#session_info_getter.inputs.shift = -t_r/2

workflow.connect(ds_sub_getter, 'dataset', session_info_getter, 'this_dataset')
workflow.connect(run_info_getter, 'runs', session_info_getter, 'run')
workflow.connect(identity, 'model_n', session_info_getter, 'model_n')

# model setup
# SpecifyModel combines the per-run event info with the functional runs; onsets are in seconds.
specifymodel = pe.Node(model.SpecifyModel(), name='specifymodel1')
specifymodel.inputs.input_units = 'secs'
#specifymodel.inputs.time_repetition = t_r
specifymodel.inputs.high_pass_filter_cutoff = hpcutoff
# The TR differs between datasets, so it is connected from tr_getter instead of hard-coded.
workflow.connect(tr_getter, 't_r', specifymodel, 'time_repetition')


workflow.connect(session_info_getter, 'session_info', specifymodel, 'subject_info')
## old flow (with cosines): immediately connect to specifymodel
workflow.connect(selector, 'roi_funcs', specifymodel, 'functional_runs')

# Level 1 design
level1design = pe.Node(interface=fsl.Level1Design(), name="level1design")
# level1design.inputs.interscan_interval = t_r
workflow.connect(tr_getter, 't_r', level1design, 'interscan_interval')
# Double-gamma HRF basis with derivatives enabled.
level1design.inputs.bases = {'dgamma': {'derivs': True}}
# NOTE(review): `contrasts` is the module-level list chosen once from `model_n` ([0] or [1]);
# it is not re-selected per value of the 'model_n' iterable -- confirm this is intended
# if more than one model is ever iterated.
level1design.inputs.contrasts = contrasts
level1design.inputs.model_serial_correlations = True

workflow.connect(specifymodel, 'session_info', level1design, 'session_info')
# workflow.connect(session_info_getter, 'contrasts', level1design, 'contrasts')

# FEAT model: one feat_model call per run (ev files, fsf file and extra args are paired up)
modelgen = pe.MapNode(
    interface=fsl.FEATModel(),
    iterfield=['ev_files', 'fsf_file', 'args'],
    name='modelgen',
)

workflow.connect([
    (level1design, modelgen, [('ev_files', 'ev_files'),
                              ('fsf_files', 'fsf_file')]),
    # add confounds here: the confound text file is passed through the 'args' input
    (session_info_getter, modelgen, [('confounds', 'args')]),
])


# FILM GLS
iterfield = ['design_file', 'in_file', 'tcon_file']
# full_data / output_pwdata are FILMGLS *interface* inputs. They used to be passed to
# pe.MapNode(...), where unknown keyword arguments are silently discarded, so neither
# option ever reached film_gls. They now sit on the interface where they take effect.
modelestimate = pe.MapNode(
    interface=fsl.FILMGLS(smooth_autocorr=False,  # prewhitening yes or no?
#                          autocorr_noestimate=True,
                          mask_size=0,
#                          threshold = 100,  # threshold for signal, default = 1000 but will lose data that way as 1000 is higher than alot of the voxel signals
                          threshold=50,  # SM: set threshold of signal to 50; background is 0, and units are PSC with mean 100
                          full_data=True,
                          output_pwdata=True),
    name='modelestimate',
    iterfield=iterfield,
    mem_gb=10)

# ## old flow (with cosines): immediately connect to modelestimate
workflow.connect(selector, 'roi_funcs', modelestimate, 'in_file')
# # new flow: connect high-passed data
# workflow.connect(highpass, 'out_file', modelestimate, 'in_file')


workflow.connect(modelgen, 'design_file', modelestimate, 'design_file')
workflow.connect(modelgen, 'con_file', modelestimate, 'tcon_file')

#### Fixed effects
# merge copes, varcopes (and masks): three identically configured fslmerge MapNodes
# that concatenate their 'in_files' along the time dimension, differing only in name
copemerge, varcopemerge, maskemerge = (
    pe.MapNode(interface=fsl.Merge(dimension='t'),
               iterfield=['in_files'],
               name=merge_name)
    for merge_name in ("copemerge", "varcopemerge", "maskemerge")
)

def sort_copes(files):
    """Regroup a list of per-run lists into a list of per-position lists.

    ``files[j][i]`` ends up at ``result[i][j]``; the number of output lists is
    taken from the length of the first inner list.
    """
    n_per_run = len(files[0])
    return [[run_files[idx] for run_files in files] for idx in range(n_per_run)]

level2model = pe.Node(interface=fsl.L2Model(), name='l2model')


def num_copes(files):
    """Return the number of entries in ``files``."""
    return len(files)


# modelestimate outputs pass through a helper function before reaching their target input
workflow.connect([
    (modelestimate, copemerge, [(('copes', sort_copes), 'in_files')]),
    (modelestimate, varcopemerge, [(('varcopes', sort_copes), 'in_files')]),
    (modelestimate, level2model, [(('copes', num_copes), 'num_copes')]),
])


def pickfirst(x):
    """Return the first element when ``x`` is a list, otherwise ``x`` itself."""
    return x[0] if isinstance(x, list) else x


# Fixed-effects FLAMEO, run once per merged cope / varcope pair.
flameo = pe.MapNode(
    interface=fsl.FLAMEO(run_mode='fe'),
    name="flameo",
    iterfield=['cope_file', 'var_cope_file'])
# Mask path is derived from base_dir instead of repeating the absolute project path,
# so relocating the project only requires changing base_dir.
flameo.inputs.mask_file = os.path.join(base_dir, 'derivatives', 'mni3mm_brain_mask.nii.gz')

workflow.connect([
    # (selector, flameo, [(('mask', pickfirst), 'mask_file')]),
    (copemerge, flameo, [('merged_file', 'cope_file')]),
    (varcopemerge, flameo, [('merged_file', 'var_cope_file')]),
    (level2model, flameo, [('design_mat', 'design_file'),
                           ('design_con', 't_con_file'),
                           ('design_grp', 'cov_split_file')]),
])


## datasink
ds = pe.Node(nio.DataSink(), name='datasink')
ds.inputs.base_directory = os.path.join(base_dir, 'derivatives', 'glm_feat_sst_roi_timelockstopsignal')

# Rename nipype's iterable-based output folders to a BIDS-like layout.
# Substitutions are generated only for '<dataset>-<subject>' pairs that actually exist
# (all_subs). Crossing every subject label with every dataset produced several times
# as many entries, most for non-existent pairs plus duplicates for labels shared
# between datasets.
#_model_n_0_smoothing_fwhm_1p5_space_T1w_subject_id_002
substitutions = [(f'mni/level2_{stat_type}s/_ds_sub_{this_dataset}-{sub}_model_n_{model_n_}_space_MNI152NLin2009cAsym/_flameo{contrast_n}/{stat_type}1.nii.gz',
                  f'{this_dataset}/sub-{sub}/func/model-{model_n_}/sub-{sub}_task-sst_space-MNI152NLin2009cAsym_model-{model_n_}_contrast-{contrast_n}_desc-{stat_type}.nii.gz')
                  for this_dataset, sub in (ds_sub.rsplit('-', 1) for ds_sub in all_subs)
                  for contrast_n in range(len(contrasts))
                  for model_n_ in model_n
                  for stat_type in ['cope', 'zstat', 'varcope', 'tdof_t']
                  ]


ds.inputs.substitutions = substitutions


## MNI-space second-level outputs
workflow.connect(flameo, 'zstats', ds, 'subject_level_model.mni.level2_zstats')
workflow.connect(flameo, 'copes', ds, 'subject_level_model.mni.level2_copes')
workflow.connect(flameo, 'var_copes', ds, 'subject_level_model.mni.level2_varcopes')
workflow.connect(flameo, 'tdof', ds, 'subject_level_model.mni.level2_tdof_ts')

In [None]:
# Execute the whole graph with nipype's MultiProc plugin, limited to 20 processes and 200 GB of memory.
workflow.run(plugin='MultiProc', plugin_args={'n_procs': 20, 'memory_gb': 200})

240713-14:49:32,258 nipype.workflow INFO:
	 Workflow feat_level12_sst_roi_timelockedstopsignal settings: ['check', 'execution', 'logging', 'monitoring']
240713-14:50:01,630 nipype.workflow INFO:
	 Running in parallel.
240713-14:50:01,678 nipype.workflow INFO:
	 [MultiProc] Running 0 tasks, and 174 jobs ready. Free memory (GB): 200.00/200.00, Free processors: 20/20.
240713-14:50:01,896 nipype.workflow INFO:
	 [Node] Setting-up "feat_level12_sst_roi_timelockedstopsignal.ds_sub_getter" in "/home/scotti/projects/3t_7t_sst_comparison/processing/nipype_workflow_folders/all_datasets/feat_level12_sst_roi_timelockedstopsignal/_ds_sub_Leipzig_7T_GdH-BI3T_model_n_0_space_MNI152NLin2009cAsym/ds_sub_getter".
240713-14:50:01,900 nipype.workflow INFO:
	 [Node] Executing "ds_sub_getter" <nipype.interfaces.utility.wrappers.Function>
240713-14:50:01,901 nipype.workflow INFO:
	 [Node] Setting-up "feat_level12_sst_roi_timelockedstopsignal.ds_sub_getter" in "/home/scotti/projects/3t_7t_sst_comparison/proce

In [13]:
# Export this notebook to a plain Python script (IPython `!` shell escape; the notebook file name is hard-coded).
!jupyter nbconvert --to script 09a_GLM_FEAT-ROI-ALL_timelockedstopsignal.ipynb

[NbConvertApp] Converting notebook 09a_GLM_FEAT-ROI-ALL_timelockedstopsignal.ipynb to script
[NbConvertApp] Writing 19292 bytes to 09a_GLM_FEAT-ROI-ALL_timelockedstopsignal.py
