In [1]:
import glob
import os
import re
import shutil
import random
import string

import numpy as np
import pandas as pd

import nilearn
from nilearn import plotting, image
from nilearn.input_data import NiftiMasker
import nibabel as nib
from nipype.interfaces import ants
import nighres

import subprocess
import json
import multiprocessing as mp
from functools import partial
import joblib
from joblib import Parallel, delayed
import itertools

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline



	 A newer version (1.8.4) of nipy/nipype is available. You are using 1.7.0


In [2]:
def find_rois(sub, atlas_name='MASSP', space='T1w'):
    # THALAMUS SUBREGIONS
    if atlas_name=='THAL':
        if space == 'MNI152NLin2009cAsym' or space == 'mni':
            print('')
        else:
            mask_dir = f'../derivatives/masks_thal_func/sub-{sub}/anat/sub-{sub}_*.nii.gz'
            fns = sorted(glob.glob(mask_dir))
            names = [re.match('.*space-(?P<space>[a-zA-Z0-9]+)_desc-mask-(?P<label>\S+).nii.gz', fn).groupdict()['label'] for fn in fns]
    # ATAG ATLAS        
    elif atlas_name == 'ATAG':
        if space == 'MNI152NLin2009cAsym' or space == 'mni':
            ### Rois in MNI09c-space
            mask_dir='/home/Public/trondheim/sourcedata/masks/MNI152NLin2009cAsym_res-1p5'
            fns = sorted(glob.glob(mask_dir + '/space-*'))
            names = [re.match('.*space-(?P<space>[a-zA-Z0-9]+)_res-1p5_label-(?P<label>[a-zA-Z0-9]+)_probseg_def-img.nii.gz', fn).groupdict()['label'] for fn in fns]
        else:
            mask_dir = f'../derivatives/masks_atag_func/sub-{sub}/anat/sub-{sub}_*.nii.gz'
            fns = sorted(glob.glob(mask_dir))
            names = [re.match('.*space-(?P<space>[a-zA-Z0-9]+)_desc-mask-(?P<label>[a-zA-Z0-9]+).nii.gz', fn).groupdict()['label'] for fn in fns]
    # MASSP ATLAS        
    elif atlas_name == 'MASSP':
        mask_dir = f'../derivatives/masks_massp_func/sub-{sub}/anat/sub-{sub}_*.nii.gz'
        fns = sorted(glob.glob(mask_dir))
        names = [re.match('.*space-(?P<space>[a-zA-Z0-9]+)_desc-mask-(?P<label>\S+).nii.gz', fn).groupdict()['label'] for fn in fns]
    # HARVARD OXFORD ATLAS
    elif atlas_name == 'CORT':
        mask_dir = f'../derivatives/masks_cortex_func/sub-{sub}/anat/sub-{sub}_*.nii.gz'
        fns = sorted(glob.glob(mask_dir))
        names = [re.match('.*space-(?P<space>[a-zA-Z0-9]+)_desc-mask-(?P<label>\S+).nii.gz', fn).groupdict()['label'] for fn in fns]
    #Pauli atlas
    elif atlas_name == 'Pauli':
        mask_dir = f'../derivatives/masks_Pauli_func/sub-{sub}/anat/sub-{sub}_*.nii.gz'
        fns = sorted(glob.glob(mask_dir))
        names = [re.match('.*space-(?P<space>[a-zA-Z0-9]+)_desc-mask-(?P<label>\S+).nii.gz', fn).groupdict()['label'] for fn in fns]
    #constructed FPN masks from Brodmann areas (Pijnenburg 2022)
    elif atlas_name == 'FPN':
        mask_dir = f'../derivatives/masks_FPN_func/sub-{sub}/anat/sub-{sub}_*.nii.gz'
        fns = sorted(glob.glob(mask_dir))
        names = [re.match('.*space-(?P<space>[a-zA-Z0-9]+)_desc-mask-(?P<label>\S+).nii.gz', fn).groupdict()['label'] for fn in fns]
    elif atlas_name == 'WM-rep':
        mask_dir = f'../derivatives/masks_WM-rep_func/sub-{sub}/anat/sub-{sub}_*.nii.gz'
        fns = sorted(glob.glob(mask_dir))
        names = [re.match('.*space-(?P<space>[a-zA-Z0-9]+)_desc-mask-(?P<label>\S+).nii.gz', fn).groupdict()['label'] for fn in fns]
    elif atlas_name == 'HCP_MMP1':
        mask_dir = f'../derivatives/masks_HCP_MMP1_func/sub-{sub}/anat/sub-{sub}_*.nii.gz'
        fns = sorted(glob.glob(mask_dir))
        names = [re.match('.*space-(?P<space>[a-zA-Z0-9]+)_desc-mask-(?P<label>\S+).nii.gz', fn).groupdict()['label'] for fn in fns]
    elif atlas_name == 'str':
        mask_dir = f'../derivatives/masks_str_func/sub-{sub}/anat/sub-{sub}_*.nii.gz'
        fns = sorted(glob.glob(mask_dir))
        names = [re.match('.*space-(?P<space>[a-zA-Z0-9]+)_desc-mask-(?P<label>\S+).nii.gz', fn).groupdict()['label'] for fn in fns]

    roi_dict = dict(zip(names, fns))
    return roi_dict

def load_atlas(sub, atlas_name='MASSP', space='T1w'):
    from nilearn import image
    
    roi_dict = find_rois(sub, atlas_name, space)
    if len(roi_dict) == 0:
        warnings.warn(f'No ROIs found for sub-{sub} atlas-{atlas_name} space-{space}. Returning 0 to prevent error')
        return 0
    combined = image.concat_imgs(roi_dict.values())
    
    class AttrDict(dict):
        def __init__(self, *args, **kwargs):
            super(AttrDict, self).__init__(*args, **kwargs)
            self.__dict__ = self
            
    roi_atlas = AttrDict({'maps': combined,
                          'labels': roi_dict.keys()})
    
    return roi_atlas

# 1. Extract signals from each ROI
## Manual coded extraction - Slow

In [3]:
def get_epi(sub, ses, task, run, use_hp=False, base_dir='../derivatives/fmriprep/fmriprep'):
    if use_hp:
        epi = os.path.join('../derivatives/high_passed_func', f'sub-{sub}/ses-{ses}/func/sub-{sub}_ses-{ses}_task-{task}_run-{run}_space-T1w_desc-preproc_bold.nii.gz')
    else:
        epi = os.path.join(base_dir, f'sub-{sub}/ses-{ses}/func/sub-{sub}_ses-{ses}_task-{task}_run-{run}_space-T1w_desc-preproc_bold.nii.gz')
    return epi

def _make_psc(data):
    mean_img = image.mean_img(data)

    # Replace 0s for numerical reasons
    mean_data = mean_img.get_fdata()
    mean_data[mean_data == 0] = 1
    denom = image.new_img_like(mean_img, mean_data)

    return image.math_img('data / denom[..., np.newaxis] * 100 - 100',
                          data=data, denom=denom)

def do_extract(to_run, atlas_name='MASSP', overwrite=False, to_psc=False, use_hp=False):
    sub, ses, task, run = to_run
    sub = str(sub).zfill(3)
    print(f'Extracting from sub-{sub}/ses-{ses}/sub-{sub}_ses-{ses}_task-{task}_run-{run}', end='')
    
    epi_fn = get_epi(sub,ses,task,run,use_hp)
    if not os.path.exists(epi_fn):
        print('...doesnt exist, skipping'.format(sub,ses,task,run))
        return None
    
    ## dont really need to convert to psc here
    if to_psc:
        epi = _make_psc(epi_fn)
        psc_fn = '_psc'
    else:
        epi = nib.load(epi_fn)
        psc_fn = ''
    
    # might wanna ahve the hp data handy
    if use_hp:
        output_fn = f'../derivatives/extracted_signals/sub-{sub}/ses-{ses}/func/sub-{sub}_ses-{ses}_task-{task}_run-{run}_desc-{atlas_name}-signals{psc_fn}_hp.tsv'
    else:
        output_fn = f'../derivatives/extracted_signals/sub-{sub}/ses-{ses}/func/sub-{sub}_ses-{ses}_task-{task}_run-{run}_desc-{atlas_name}-signals{psc_fn}.tsv'
    
    if os.path.exists(output_fn) and not overwrite:
        print(f'{output_fn} already run, loading previous result...')
        return pd.read_csv(output_fn, sep='\t')
    
    #load & reshpae
    epi_flat = epi.get_fdata().reshape((np.product(epi.shape[:3]), epi.shape[-1]))

    # load atlas
    atlas = load_atlas(sub,atlas_name=atlas_name)
    
    dfs = []
    for i in np.arange(len(atlas.labels)):
        print('.', end='')
        label = list(atlas.labels)[i]
        mask = image.index_img(atlas.maps, i)
        mask_flat = mask.get_fdata().ravel()
        print(f'There are {np.count_nonzero(mask_flat)} voxels in region {label}')
        if mask_flat.sum() == 0: # if there are no voxels in the mask then add one voxel so code doesn't crash
            mask_flat[-1] = 1
#         print(mask_flat)
#         print(label)
        print(len(epi_flat))
        print(len(mask_flat))
        signal = pd.DataFrame(np.average(epi_flat, weights=mask_flat, axis=0), columns=[label])
        signal.index.name = 'volume'
        dfs.append(signal)

    df = pd.concat(dfs, axis=1)
    if not os.path.exists(os.path.dirname(output_fn)):
        os.makedirs(os.path.dirname(output_fn))
    df.to_csv(output_fn, sep='\t')
    print(output_fn)
    return df

In [4]:
# find all available functional runs, extract sub/ses/task/run info
all_runs = sorted(glob.glob('../derivatives/fmriprep/fmriprep/sub-*/ses-*/func/*space-T1w*_bold.nii.gz'))
regex = re.compile('.*sub-(?P<sub>\d+)_ses-(?P<ses>\S+)_task-(?P<task>\S+)_run-(?P<run>\d)_space-T1w*')
all_combs = [tuple(regex.match(x).groupdict().values()) for x in all_runs]
all_combs[-10:]

[('043', 'rlsat', 'rlsat', '3'),
 ('044', 'anatomical', 'rs', '1'),
 ('044', 'anatomical', 'rs', '2'),
 ('044', 'rbrevl', 'rb', '1'),
 ('044', 'rbrevl', 'rb', '2'),
 ('044', 'rbrevl', 'revl', '1'),
 ('044', 'rbrevl', 'revl', '2'),
 ('044', 'rlsat', 'rlsat', '1'),
 ('044', 'rlsat', 'rlsat', '2'),
 ('044', 'rlsat', 'rlsat', '3')]

In [5]:
# just extract MSIT
all_runs = sorted(glob.glob('../derivatives/fmriprep/fmriprep/sub-*/ses-sstmsit/func/*task-*space-T1w*_bold.nii.gz'))
regex = re.compile('.*sub-(?P<sub>\d+)_ses-(?P<ses>\S+)_task-(?P<task>\S+)_run-(?P<run>\d)_space-T1w*')
all_combs = [tuple(regex.match(x).groupdict().values()) for x in all_runs]
# all_combs = [x for x in all_combs if not '015' in x] # bad hp data for sub 15????
all_combs = [x for x in all_combs if '041' in x] # bad hp data for sub 26????
# all_combs = [x for x in all_combs if x[0] in ['002','003','004','005','006','007','008','009','010','011']]
all_combs

[('041', 'sstmsit', 'msit', '1'),
 ('041', 'sstmsit', 'msit', '2'),
 ('041', 'sstmsit', 'sst', '1'),
 ('041', 'sstmsit', 'sst', '2')]

In [7]:
# just extract SST
all_runs = sorted(glob.glob('../derivatives/fmriprep/fmriprep/sub-*/ses-sstmsit/func/*task-msit*space-T1w*_bold.nii.gz'))
regex = re.compile('.*sub-(?P<sub>\d+)_ses-(?P<ses>\S+)_task-(?P<task>\S+)_run-(?P<run>\d)_space-T1w*')
all_combs = [tuple(regex.match(x).groupdict().values()) for x in all_runs]
# all_combs = [x for x in all_combs if not '015' in x] # bad hp data for sub 15????
# all_combs = [x for x in all_combs if '041' in x] # bad hp data for sub 26????
all_combs = [x for x in all_combs if x[0] in ['004','008','010','013','019','027']]
all_combs

[('004', 'sstmsit', 'msit', '1'),
 ('004', 'sstmsit', 'msit', '2'),
 ('008', 'sstmsit', 'msit', '1'),
 ('008', 'sstmsit', 'msit', '2'),
 ('010', 'sstmsit', 'msit', '1'),
 ('010', 'sstmsit', 'msit', '2'),
 ('013', 'sstmsit', 'msit', '1'),
 ('013', 'sstmsit', 'msit', '2'),
 ('019', 'sstmsit', 'msit', '1'),
 ('019', 'sstmsit', 'msit', '2'),
 ('027', 'sstmsit', 'msit', '1'),
 ('027', 'sstmsit', 'msit', '2')]

In [6]:
# just extract RBREVL
all_runs = sorted(glob.glob('../derivatives/fmriprep/fmriprep/sub-*/ses-rbrevl/func/*task-rb*space-T1w*_bold.nii.gz'))
regex = re.compile('.*sub-(?P<sub>\d+)_ses-(?P<ses>\S+)_task-(?P<task>\S+)_run-(?P<run>\d)_space-T1w*')
all_combs = [tuple(regex.match(x).groupdict().values()) for x in all_runs]
# all_combs = [x for x in all_combs if not '015' in x] # bad hp data for sub 15????
all_combs = [x for x in all_combs]# if '041' in x] # bad hp data for sub 26????
# all_combs = [x for x in all_combs if x[0] in ['002','003','004','005','006','007','008','009','010','011']]
all_combs

[('002', 'rbrevl', 'rb', '1'),
 ('002', 'rbrevl', 'rb', '2'),
 ('003', 'rbrevl', 'rb', '1'),
 ('003', 'rbrevl', 'rb', '2'),
 ('004', 'rbrevl', 'rb', '1'),
 ('004', 'rbrevl', 'rb', '2'),
 ('005', 'rbrevl', 'rb', '1'),
 ('005', 'rbrevl', 'rb', '2'),
 ('006', 'rbrevl', 'rb', '1'),
 ('006', 'rbrevl', 'rb', '2'),
 ('007', 'rbrevl', 'rb', '1'),
 ('007', 'rbrevl', 'rb', '2'),
 ('008', 'rbrevl', 'rb', '1'),
 ('008', 'rbrevl', 'rb', '2'),
 ('009', 'rbrevl', 'rb', '1'),
 ('009', 'rbrevl', 'rb', '2'),
 ('010', 'rbrevl', 'rb', '1'),
 ('010', 'rbrevl', 'rb', '2'),
 ('011', 'rbrevl', 'rb', '1'),
 ('011', 'rbrevl', 'rb', '2'),
 ('012', 'rbrevl', 'rb', '1'),
 ('012', 'rbrevl', 'rb', '2'),
 ('013', 'rbrevl', 'rb', '1'),
 ('013', 'rbrevl', 'rb', '2'),
 ('014', 'rbrevl', 'rb', '1'),
 ('014', 'rbrevl', 'rb', '2'),
 ('015', 'rbrevl', 'rb', '1'),
 ('015', 'rbrevl', 'rb', '2'),
 ('016', 'rbrevl', 'rb', '1'),
 ('016', 'rbrevl', 'rb', '2'),
 ('017', 'rbrevl', 'rb', '1'),
 ('017', 'rbrevl', 'rb', '2'),
 ('018',

In [9]:
# just extract MSIT
all_runs = sorted(glob.glob('../derivatives/fmriprep/fmriprep/sub-*/ses-rlsat/func/*task-*space-T1w*_bold.nii.gz'))
regex = re.compile('.*sub-(?P<sub>\d+)_ses-(?P<ses>\S+)_task-(?P<task>\S+)_run-(?P<run>\d)_space-T1w*')
all_combs = [tuple(regex.match(x).groupdict().values()) for x in all_runs]
all_combs

[('002', 'rlsat', 'rlsat', '1'),
 ('002', 'rlsat', 'rlsat', '2'),
 ('002', 'rlsat', 'rlsat', '3'),
 ('003', 'rlsat', 'rlsat', '1'),
 ('003', 'rlsat', 'rlsat', '2'),
 ('003', 'rlsat', 'rlsat', '3'),
 ('004', 'rlsat', 'rlsat', '1'),
 ('004', 'rlsat', 'rlsat', '2'),
 ('004', 'rlsat', 'rlsat', '3'),
 ('005', 'rlsat', 'rlsat', '1'),
 ('005', 'rlsat', 'rlsat', '2'),
 ('005', 'rlsat', 'rlsat', '3'),
 ('006', 'rlsat', 'rlsat', '1'),
 ('006', 'rlsat', 'rlsat', '2'),
 ('006', 'rlsat', 'rlsat', '3'),
 ('007', 'rlsat', 'rlsat', '1'),
 ('007', 'rlsat', 'rlsat', '2'),
 ('007', 'rlsat', 'rlsat', '3'),
 ('008', 'rlsat', 'rlsat', '1'),
 ('008', 'rlsat', 'rlsat', '2'),
 ('008', 'rlsat', 'rlsat', '3'),
 ('009', 'rlsat', 'rlsat', '1'),
 ('009', 'rlsat', 'rlsat', '2'),
 ('009', 'rlsat', 'rlsat', '3'),
 ('010', 'rlsat', 'rlsat', '1'),
 ('010', 'rlsat', 'rlsat', '2'),
 ('010', 'rlsat', 'rlsat', '3'),
 ('011', 'rlsat', 'rlsat', '1'),
 ('011', 'rlsat', 'rlsat', '2'),
 ('011', 'rlsat', 'rlsat', '3'),
 ('012', '

In [34]:
# all_subs = np.arange(2,28)
# all_ses = ['rlsat', 'rbrevl', 'anatomical', 'sstmsit']
# all_tasks = ['rs', 'rlsat', 'rb', 'revl', 'sst', 'msit']
# all_runs = [1,2,3]

# all_combs = list(itertools.product(all_subs,all_ses,all_tasks,all_runs))
# all_combs = [x for x in all_combs if (x[1]=='rlsat' and x[2]=='rlsat') or (x[1]=='rbrevl' and x[2] in ['rb', 'revl'] and x[3]<3) or (x[1]=='sstmsit' and x[2] in ['sst', 'msit'] and x[3]<3) or (x[1]=='anatomical' and x[2]=='rs' and x[3]<3)]
# #do_extract(all_combs[0], overwrite=True)

In [5]:
def check_affines(sub):
    sub = str(sub).zfill(3)
    all_funcs = sorted(glob.glob(f'../derivatives/fmriprep/fmriprep/sub-{sub}/ses*/func/sub*_space-T1w_desc-preproc_bold.nii.gz'))
    all_affines = [nib.load(x).affine for x in all_funcs]
    return (np.array(all_affines)[0] == np.array(all_affines)).all()

In [8]:
#all_atlases=['MASSP','CORT','ATAG']#,'THAL'] #['Pauli']
# all_atlases=['HCP_MMP1']
# all_atlases = ['CORT']
all_atlases = ['str','MASSP','CORT','ATAG']

hp_options= [True,False]
overwrite=False
psc=False

for atlas_name in all_atlases:
    for hp in hp_options:
        for i, comb in enumerate(all_combs):
            print(f'atlas-{atlas_name} hp-{hp}')
            print(comb)
            sub = comb[0]
            if check_affines(sub):
                do_extract(comb, atlas_name=atlas_name, overwrite=overwrite, to_psc=psc, use_hp=hp)
            else:
                print(f'Affines for sub {sub} not identical')

atlas-str hp-True
('004', 'sstmsit', 'msit', '1')
Extracting from sub-004/ses-sstmsit/sub-004_ses-sstmsit_task-msit_run-1.There are 383 voxels in region Accumbens-l
1060320
1060320
.There are 423 voxels in region Accumbens-r
1060320
1060320
.There are 1824 voxels in region Caudate-l
1060320
1060320
.There are 1973 voxels in region Caudate-r
1060320
1060320
.There are 2281 voxels in region Putamen-l
1060320
1060320
.There are 2274 voxels in region Putamen-r
1060320
1060320
../derivatives/extracted_signals/sub-004/ses-sstmsit/func/sub-004_ses-sstmsit_task-msit_run-1_desc-str-signals_hp.tsv
atlas-str hp-True
('004', 'sstmsit', 'msit', '2')
Extracting from sub-004/ses-sstmsit/sub-004_ses-sstmsit_task-msit_run-2.There are 383 voxels in region Accumbens-l
1060320
1060320
.There are 423 voxels in region Accumbens-r
1060320
1060320
.There are 1824 voxels in region Caudate-l
1060320
1060320
.There are 1973 voxels in region Caudate-r
1060320
1060320
.There are 2281 voxels in region Putamen-l
106

Extracting from sub-008/ses-sstmsit/sub-008_ses-sstmsit_task-msit_run-1.There are 363 voxels in region Accumbens-l
1048800
1048800
.There are 342 voxels in region Accumbens-r
1048800
1048800
.There are 1882 voxels in region Caudate-l
1048800
1048800
.There are 1725 voxels in region Caudate-r
1048800
1048800
.There are 2498 voxels in region Putamen-l
1048800
1048800
.There are 2472 voxels in region Putamen-r
1048800
1048800
../derivatives/extracted_signals/sub-008/ses-sstmsit/func/sub-008_ses-sstmsit_task-msit_run-1_desc-str-signals.tsv
atlas-str hp-False
('008', 'sstmsit', 'msit', '2')
Extracting from sub-008/ses-sstmsit/sub-008_ses-sstmsit_task-msit_run-2.There are 363 voxels in region Accumbens-l
1048800
1048800
.There are 342 voxels in region Accumbens-r
1048800
1048800
.There are 1882 voxels in region Caudate-l
1048800
1048800
.There are 1725 voxels in region Caudate-r
1048800
1048800
.There are 2498 voxels in region Putamen-l
1048800
1048800
.There are 2472 voxels in region Putame

.There are 554 voxels in region GPe-r
1060320
1060320
.There are 212 voxels in region GPi-l
1060320
1060320
.There are 217 voxels in region GPi-r
1060320
1060320
.There are 2774 voxels in region LV-l
1060320
1060320
.There are 3257 voxels in region LV-r
1060320
1060320
.There are 134 voxels in region PAG-l
1060320
1060320
.There are 138 voxels in region PAG-r
1060320
1060320
.There are 95 voxels in region PPN-l
1060320
1060320
.There are 90 voxels in region PPN-r
1060320
1060320
.There are 98 voxels in region RN-l
1060320
1060320
.There are 105 voxels in region RN-r
1060320
1060320
.There are 252 voxels in region SN-l
1060320
1060320
.There are 234 voxels in region SN-r
1060320
1060320
.There are 44 voxels in region STN-l
1060320
1060320
.There are 57 voxels in region STN-r
1060320
1060320
.There are 4026 voxels in region Str-l
1060320
1060320
.There are 3722 voxels in region Str-r
1060320
1060320
.There are 2584 voxels in region Tha-l
1060320
1060320
.There are 2466 voxels in region T

.There are 293 voxels in region SN-l
1097652
1097652
.There are 257 voxels in region SN-r
1097652
1097652
.There are 32 voxels in region STN-l
1097652
1097652
.There are 38 voxels in region STN-r
1097652
1097652
.There are 4126 voxels in region Str-l
1097652
1097652
.There are 4093 voxels in region Str-r
1097652
1097652
.There are 2803 voxels in region Tha-l
1097652
1097652
.There are 2831 voxels in region Tha-r
1097652
1097652
.There are 176 voxels in region VTA-l
1097652
1097652
.There are 193 voxels in region VTA-r
1097652
1097652
.There are 892 voxels in region fx
1097652
1097652
.There are 1269 voxels in region ic-l
1097652
1097652
.There are 1416 voxels in region ic-r
1097652
1097652
../derivatives/extracted_signals/sub-010/ses-sstmsit/func/sub-010_ses-sstmsit_task-msit_run-2_desc-MASSP-signals_hp.tsv
atlas-MASSP hp-True
('013', 'sstmsit', 'msit', '1')
Extracting from sub-013/ses-sstmsit/sub-013_ses-sstmsit_task-msit_run-1.There are 935 voxels in region 3V
866700
866700
.There ar

Extracting from sub-027/ses-sstmsit/sub-027_ses-sstmsit_task-msit_run-1.There are 929 voxels in region 3V
1098108
1098108
.There are 465 voxels in region 4V
1098108
1098108
.There are 533 voxels in region Amg-l
1098108
1098108
.There are 487 voxels in region Amg-r
1098108
1098108
.There are 601 voxels in region Cl-l
1098108
1098108
.There are 496 voxels in region Cl-r
1098108
1098108
.There are 423 voxels in region GPe-l
1098108
1098108
.There are 385 voxels in region GPe-r
1098108
1098108
.There are 161 voxels in region GPi-l
1098108
1098108
.There are 171 voxels in region GPi-r
1098108
1098108
.There are 2393 voxels in region LV-l
1098108
1098108
.There are 2352 voxels in region LV-r
1098108
1098108
.There are 133 voxels in region PAG-l
1098108
1098108
.There are 138 voxels in region PAG-r
1098108
1098108
.There are 93 voxels in region PPN-l
1098108
1098108
.There are 105 voxels in region PPN-r
1098108
1098108
.There are 107 voxels in region RN-l
1098108
1098108
.There are 105 voxels

.There are 3723 voxels in region LV-l
1048800
1048800
.There are 2355 voxels in region LV-r
1048800
1048800
.There are 138 voxels in region PAG-l
1048800
1048800
.There are 137 voxels in region PAG-r
1048800
1048800
.There are 94 voxels in region PPN-l
1048800
1048800
.There are 107 voxels in region PPN-r
1048800
1048800
.There are 92 voxels in region RN-l
1048800
1048800
.There are 100 voxels in region RN-r
1048800
1048800
.There are 272 voxels in region SN-l
1048800
1048800
.There are 265 voxels in region SN-r
1048800
1048800
.There are 41 voxels in region STN-l
1048800
1048800
.There are 58 voxels in region STN-r
1048800
1048800
.There are 4042 voxels in region Str-l
1048800
1048800
.There are 3898 voxels in region Str-r
1048800
1048800
.There are 2709 voxels in region Tha-l
1048800
1048800
.There are 2648 voxels in region Tha-r
1048800
1048800
.There are 194 voxels in region VTA-l
1048800
1048800
.There are 227 voxels in region VTA-r
1048800
1048800
.There are 956 voxels in region 

.There are 3080 voxels in region Str-l
866700
866700
.There are 2975 voxels in region Str-r
866700
866700
.There are 2344 voxels in region Tha-l
866700
866700
.There are 2360 voxels in region Tha-r
866700
866700
.There are 160 voxels in region VTA-l
866700
866700
.There are 168 voxels in region VTA-r
866700
866700
.There are 827 voxels in region fx
866700
866700
.There are 937 voxels in region ic-l
866700
866700
.There are 935 voxels in region ic-r
866700
866700
../derivatives/extracted_signals/sub-013/ses-sstmsit/func/sub-013_ses-sstmsit_task-msit_run-1_desc-MASSP-signals.tsv
atlas-MASSP hp-False
('013', 'sstmsit', 'msit', '2')
Extracting from sub-013/ses-sstmsit/sub-013_ses-sstmsit_task-msit_run-2.There are 935 voxels in region 3V
866700
866700
.There are 362 voxels in region 4V
866700
866700
.There are 428 voxels in region Amg-l
866700
866700
.There are 398 voxels in region Amg-r
866700
866700
.There are 353 voxels in region Cl-l
866700
866700
.There are 359 voxels in region Cl-r
86

.There are 533 voxels in region Amg-l
1098108
1098108
.There are 487 voxels in region Amg-r
1098108
1098108
.There are 601 voxels in region Cl-l
1098108
1098108
.There are 496 voxels in region Cl-r
1098108
1098108
.There are 423 voxels in region GPe-l
1098108
1098108
.There are 385 voxels in region GPe-r
1098108
1098108
.There are 161 voxels in region GPi-l
1098108
1098108
.There are 171 voxels in region GPi-r
1098108
1098108
.There are 2393 voxels in region LV-l
1098108
1098108
.There are 2352 voxels in region LV-r
1098108
1098108
.There are 133 voxels in region PAG-l
1098108
1098108
.There are 138 voxels in region PAG-r
1098108
1098108
.There are 93 voxels in region PPN-l
1098108
1098108
.There are 105 voxels in region PPN-r
1098108
1098108
.There are 107 voxels in region RN-l
1098108
1098108
.There are 105 voxels in region RN-r
1098108
1098108
.There are 256 voxels in region SN-l
1098108
1098108
.There are 227 voxels in region SN-r
1098108
1098108
.There are 56 voxels in region STN-

.There are 2085 voxels in region IFG-l
1097652
1097652
.There are 2113 voxels in region IFG-r
1097652
1097652
.There are 20241 voxels in region Ins-l
1097652
1097652
.There are 3551 voxels in region Ins-r
1097652
1097652
.There are 4033 voxels in region PaCG-l
1097652
1097652
.There are 4386 voxels in region PaCG-r
1097652
1097652
.There are 1837 voxels in region SMA-l
1097652
1097652
.There are 1988 voxels in region SMA-r
1097652
1097652
.There are 4254 voxels in region SPL-l
1097652
1097652
.There are 3797 voxels in region SPL-r
1097652
1097652
.There are 2016 voxels in region aSG-l
1097652
1097652
.There are 1820 voxels in region aSG-r
1097652
1097652
.There are 3108 voxels in region pCC-l
1097652
1097652
.There are 3076 voxels in region pCC-r
1097652
1097652
.There are 2349 voxels in region pSG-l
1097652
1097652
.There are 3138 voxels in region pSG-r
1097652
1097652
.There are 7745 voxels in region postcG-l
1097652
1097652
.There are 8050 voxels in region postcG-r
1097652
1097652
.

.There are 3233 voxels in region SPL-r
987188
987188
.There are 2106 voxels in region aSG-l
987188
987188
.There are 1659 voxels in region aSG-r
987188
987188
.There are 2688 voxels in region pCC-l
987188
987188
.There are 2430 voxels in region pCC-r
987188
987188
.There are 3648 voxels in region pSG-l
987188
987188
.There are 3095 voxels in region pSG-r
987188
987188
.There are 6616 voxels in region postcG-l
987188
987188
.There are 6202 voxels in region postcG-r
987188
987188
.There are 5064 voxels in region precC-l
987188
987188
.There are 5124 voxels in region precC-r
987188
987188
.There are 8182 voxels in region precGy-l
987188
987188
.There are 8453 voxels in region precGy-r
987188
987188
../derivatives/extracted_signals/sub-019/ses-sstmsit/func/sub-019_ses-sstmsit_task-msit_run-2_desc-CORT-signals_hp.tsv
atlas-CORT hp-True
('027', 'sstmsit', 'msit', '1')
Extracting from sub-027/ses-sstmsit/sub-027_ses-sstmsit_task-msit_run-1.There are 3375 voxels in region ACC-l
1098108
1098108

.There are 3346 voxels in region pSG-r
1048800
1048800
.There are 9162 voxels in region postcG-l
1048800
1048800
.There are 8552 voxels in region postcG-r
1048800
1048800
.There are 7138 voxels in region precC-l
1048800
1048800
.There are 6916 voxels in region precC-r
1048800
1048800
.There are 10247 voxels in region precGy-l
1048800
1048800
.There are 10606 voxels in region precGy-r
1048800
1048800
../derivatives/extracted_signals/sub-008/ses-sstmsit/func/sub-008_ses-sstmsit_task-msit_run-1_desc-CORT-signals.tsv
atlas-CORT hp-False
('008', 'sstmsit', 'msit', '2')
Extracting from sub-008/ses-sstmsit/sub-008_ses-sstmsit_task-msit_run-2.There are 3264 voxels in region ACC-l
1048800
1048800
.There are 3044 voxels in region ACC-r
1048800
1048800
.There are 1993 voxels in region IFG-l
1048800
1048800
.There are 1650 voxels in region IFG-r
1048800
1048800
.There are 18277 voxels in region Ins-l
1048800
1048800
.There are 3055 voxels in region Ins-r
1048800
1048800
.There are 3907 voxels in r

../derivatives/extracted_signals/sub-013/ses-sstmsit/func/sub-013_ses-sstmsit_task-msit_run-2_desc-CORT-signals.tsv
atlas-CORT hp-False
('019', 'sstmsit', 'msit', '1')
Extracting from sub-019/ses-sstmsit/sub-019_ses-sstmsit_task-msit_run-1.There are 2614 voxels in region ACC-l
987188
987188
.There are 2289 voxels in region ACC-r
987188
987188
.There are 1867 voxels in region IFG-l
987188
987188
.There are 1424 voxels in region IFG-r
987188
987188
.There are 17210 voxels in region Ins-l
987188
987188
.There are 3145 voxels in region Ins-r
987188
987188
.There are 3275 voxels in region PaCG-l
987188
987188
.There are 3216 voxels in region PaCG-r
987188
987188
.There are 1521 voxels in region SMA-l
987188
987188
.There are 1428 voxels in region SMA-r
987188
987188
.There are 2479 voxels in region SPL-l
987188
987188
.There are 3233 voxels in region SPL-r
987188
987188
.There are 2106 voxels in region aSG-l
987188
987188
.There are 1659 voxels in region aSG-r
987188
987188
.There are 2688 

.There are 8074 voxels in region lM1
1060320
1060320
.There are 2534 voxels in region lPreSMA
1060320
1060320
.There are 1122 voxels in region lSN
1060320
1060320
.There are 531 voxels in region lSTN
1060320
1060320
.There are 9800 voxels in region lSTR
1060320
1060320
.There are 617 voxels in region lVTA
1060320
1060320
.There are 2390 voxels in region rGPe
1060320
1060320
.There are 1377 voxels in region rGPi
1060320
1060320
.There are 11839 voxels in region rIFG
1060320
1060320
.There are 32 voxels in region rLC
1060320
1060320
.There are 9004 voxels in region rM1
1060320
1060320
.There are 2629 voxels in region rPreSMA
1060320
1060320
.There are 1140 voxels in region rSN
1060320
1060320
.There are 583 voxels in region rSTN
1060320
1060320
.There are 10225 voxels in region rSTR
1060320
1060320
.There are 652 voxels in region rVTA
1060320
1060320
../derivatives/extracted_signals/sub-004/ses-sstmsit/func/sub-004_ses-sstmsit_task-msit_run-2_desc-ATAG-signals_hp.tsv
atlas-ATAG hp-True
(

.There are 14506 voxels in region M1
866700
866700
.There are 10519 voxels in region THA
866700
866700
.There are 1813 voxels in region lGPe
866700
866700
.There are 1046 voxels in region lGPi
866700
866700
.There are 22 voxels in region lLC
866700
866700
.There are 7500 voxels in region lM1
866700
866700
.There are 1638 voxels in region lPreSMA
866700
866700
.There are 999 voxels in region lSN
866700
866700
.There are 442 voxels in region lSTN
866700
866700
.There are 7816 voxels in region lSTR
866700
866700
.There are 497 voxels in region lVTA
866700
866700
.There are 1888 voxels in region rGPe
866700
866700
.There are 1103 voxels in region rGPi
866700
866700
.There are 10532 voxels in region rIFG
866700
866700
.There are 20 voxels in region rLC
866700
866700
.There are 7006 voxels in region rM1
866700
866700
.There are 2037 voxels in region rPreSMA
866700
866700
.There are 926 voxels in region rSN
866700
866700
.There are 486 voxels in region rSTN
866700
866700
.There are 7924 voxel

Extracting from sub-004/ses-sstmsit/sub-004_ses-sstmsit_task-msit_run-2.There are 16192 voxels in region ACC
1060320
1060320
.There are 17078 voxels in region M1
1060320
1060320
.There are 11186 voxels in region THA
1060320
1060320
.There are 2401 voxels in region lGPe
1060320
1060320
.There are 1342 voxels in region lGPi
1060320
1060320
.There are 31 voxels in region lLC
1060320
1060320
.There are 8074 voxels in region lM1
1060320
1060320
.There are 2534 voxels in region lPreSMA
1060320
1060320
.There are 1122 voxels in region lSN
1060320
1060320
.There are 531 voxels in region lSTN
1060320
1060320
.There are 9800 voxels in region lSTR
1060320
1060320
.There are 617 voxels in region lVTA
1060320
1060320
.There are 2390 voxels in region rGPe
1060320
1060320
.There are 1377 voxels in region rGPi
1060320
1060320
.There are 11839 voxels in region rIFG
1060320
1060320
.There are 32 voxels in region rLC
1060320
1060320
.There are 9004 voxels in region rM1
1060320
1060320
.There are 2629 vox

.There are 7924 voxels in region rSTR
866700
866700
.There are 544 voxels in region rVTA
866700
866700
../derivatives/extracted_signals/sub-013/ses-sstmsit/func/sub-013_ses-sstmsit_task-msit_run-1_desc-ATAG-signals.tsv
atlas-ATAG hp-False
('013', 'sstmsit', 'msit', '2')
Extracting from sub-013/ses-sstmsit/sub-013_ses-sstmsit_task-msit_run-2.There are 13583 voxels in region ACC
866700
866700
.There are 14506 voxels in region M1
866700
866700
.There are 10519 voxels in region THA
866700
866700
.There are 1813 voxels in region lGPe
866700
866700
.There are 1046 voxels in region lGPi
866700
866700
.There are 22 voxels in region lLC
866700
866700
.There are 7500 voxels in region lM1
866700
866700
.There are 1638 voxels in region lPreSMA
866700
866700
.There are 999 voxels in region lSN
866700
866700
.There are 442 voxels in region lSTN
866700
866700
.There are 7816 voxels in region lSTR
866700
866700
.There are 497 voxels in region lVTA
866700
866700
.There are 1888 voxels in region rGPe
86

In [None]:
#all_atlases=['MASSP','CORT','ATAG']#,'THAL'] #['Pauli']
all_atlases=['THAL']

hp_options= [False]#[True,False]
overwrite=False
psc=False

for atlas_name in all_atlases:
    for hp in hp_options:
        for i, comb in enumerate(all_combs):
            print(f'atlas-{atlas_name} hp-{hp}')
            print(comb)
            sub = comb[0]
            if check_affines(sub):
                do_extract(comb, atlas_name=atlas_name, overwrite=overwrite, to_psc=psc, use_hp=hp)
            else:
                print(f'Affines for sub {sub} not identical')

atlas-THAL hp-False
('002', 'rlsat', 'rlsat', '1')
Extracting from sub-002/ses-rlsat/sub-002_ses-rlsat_task-rlsat_run-1.There are 59 voxels in region AV-l
.There are 61 voxels in region AV-r
.There are 7 voxels in region CL-l
.There are 6 voxels in region CL-r
.There are 91 voxels in region CM-l
.There are 91 voxels in region CM-r
.There are 36 voxels in region CeM-l
.There are 47 voxels in region CeM-r
.There are 24 voxels in region LD-l
.There are 24 voxels in region LD-r
.There are 108 voxels in region LGN-l
.There are 112 voxels in region LGN-r
.There are 45 voxels in region LP-l
.There are 40 voxels in region LP-r
.There are 14 voxels in region LSg-l
.There are 18 voxels in region LSg-r
.There are 133 voxels in region MDl-l
.There are 131 voxels in region MDl-r
.There are 357 voxels in region MDm-l
.There are 336 voxels in region MDm-r
.There are 53 voxels in region MGN-l
.There are 62 voxels in region MGN-r
.There are 7 voxels in region MV-l
.There are 9 voxels in region MV-r
.Th

In [30]:
#all_atlases=['MASSP','CORT','ATAG']#,'THAL'] #['Pauli']
all_atlases=['WM-rep']

hp_options= [False]#[True,False]
overwrite=False
psc=False

for atlas_name in all_atlases:
    for hp in hp_options:
        for i, comb in enumerate(all_combs):
            print(f'atlas-{atlas_name} hp-{hp}')
            print(comb)
            sub = comb[0]
            print(sub)
            if check_affines(sub):
                do_extract(comb, atlas_name=atlas_name, overwrite=overwrite, to_psc=psc, use_hp=hp)
            else:
                print(f'Affines for sub {sub} not identical')

atlas-WM-rep hp-False
('002', 'rbrevl', 'rb', '1')
002
Extracting from sub-002/ses-rbrevl/sub-002_ses-rbrevl_task-rb_run-1../derivatives/extracted_signals/sub-002/ses-rbrevl/func/sub-002_ses-rbrevl_task-rb_run-1_desc-WM-rep-signals.tsv already run, loading previous result...
atlas-WM-rep hp-False
('002', 'rbrevl', 'rb', '2')
002
Extracting from sub-002/ses-rbrevl/sub-002_ses-rbrevl_task-rb_run-2../derivatives/extracted_signals/sub-002/ses-rbrevl/func/sub-002_ses-rbrevl_task-rb_run-2_desc-WM-rep-signals.tsv already run, loading previous result...
atlas-WM-rep hp-False
('003', 'rbrevl', 'rb', '1')
003
Extracting from sub-003/ses-rbrevl/sub-003_ses-rbrevl_task-rb_run-1../derivatives/extracted_signals/sub-003/ses-rbrevl/func/sub-003_ses-rbrevl_task-rb_run-1_desc-WM-rep-signals.tsv already run, loading previous result...
atlas-WM-rep hp-False
('003', 'rbrevl', 'rb', '2')
003
Extracting from sub-003/ses-rbrevl/sub-003_ses-rbrevl_task-rb_run-2../derivatives/extracted_signals/sub-003/ses-rbrev

ValueError: Field of view of image #2 is different from reference FOV.
Reference affine:
array([[  1.5       ,   0.        ,  -0.        , -75.80429077],
       [  0.        ,   1.5       ,  -0.        , -82.96203613],
       [  0.        ,   0.        ,   1.5       , -90.03024292],
       [  0.        ,   0.        ,   0.        ,   1.        ]])
Image affine:
array([[  1.5       ,   0.        ,   0.        , -75.80429077],
       [  0.        ,   1.5       ,   0.        , -82.96203613],
       [  0.        ,   0.        ,   1.5       , -91.53024292],
       [  0.        ,   0.        ,   0.        ,   1.        ]])
Reference shape:
(102, 115, 99)
Image shape:
(102, 115, 100, 1)


In [24]:
all_funcs = sorted(glob.glob(f'../derivatives/fmriprep/fmriprep/sub-026/ses*/func/sub*_space-T1w_desc-preproc_bold.nii.gz'))
all_funcs

['../derivatives/fmriprep/fmriprep/sub-026/ses-anatomical/func/sub-026_ses-anatomical_task-rs_run-1_space-T1w_desc-preproc_bold.nii.gz',
 '../derivatives/fmriprep/fmriprep/sub-026/ses-anatomical/func/sub-026_ses-anatomical_task-rs_run-2_space-T1w_desc-preproc_bold.nii.gz',
 '../derivatives/fmriprep/fmriprep/sub-026/ses-mrlc/func/sub-026_ses-mrlc_task-mt_run-1_space-T1w_desc-preproc_bold.nii.gz',
 '../derivatives/fmriprep/fmriprep/sub-026/ses-mrlc/func/sub-026_ses-mrlc_task-mt_run-2_space-T1w_desc-preproc_bold.nii.gz',
 '../derivatives/fmriprep/fmriprep/sub-026/ses-mrlc/func/sub-026_ses-mrlc_task-mt_run-3_space-T1w_desc-preproc_bold.nii.gz',
 '../derivatives/fmriprep/fmriprep/sub-026/ses-mrlc/func/sub-026_ses-mrlc_task-mt_run-4_space-T1w_desc-preproc_bold.nii.gz',
 '../derivatives/fmriprep/fmriprep/sub-026/ses-rbrevl/func/sub-026_ses-rbrevl_task-rb_run-1_space-T1w_desc-preproc_bold.nii.gz',
 '../derivatives/fmriprep/fmriprep/sub-026/ses-rbrevl/func/sub-026_ses-rbrevl_task-rb_run-2_space

In [27]:
check_affines('026')

True

# 2 extract whole roi signal from thalamus

In [None]:
def find_rois(sub, atlas_name='MASSP', space='T1w'):
    if atlas_name=='THAL':
        if space == 'MNI152NLin2009cAsym' or space == 'mni':
            print('')
        else:
            mask_dir = f'../derivatives/masks_thal_func/sub-{sub}/anat/sub-{sub}_*.nii.gz'
            fns = sorted(glob.glob(mask_dir))
            names = [re.match('.*space-(?P<space>[a-zA-Z0-9]+)_desc-mask-(?P<label>\S+).nii.gz', fn).groupdict()['label'] for fn in fns]
            
    if atlas_name == 'ATAG':
        if space == 'MNI152NLin2009cAsym' or space == 'mni':
            ### Rois in MNI09c-space
            mask_dir='/home/Public/trondheim/sourcedata/masks/MNI152NLin2009cAsym_res-1p5'
            fns = sorted(glob.glob(mask_dir + '/space-*'))
            names = [re.match('.*space-(?P<space>[a-zA-Z0-9]+)_res-1p5_label-(?P<label>[a-zA-Z0-9]+)_probseg_def-img.nii.gz', fn).groupdict()['label'] for fn in fns]
        else:
            mask_dir = f'../derivatives/masks_atag_func/sub-{sub}/anat/sub-{sub}_*.nii.gz'
            fns = sorted(glob.glob(mask_dir))
            names = [re.match('.*space-(?P<space>[a-zA-Z0-9]+)_desc-mask-(?P<label>[a-zA-Z0-9]+).nii.gz', fn).groupdict()['label'] for fn in fns]
            
    elif atlas_name == 'MASSP':
        mask_dir = f'../derivatives/masks_massp_func/sub-{sub}/anat/sub-{sub}_*Tha*.nii.gz' # only thalamus
        fns = sorted(glob.glob(mask_dir))
        names = [re.match('.*space-(?P<space>[a-zA-Z0-9]+)_desc-mask-(?P<label>\S+).nii.gz', fn).groupdict()['label'] for fn in fns]

    roi_dict = dict(zip(names, fns))
    return roi_dict

def load_atlas(sub, atlas_name='MASSP', space='T1w'):
    from nilearn import image
    
    roi_dict = find_rois(sub, atlas_name, space)
    combined = image.concat_imgs(roi_dict.values())
    
    class AttrDict(dict):
        def __init__(self, *args, **kwargs):
            super(AttrDict, self).__init__(*args, **kwargs)
            self.__dict__ = self
            
    roi_atlas = AttrDict({'maps': combined,
                          'labels': roi_dict.keys()})
    
    return roi_atlas

def get_epi(sub, ses, task, run, use_hp=False, base_dir='../derivatives/fmriprep/fmriprep'):
    if use_hp:
        epi = os.path.join('../derivatives/high_passed_func', f'sub-{sub}/ses-{ses}/func/sub-{sub}_ses-{ses}_task-{task}_run-{run}_space-T1w_desc-preproc_bold.nii.gz')
    else:
        epi = os.path.join(base_dir, f'sub-{sub}/ses-{ses}/func/sub-{sub}_ses-{ses}_task-{task}_run-{run}_space-T1w_desc-preproc_bold.nii.gz')
    return epi

def _make_psc(data):
    mean_img = image.mean_img(data)

    # Replace 0s for numerical reasons
    mean_data = mean_img.get_fdata()
    mean_data[mean_data == 0] = 1
    denom = image.new_img_like(mean_img, mean_data)

    return image.math_img('data / denom[..., np.newaxis] * 100 - 100',
                          data=data, denom=denom)

def do_extract(to_run, atlas='MASSP', overwrite=False, to_psc=False, use_hp=False):
    sub, ses, task, run = to_run
    sub = str(sub).zfill(3)
    print(f'Extracting from sub-{sub}/ses-{ses}/sub-{sub}_ses-{ses}_task-{task}_run-{run}', end='')
    
    epi_fn = get_epi(sub,ses,task,run,use_hp)
    if not os.path.exists(epi_fn):
        print('...doesnt exist, skipping'.format(sub,ses,task,run))
        return None

    if atlas == 'thal':
        toappend = '_thalamus'
    else: 
        toappend=''
    
    ## dont really need to convert to psc here
    if to_psc:
        epi = _make_psc(epi_fn)
        psc_fn = '_psc'
    else:
        epi = nib.load(epi_fn)
        psc_fn = ''
    
    # might wanna ahve the hp data handy
    if use_hp:
        output_fn = f'../derivatives/extracted_signals_thal_voxels/sub-{sub}/ses-{ses}/func/sub-{sub}_ses-{ses}_task-{task}_run-{run}_desc-{atlas}-signals{psc_fn}_hp.tsv'
    else:
        output_fn = f'../derivatives/extracted_signals_that_voxels/sub-{sub}/ses-{ses}/func/sub-{sub}_ses-{ses}_task-{task}_run-{run}_desc-{atlas}-signals{psc_fn}.tsv'
    
    if os.path.exists(output_fn) and not overwrite:
        print(f'{output_fn} already run, loading previous result...')
        return pd.read_csv(output_fn, sep='\t')
    
    #epi = nib.load(epi_fn)
    epi_flat = epi.get_fdata().reshape((np.product(epi.shape[:3]), epi.shape[-1]))

    # load atlas
    atlas = load_atlas(sub,atlas_name=atlas)
    
    dfs = []
    for i in np.arange(len(atlas.labels)):
        print('.', end='')
        label = list(atlas.labels)[i]
        mask = image.index_img(atlas.maps, i)
        mask_flat = mask.get_fdata().ravel()
        indexes = np.where(mask_flat>0)[0] # indexes of voxel within mask. len(indexes) and np.count_nonzero(mask_flat) should be the same

        print(f'There are {np.count_nonzero(mask_flat)} voxels in region {label} for sub {sub}')
        for label_n, inds in enumerate(indexes):
            mask_flat_voxel = mask_flat.copy()
            mask_flat_voxel[:] = 0
            mask_flat_voxel[inds] = 1
            signal = pd.DataFrame(np.average(epi_flat, weights=mask_flat_voxel, axis=0), columns=[label+'_'+str(label_n)])
            signal.index.name = 'volume'
            dfs.append(signal)

    df = pd.concat(dfs, axis=1)
    if not os.path.exists(os.path.dirname(output_fn)):
        os.makedirs(os.path.dirname(output_fn))
    df.to_csv(output_fn, sep='\t')
    print(output_fn)
    return df

In [None]:
for i, comb in enumerate(all_combs):
    print(comb)
    sub = comb[0]
    if check_affines(sub):
        do_extract(comb, atlas='MASSP', overwrite=True, to_psc=False, use_hp=True)
    else:
        print(f'Affines for sub {sub} not identical')

('002', 'sstmsit', 'msit', '1')
Extracting from sub-002/ses-sstmsit/sub-002_ses-sstmsit_task-msit_run-1.There are 2693 voxels in region Tha-l for sub 002


In [None]:
for i, comb in enumerate(all_combs):
    print(comb)
    sub = comb[0]
    if check_affines(sub):
        do_extract(comb, atlas='THAL', overwrite=True, to_psc=False, use_hp=False)
    else:
        print(f'Affines for sub {sub} not identical')

In [101]:
all_combs = all_combs[:2]

In [None]:
all_combs

In [91]:
atlas = 'MASSP'
to_run = [('002', 'sstmsit', 'msit', '1')]
use_hp=True
sub='002'
ses='sstmsit'
task='msit'
run='1'
to_psc = False
overwrite=True
sub = str(sub).zfill(3)
print(f'Extracting from sub-{sub}/ses-{ses}/sub-{sub}_ses-{ses}_task-{task}_run-{run}', end='')

epi_fn = get_epi(sub,ses,task,run,use_hp)
if not os.path.exists(epi_fn):
    print('...doesnt exist, skipping'.format(sub,ses,task,run))
#     return None

if atlas == 'thal':
    toappend = '_thalamus'
else: 
    toappend=''

## dont really need to convert to psc here
if to_psc:
    epi = _make_psc(epi_fn)
    psc_fn = '_psc'
else:
    epi = nib.load(epi_fn)
    psc_fn = ''

# might wanna ahve the hp data handy
if use_hp:
    output_fn = f'../derivatives/extracted_signals_thal_voxels/sub-{sub}/ses-{ses}/func/sub-{sub}_ses-{ses}_task-{task}_run-{run}_desc-{atlas}-signals{psc_fn}_hp.tsv'
else:
    output_fn = f'../derivatives/extracted_signals_that_voxels/sub-{sub}/ses-{ses}/func/sub-{sub}_ses-{ses}_task-{task}_run-{run}_desc-{atlas}-signals{psc_fn}.tsv'

if os.path.exists(output_fn) and not overwrite:
    print(f'{output_fn} already run, loading previous result...')
#     return pd.read_csv(output_fn, sep='\t')

#epi = nib.load(epi_fn)
epi_flat = epi.get_fdata().reshape((np.product(epi.shape[:3]), epi.shape[-1]))

# load atlas
atlas = load_atlas(sub,atlas_name=atlas)

dfs = []
for i in np.arange(len(atlas.labels)):
    print('.', end='')
    label = list(atlas.labels)[i]
    mask = image.index_img(atlas.maps, i)
    mask_flat = mask.get_fdata().ravel()
    indexes = np.where(mask_flat>0)[0] # indexes of voxel within mask. len(indexes) and np.count_nonzero(mask_flat) should be the same
    
    print(f'There are {np.count_nonzero(mask_flat)} voxels in region {label} for sub {sub}')
#     for j in np.arange(np.count_nonzero(mask_flat)): ### FINISH THISSS # loop over each voxel in mask .. 
    for i, kk in enumerate(indexes):
        mask_flat_voxel = mask_flat.copy()
        mask_flat_voxel[:] = 0
        mask_flat_voxel[kk] = 1
        signal = pd.DataFrame(np.average(epi_flat, weights=mask_flat_voxel, axis=0), columns=[label+'_'+str(j)])
        signal.index.name = 'volume'
        dfs.append(signal)

# df = pd.concat(dfs, axis=1)
# #     output_fn = f'../derivatives/extracted_signals/sub-{sub}/ses-{ses}/func/sub-{sub}_ses-{ses}_task-{task}_run-{run}_desc-MASSP-signals.tsv'
# if not os.path.exists(os.path.dirname(output_fn)):
#     os.makedirs(os.path.dirname(output_fn))
# df.to_csv(output_fn, sep='\t')
# print(output_fn)
# return df

Extracting from sub-002/ses-sstmsit/sub-002_ses-sstmsit_task-msit_run-1..

In [70]:
indexes = np.where(mask_flat>0)[0]
for kk in indexes:
    mask_flat_voxel = mask_flat.copy()
    mask_flat_voxel[:] = 0
    mask_flat_voxel[kk] = 1


In [71]:
np.count_nonzero(mask_flat_voxel)

0

In [92]:
np.count_nonzero(mask_flat)

2716

In [96]:
for j in np.arange(np.count_nonzero(mask_flat)):
    print(j)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

In [95]:
for i, kk in enumerate(indexes):
    print(i)
    print(kk)

0
466219
1
476668
2
476761
3
476763
4
476764
5
476853
6
476854
7
476857
8
476858
9
476947
10
476948
11
476949
12
476952
13
476953
14
476954
15
476955
16
477045
17
477048
18
477049
19
477050
20
477140
21
477141
22
477142
23
477143
24
477144
25
477145
26
477235
27
477236
28
477237
29
477238
30
477239
31
477240
32
477241
33
477330
34
477331
35
477333
36
477334
37
477335
38
477426
39
477428
40
477429
41
477523
42
487494
43
487498
44
487499
45
487591
46
487594
47
487685
48
487686
49
487687
50
487688
51
487689
52
487778
53
487779
54
487780
55
487781
56
487782
57
487783
58
487784
59
487785
60
487786
61
487872
62
487873
63
487874
64
487875
65
487876
66
487877
67
487878
68
487879
69
487880
70
487881
71
487882
72
487965
73
487966
74
487967
75
487968
76
487969
77
487970
78
487971
79
487972
80
487973
81
487974
82
487975
83
487976
84
488059
85
488061
86
488062
87
488063
88
488064
89
488065
90
488066
91
488067
92
488068
93
488069
94
488070
95
488071
96
488072
97
488157
98
488158
99
488159
100
488160

In [87]:
kk

667227

In [88]:
np.where(mask_flat_voxel==1)

(array([667227]),)

# 2. Use nilearn, and high-pass filter & remove confounds along the way
### This is much faster, but something very weird happens when multiple atlas maps overlap (eg when there's both a mask for "M1" as well as "rM1" and "lM1")
the M1 case isn't very troubling, but it suggests something funny happens with overlapping maps/masks - do we have overlap? Perhaps the manually coded version is safer

In [41]:
## extract signals this way?
## stolen from nideconv
import pandas as pd
from nilearn import input_data
import nibabel as nb
from nilearn._utils import check_niimg
from nilearn import image
import numpy as np

def extract_timecourse_from_nii(atlas,
                                nii,
                                mask=None,
                                confounds=None,
                                atlas_type=None,
                                t_r=None,
                                low_pass=None,
                                high_pass=1./128,
                                to_psc=False,
                                *args,
                                **kwargs):


    standardize = kwargs.pop('standardize', False)
    detrend = kwargs.pop('detrend', False)

    if atlas_type is None:
        maps = check_niimg(atlas.maps)

        if len(maps.shape) == 3:
            atlas_type = 'labels'
        else:
            atlas_type = 'prob'

    if atlas_type == 'labels':
        masker = input_data.NiftiLabelsMasker(atlas.maps,
                                              mask_img=mask,
                                              standardize=standardize,
                                              detrend=detrend,
                                              t_r=t_r,
                                              low_pass=low_pass,
                                              high_pass=high_pass,
                                              *args, **kwargs)
    else:
        masker = input_data.NiftiMapsMasker(atlas.maps,
                                            mask_img=mask,
                                            standardize=standardize,
                                            detrend=detrend,
                                            t_r=t_r,
                                            low_pass=low_pass,
                                            high_pass=high_pass,
                                            *args, **kwargs)

    if to_psc:
        data = _make_psc(nii)
    else:
        data = nii

    results = masker.fit_transform(data,
                                   confounds=confounds)

    # For weird atlases that have a label for the background
    if len(atlas.labels) == results.shape[1] + 1:
        atlas.labels = atlas.labels[1:]

    if t_r is None:
        t_r = 1
    print(t_r)
    index = pd.Index(np.arange(0,
                               t_r*nib.load(nii).shape[-1],
                               t_r),
                     name='time')

    columns = pd.Index(atlas.labels,
                       name='roi')

    return pd.DataFrame(results,
                        index=index,
                        columns=columns)


In [42]:
def exclude_map_from_atlas(atlas, map_name):
    
    indx = np.where(np.array(list(atlas.labels)) == map_name)[0][0]

    all_indices = np.arange(atlas.maps.shape[-1])
    indices = [x for x in all_indices if not x == indx]

    atlas.maps = nilearn.image.index_img(atlas.maps, indices)
    atlas.labels = np.array(list(atlas.labels))[indices].tolist()
    
    return atlas

def _make_psc(data):
    mean_img = image.mean_img(data)

    # Replace 0s for numerical reasons
    mean_data = mean_img.get_data()
    mean_data[mean_data == 0] = 1
    denom = image.new_img_like(mean_img, mean_data)

    return image.math_img('data / denom[..., np.newaxis] * 100 - 100',
                          data=data, denom=denom)


def extract_signals_nilearn(comb, include_physio=True, space='T1w', overwrite=False):
#for sub, ses, task, run in all_combs:
    sub,ses,task,run = comb
    epi_fn = get_epi(sub,ses,task,run)
    if not os.path.exists(epi_fn):
        print('...doesnt exist, skipping'.format(sub,ses,task,run))       
        return None
    
    # load confounds
    confounds_fn = f'../derivatives/fmriprep/fmriprep/sub-{sub}/ses-{ses}/func/sub-{sub}_ses-{ses}_task-{task}_run-{run}_desc-confounds_timeseries.tsv'
    confounds = pd.read_csv(confounds_fn, sep='\t')[['trans_x', 'trans_y', 'trans_z', 'rot_x', 'rot_y', 'rot_z', 'dvars', 'framewise_displacement']].fillna(method='bfill')

    # get retroicor
    if include_physio:
        retroicor_fn = f'../derivatives/retroicor/sub-{sub}/ses-{ses}/func/sub-{sub}_ses-{ses}_task-{task}_run-{run}_desc-retroicor_regressors.tsv'
        if not os.path.exists(retroicor_fn):
            ## take first 20 aCompCor components
            print("No retroicor found, including 20 a_comp_cor components")
            a_comp_cor = pd.read_csv(confounds_fn, sep='\t')[['a_comp_cor_' + str(x).zfill(2) for x in range(20)]]
            confounds = pd.concat([confounds, a_comp_cor], axis=1)
        else:
            retroicor = pd.read_csv(retroicor_fn, sep='\t', header=None).iloc[:,:20]  ## 20 components in total
            retroicor.columns = ['cardiac_' + str(x) for x in range(6)] + ['respiratory_' + str(x) for x in range(8)] + ['respiratoryxcardiac_' + str(x) for x in range(4)] + ['HRV', 'RVT']
            confounds = pd.concat([confounds, retroicor], axis=1)

    # get brain mask
    brain_mask = f'../derivatives/fmriprep/fmriprep/sub-{sub}/ses-{ses}/func/sub-{sub}_ses-{ses}_task-{task}_run-1_space-{space}_desc-brain_mask.nii.gz'
    
    for atlas_type in ['MASSP', 'ATAG']:
#     for atlas_type in ['ATAG']:
        output_fn = f'../derivatives/extracted_signals_nilearn/sub-{sub}/ses-{ses}/func/sub-{sub}_ses-{ses}_task-{task}_run-{run}_desc-{atlas_type}-signals.tsv'
        if os.path.exists(output_fn) and not overwrite:
            return 0
        
        subject_atlas = load_atlas(sub, atlas_name=atlas_type)
        
        if atlas_type == 'ATAG':
            subject_atlas = exclude_map_from_atlas(subject_atlas, 'M1')
        
        df = extract_timecourse_from_nii(subject_atlas, epi_fn, mask=brain_mask, confounds=confounds, high_pass=1/128., t_r=1.38, to_psc=True)
        if not os.path.exists(os.path.dirname(output_fn)):
            os.makedirs(os.path.dirname(output_fn))
        df.to_csv(output_fn, sep='\t')

    return 0
#         print(output_fn)

In [40]:
include_physio = True
space = 'T1w'

all_runs = sorted(glob.glob('../derivatives/fmriprep/fmriprep/sub-*/ses-*/func/*space-T1w*_bold.nii.gz'))
regex = re.compile('.*sub-(?P<sub>\d+)_ses-(?P<ses>\S+)_task-(?P<task>\S+)_run-(?P<run>\d)_space-T1w*')
all_combs = [tuple(regex.match(x).groupdict().values()) for x in all_runs]


all_combs = [x for x in all_combs if x[2] == 'msit']
all_combs

[('002', 'sstmsit', 'msit', '1'), ('002', 'sstmsit', 'msit', '2')]

In [43]:
out = joblib.Parallel(n_jobs=10, verbose=True)(joblib.delayed(extract_signals_nilearn)(x, overwrite=True) for x in all_combs)

[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done   2 out of   2 | elapsed:  2.3min finished


# 3. Use pre-cleaned data, don't extract confounds during the process

In [17]:
include_physio = True
space = 'T1w'

def exclude_map_from_atlas(atlas, map_name):
    
    indx = np.where(np.array(list(atlas.labels)) == map_name)[0][0]

    all_indices = np.arange(atlas.maps.shape[-1])
    indices = [x for x in all_indices if not x == indx]

    atlas.maps = nilearn.image.index_img(atlas.maps, indices)
    atlas.labels = np.array(list(atlas.labels))[indices].tolist()
    
    return atlas

def _make_psc(data):
    mean_img = image.mean_img(data)

    # Replace 0s for numerical reasons
    mean_data = mean_img.get_data()
    mean_data[mean_data == 0] = 1
    denom = image.new_img_like(mean_img, mean_data)

    return image.math_img('data / denom[..., np.newaxis] * 100 - 100',
                          data=data, denom=denom)

# def get_epi_fn(sub,ses,task,run, base_dir='../derivatives/fmriprep/fmriprep')

def extract_signals_nilearn(comb, include_physio=True, space='T1w', overwrite=False, use_precleaned=False, use_confounds=True):
    if use_precleaned and use_confounds:
        raise(IOError('Cannot both use precleaned data AND clean, that''s a stupid idea!'))
    
    sub,ses,task,run = comb
    output_base_dir = '../derivatives/extracted_signals_nilearn'
    if use_precleaned:
        base_dir = '../derivatives/cleaned_func'
        output_base_dir += '_precleaned'
    else:
        base_dir = '../derivatives/fmriprep/fmriprep'
    epi_fn = get_epi(sub,ses,task,run, base_dir=base_dir)
    if not os.path.exists(epi_fn):
        print('...doesnt exist, skipping'.format(sub,ses,task,run))       
        return None
    
    if use_confounds:
        output_base_dir += '_cleaned'
        # load confounds
        confounds_fn = f'../derivatives/fmriprep/fmriprep/sub-{sub}/ses-{ses}/func/sub-{sub}_ses-{ses}_task-{task}_run-{run}_desc-confounds_timeseries.tsv'
        confounds = pd.read_csv(confounds_fn, sep='\t')[['trans_x', 'trans_y', 'trans_z', 'rot_x', 'rot_y', 'rot_z', 'dvars', 'framewise_displacement']].fillna(method='bfill')

        # get retroicor
        if include_physio:
            retroicor_fn = f'../derivatives/retroicor/sub-{sub}/ses-{ses}/func/sub-{sub}_ses-{ses}_task-{task}_run-{run}_desc-retroicor_regressors.tsv'
            if not os.path.exists(retroicor_fn):
                ## take first 20 aCompCor components
                print("No retroicor found, including 20 a_comp_cor components")
                a_comp_cor = pd.read_csv(confounds_fn, sep='\t')[['a_comp_cor_' + str(x).zfill(2) for x in range(20)]]
                confounds = pd.concat([confounds, a_comp_cor], axis=1)
            else:
                retroicor = pd.read_csv(retroicor_fn, sep='\t', header=None).iloc[:,:20]  ## 20 components in total
                retroicor.columns = ['cardiac_' + str(x) for x in range(6)] + ['respiratory_' + str(x) for x in range(8)] + ['respiratoryxcardiac_' + str(x) for x in range(4)] + ['HRV', 'RVT']
                confounds = pd.concat([confounds, retroicor], axis=1)

    # get brain mask
    brain_mask = f'../derivatives/fmriprep/fmriprep/sub-{sub}/ses-{ses}/func/sub-{sub}_ses-{ses}_task-{task}_run-1_space-{space}_desc-brain_mask.nii.gz'
    
    for atlas_type in ['MASSP', 'ATAG']:
        output_fn = os.path.join(output_base_dir, f'sub-{sub}/ses-{ses}/func/sub-{sub}_ses-{ses}_task-{task}_run-{run}_desc-{atlas_type}-signals.tsv')
        if os.path.exists(output_fn) and not overwrite:
            return 0
        
        subject_atlas = load_atlas(sub, atlas_name=atlas_type)
        if atlas_type == 'ATAG':
            subject_atlas = exclude_map_from_atlas(subject_atlas, 'M1')
        
        df = extract_timecourse_from_nii(subject_atlas, epi_fn, mask=brain_mask, high_pass=None)
        if not os.path.exists(os.path.dirname(output_fn)):
            os.makedirs(os.path.dirname(output_fn))
        df.to_csv(output_fn, sep='\t')

    return 0


In [18]:
all_runs = sorted(glob.glob('../derivatives/fmriprep/fmriprep/sub-*/ses-*/func/*space-T1w*_bold.nii.gz'))
regex = re.compile('.*sub-(?P<sub>\d+)_ses-(?P<ses>\S+)_task-(?P<task>\S+)_run-(?P<run>\d)_space-T1w*')
all_combs = [tuple(regex.match(x).groupdict().values()) for x in all_runs]

all_combs = [x for x in all_combs if x[1] == 'rlsat']

In [19]:
out = joblib.Parallel(n_jobs=10, verbose=True)(joblib.delayed(extract_signals_nilearn)(x, overwrite=False, use_precleaned=True, use_confounds=False) for x in all_combs)

[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done   0 out of   0 | elapsed:    0.0s finished


In [41]:
# for sub in np.arange(2, 22):
#     sub = str(sub).zfill(3)
#     ref_nii = f'../derivatives/fmriprep/fmriprep/sub-{sub}/ses-anatomical/func/sub-{sub}_ses-anatomical_task-rs_run-1_space-T1w_desc-preproc_bold.nii.gz'
#     if os.path.exists(ref_nii):
#         ref_affine = nib.load(ref_nii).affine
        
#         all_func_files = sorted(glob.glob(f'../derivatives/fmriprep/fmriprep/sub-{sub}/ses*/func/*space-T1w_desc-preproc_bold.nii.gz'))
#         for func_file in all_func_files:
#             affine = nib.load(func_file).affine
#             if not (affine == ref_affine).all():
#                 print(func_file)

../derivatives/fmriprep/fmriprep/sub-003/ses-anatomical/func/sub-003_ses-anatomical_task-rs_run-2_space-T1w_desc-preproc_bold.nii.gz
../derivatives/fmriprep/fmriprep/sub-003/ses-rbrevl/func/sub-003_ses-rbrevl_task-rb_run-1_space-T1w_desc-preproc_bold.nii.gz
../derivatives/fmriprep/fmriprep/sub-003/ses-rbrevl/func/sub-003_ses-rbrevl_task-rb_run-2_space-T1w_desc-preproc_bold.nii.gz
../derivatives/fmriprep/fmriprep/sub-003/ses-rbrevl/func/sub-003_ses-rbrevl_task-revl_run-1_space-T1w_desc-preproc_bold.nii.gz
../derivatives/fmriprep/fmriprep/sub-003/ses-rbrevl/func/sub-003_ses-rbrevl_task-revl_run-2_space-T1w_desc-preproc_bold.nii.gz
../derivatives/fmriprep/fmriprep/sub-003/ses-rlsat/func/sub-003_ses-rlsat_task-rlsat_run-1_space-T1w_desc-preproc_bold.nii.gz
../derivatives/fmriprep/fmriprep/sub-003/ses-rlsat/func/sub-003_ses-rlsat_task-rlsat_run-2_space-T1w_desc-preproc_bold.nii.gz
../derivatives/fmriprep/fmriprep/sub-003/ses-rlsat/func/sub-003_ses-rlsat_task-rlsat_run-3_space-T1w_desc-prepr

## Clean niftis
1. High-pass
2. Remove confounds

In [None]:
include_physio=True
def high_pass(nii, verbose=False, mask=None):
    print('Highpass-filtering')
    t_r = nii.header['pixdim'][4]
    if mask is not None:
        hp_masker = NiftiMasker(mask, high_pass=1./128, t_r=t_r)
    else:
        hp_masker = NiftiMasker(high_pass=1./128, t_r=t_r)
    
    # Generate & fit NiftiMasker
    hp_data = hp_masker.fit_transform(nii)
    
    # back to brain space
    inver = hp_masker.inverse_transform(hp_data)

    # add mean of timeseries per voxel back
    highpassed_data = inver.get_fdata() + np.mean(nii.get_fdata(), 3)[:,:,:,np.newaxis]
    highpassed_img = nib.Nifti1Image(highpassed_data, inver.affine, header=nii.header)
    
    return highpassed_img


def do_high_pass(fn, overwrite=False):
    regex = re.compile('.*sub-(?P<sub>\d+)_ses-(?P<ses>\S+)_task-(?P<task>\S+)_run-(?P<run>\d)_space-T1w_desc-preproc_bold.*')
    gd = regex.match(fn).groupdict()

    brain_mask = nib.load(fn.replace('preproc_bold', 'brain_mask'))

    # has this file been highpassed?
    hp_save_fn = fn.replace('fmriprep/fmriprep', 'high_passed_func')
    if os.path.exists(hp_save_fn) and not overwrite:
        hp_data = nib.load(hp_save_fn)
    else:
        nii = nib.load(fn)
        hp_data = high_pass(nii, mask=brain_mask)
        os.makedirs(os.path.dirname(hp_save_fn), exist_ok=True)
        hp_data.to_filename('../derivatives/high_passed_func/sub-{}/ses-{}/func/{}'.format(gd['sub'], gd['ses'], os.path.basename(fn)))

        
def do_clean(fn, overwrite=False):
    regex = re.compile('.*sub-(?P<sub>\d+)_ses-(?P<ses>\S+)_task-(?P<task>\S+)_run-(?P<run>\d)_space-T1w_desc-preproc_bold.*')
    gd = regex.match(fn).groupdict()
    sub, ses, task, run = gd['sub'], gd['ses'], gd['task'], gd['run']
    
    # has this file been cleaned?
    cleaned_save_fn = fn.replace('high_passed_func', 'cleaned_func')
    if os.path.exists(cleaned_save_fn) and not overwrite:
        print(cleaned_save_fn)
        cleaned_data = nib.load(cleaned_save_fn)
        print('eh')
    else:
        nii = nib.load(fn)
        # load confounds
        confounds_fn = f'../derivatives/fmriprep/fmriprep/sub-{sub}/ses-{ses}/func/sub-{sub}_ses-{ses}_task-{task}_run-{run}_desc-confounds_timeseries.tsv'
        confounds = pd.read_csv(confounds_fn, sep='\t')[['trans_x', 'trans_y', 'trans_z', 'rot_x', 'rot_y', 'rot_z', 'dvars', 'framewise_displacement']].fillna(method='bfill')

        # get retroicor
        if include_physio:
            retroicor_fn = f'../derivatives/retroicor/sub-{sub}/ses-{ses}/func/sub-{sub}_ses-{ses}_task-{task}_run-{run}_desc-retroicor_regressors.tsv'
            if not os.path.exists(retroicor_fn):
                ## take first 20 aCompCor components
                print("No retroicor found, including 20 a_comp_cor components")
                a_comp_cor = pd.read_csv(confounds_fn, sep='\t')[['a_comp_cor_' + str(x).zfill(2) for x in range(20)]]
                confounds = pd.concat([confounds, a_comp_cor], axis=1)
            else:
                retroicor = pd.read_csv(retroicor_fn, sep='\t', header=None).iloc[:,:20]  ## 20 components in total
                retroicor.columns = ['cardiac_' + str(x) for x in range(6)] + ['respiratory_' + str(x) for x in range(8)] + ['respiratoryxcardiac_' + str(x) for x in range(4)] + ['HRV', 'RVT']
                confounds = pd.concat([confounds, retroicor], axis=1)

        cleaned_data = image.clean_img(nii, confounds=confounds, standardize=False, detrend=False)

        os.makedirs(os.path.dirname(cleaned_save_fn), exist_ok=True)
        cleaned_data.to_filename('../derivatives/cleaned_func/sub-{}/ses-{}/func/{}'.format(gd['sub'], gd['ses'], os.path.basename(fn)))

all_funcs = sorted(glob.glob('../derivatives/fmriprep/fmriprep/sub-*/ses*/func/*T1w*_bold.nii.gz'))
# all_funcs = [x for x in all_funcs if not 'sub-001' in x]
# all_funcs

all_highpassed = sorted(glob.glob('../derivatives/high_passed_func/sub*/ses*/func/*'))
all_highpassed = [x for x in all_highpassed if 'rlsat' in x]

out = joblib.Parallel(n_jobs=20, verbose=True)(joblib.delayed(do_clean)(x, overwrite=True) for x in all_highpassed)

# do_clean(all_highpassed[0])

[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers.


# calculate number of voxels in the IFG

(scott) this was necessary to satisfy review comments, comparing the number of voxels in the IFG when using the HCP_MMP1 atlas vs the Harvard Oxford atlas

In [23]:
def calculate_voxel_numbers(to_run, atlas_name='MASSP', overwrite=False, to_psc=False, use_hp=False):
    sub, ses, task, run = to_run
    sub = str(sub).zfill(3)
    print(f'Extracting from sub-{sub}/ses-{ses}/sub-{sub}_ses-{ses}_task-{task}_run-{run}', end='')
    
    epi_fn = get_epi(sub,ses,task,run,use_hp)
    if not os.path.exists(epi_fn):
        print('...doesnt exist, skipping'.format(sub,ses,task,run))
        return None
    
    ## dont really need to convert to psc here
    if to_psc:
        epi = _make_psc(epi_fn)
        psc_fn = '_psc'
    else:
        epi = nib.load(epi_fn)
        psc_fn = ''
    
    # might wanna ahve the hp data handy
    if use_hp:
        output_fn = f'../derivatives/extracted_signals/sub-{sub}/ses-{ses}/func/sub-{sub}_ses-{ses}_task-{task}_run-{run}_desc-{atlas_name}-signals{psc_fn}_hp.tsv'
    else:
        output_fn = f'../derivatives/extracted_signals/sub-{sub}/ses-{ses}/func/sub-{sub}_ses-{ses}_task-{task}_run-{run}_desc-{atlas_name}-signals{psc_fn}.tsv'
    
    if os.path.exists(output_fn) and not overwrite:
        print(f'{output_fn} already run, loading previous result...')
        return pd.read_csv(output_fn, sep='\t')
    
    #load & reshpae
    epi_flat = epi.get_fdata().reshape((np.product(epi.shape[:3]), epi.shape[-1]))

    # load atlas
    atlas = load_atlas(sub,atlas_name=atlas_name)
    
    dfs = []
    for i in np.arange(len(atlas.labels)):
        print('.', end='')
        label = list(atlas.labels)[i]
        mask = image.index_img(atlas.maps, i)
        mask_flat = mask.get_fdata().ravel()
        if label in ['IFG-l', 'IFG-r','IFGhcp-l','IFGhcp-r']:
            print(f'There are {np.count_nonzero(mask_flat)} voxels in region {label}')
            return np.count_nonzero(mask_flat)



In [35]:
all_atlases=['HCP_MMP1']
# all_atlases = ['CORT']

hp_options= [True]
overwrite=True
psc=False
IFG_vox = []

for atlas_name in all_atlases:
    for hp in hp_options:
        for i, comb in enumerate(all_combs):
            print(f'atlas-{atlas_name} hp-{hp}')
            print(comb)
            sub = comb[0]
            if check_affines(sub):
                IFG_vox.append(calculate_voxel_numbers(comb, atlas_name=atlas_name, overwrite=overwrite, to_psc=psc, use_hp=hp))
            else:
                print(f'Affines for sub {sub} not identical')

atlas-HCP_MMP1 hp-True
('002', 'sstmsit', 'sst', '1')
Extracting from sub-002/ses-sstmsit/sub-002_ses-sstmsit_task-sst_run-1.There are 2311 voxels in region IFGhcp-l
atlas-HCP_MMP1 hp-True
('002', 'sstmsit', 'sst', '2')
Extracting from sub-002/ses-sstmsit/sub-002_ses-sstmsit_task-sst_run-2.There are 2311 voxels in region IFGhcp-l
atlas-HCP_MMP1 hp-True
('003', 'sstmsit', 'sst', '1')
Extracting from sub-003/ses-sstmsit/sub-003_ses-sstmsit_task-sst_run-1.There are 2007 voxels in region IFGhcp-l
atlas-HCP_MMP1 hp-True
('003', 'sstmsit', 'sst', '2')
Extracting from sub-003/ses-sstmsit/sub-003_ses-sstmsit_task-sst_run-2.There are 2007 voxels in region IFGhcp-l
atlas-HCP_MMP1 hp-True
('004', 'sstmsit', 'sst', '1')
Extracting from sub-004/ses-sstmsit/sub-004_ses-sstmsit_task-sst_run-1.There are 2391 voxels in region IFGhcp-l
atlas-HCP_MMP1 hp-True
('004', 'sstmsit', 'sst', '2')
Extracting from sub-004/ses-sstmsit/sub-004_ses-sstmsit_task-sst_run-2.There are 2391 voxels in region IFGhcp-l
atla

In [34]:
# harvard oxford
Harv_Ox_IFG = [1656,1544,1551,1881,1534,1622,1993,1522,2085,1484,1817,1772,2320,
 2221,1910,1496,1549,1867,1968,1997,1485,1923,1780,1808,1626,2113,
 1998,1825,1753,2140,1786,1925,1764,1759,1510,1836,1520]

np.array(Harv_Ox_IFG).mean()
# 1792.972972972973

1792.972972972973

In [38]:
HCP_MMP1_IFG = [2311,2007,2391,2401,2344,1791,2474,1929,2576,1919,2752,2240,2777,2626,
 2819,1849,2363,2389,2677,2836,2140,2532,2067,2325,2421,2618,2874,2648,
 2578,2893,2536,2457,2626,2068,2080,2318,2247]

np.array(HCP_MMP1_IFG).mean()

2402.675675675676

# TMP TO RUN NORMAL EXTRACTION

In [1]:
import glob
import os
import re
import shutil
import random
import string
import warnings

import numpy as np
import pandas as pd

import nilearn
from nilearn import plotting, image
from nilearn.input_data import NiftiMasker
import nibabel as nib
from nipype.interfaces import ants
import nighres

import subprocess
import json
import multiprocessing as mp
from functools import partial
import joblib
from joblib import Parallel, delayed
import itertools

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

	 A newer version (1.7.0) of nipy/nipype is available. You are using 1.6.0


In [2]:
def find_rois(sub, atlas_name='ATAG', space='T1w'):
    if atlas_name == 'ATAG':
        if space == 'MNI152NLin2009cAsym' or space == 'mni':
            ### Rois in MNI09c-space
            mask_dir='/home/Public/trondheim/sourcedata/masks/MNI152NLin2009cAsym_res-1p5'
            fns = sorted(glob.glob(mask_dir + '/space-*'))
            names = [re.match('.*space-(?P<space>[a-zA-Z0-9]+)_res-1p5_label-(?P<label>[a-zA-Z0-9]+)_probseg_def-img.nii.gz', fn).groupdict()['label'] for fn in fns]
        else:
            mask_dir = f'../derivatives/masks_atag_func/sub-{sub}/anat/sub-{sub}_*.nii.gz'
            fns = sorted(glob.glob(mask_dir))
            names = [re.match('.*space-(?P<space>[a-zA-Z0-9]+)_desc-mask-(?P<label>[a-zA-Z0-9]+).nii.gz', fn).groupdict()['label'] for fn in fns]
    elif atlas_name == 'MASSP':
        mask_dir = f'../derivatives/masks_massp_func/sub-{sub}/anat/sub-{sub}_*.nii.gz'
        fns = sorted(glob.glob(mask_dir))
        names = [re.match('.*space-(?P<space>[a-zA-Z0-9]+)_desc-mask-(?P<label>\S+).nii.gz', fn).groupdict()['label'] for fn in fns]

    roi_dict = dict(zip(names, fns))
    return roi_dict

def load_atlas(sub, atlas_name='MASSP', space='T1w'):
    from nilearn import image
    
    roi_dict = find_rois(sub, atlas_name, space)
    if len(roi_dict) == 0:
        warnings.warn(f'No ROIs found for sub-{sub} atlas-{atlas_name} space-{space}. Returning 0 to prevent error')
        return 0
    combined = image.concat_imgs(roi_dict.values())
    
    class AttrDict(dict):
        def __init__(self, *args, **kwargs):
            super(AttrDict, self).__init__(*args, **kwargs)
            self.__dict__ = self
            
    roi_atlas = AttrDict({'maps': combined,
                          'labels': roi_dict.keys()})
    
    return roi_atlas

In [3]:
def get_epi(sub, ses, task, run, use_hp=False, base_dir='../derivatives/fmriprep/fmriprep'):
    if use_hp:
        epi = os.path.join('../derivatives/high_passed_func', f'sub-{sub}/ses-{ses}/func/sub-{sub}_ses-{ses}_task-{task}_run-{run}_space-T1w_desc-preproc_bold.nii.gz')
    else:
        epi = os.path.join(base_dir, f'sub-{sub}/ses-{ses}/func/sub-{sub}_ses-{ses}_task-{task}_run-{run}_space-T1w_desc-preproc_bold.nii.gz')
    return epi

def _make_psc(data):
    mean_img = image.mean_img(data)

    # Replace 0s for numerical reasons
    mean_data = mean_img.get_fdata()
    mean_data[mean_data == 0] = 1
    denom = image.new_img_like(mean_img, mean_data)

    return image.math_img('data / denom[..., np.newaxis] * 100 - 100',
                          data=data, denom=denom)

def do_extract(to_run, atlas_name, overwrite=False, to_psc=False, use_hp=False):
    sub, ses, task, run = to_run
    sub = str(sub).zfill(3)
    print(f'Extracting from sub-{sub}/ses-{ses}/sub-{sub}_ses-{ses}_task-{task}_run-{run}', end='')
    
    epi_fn = get_epi(sub,ses,task,run,use_hp)
    if not os.path.exists(epi_fn):
        print('...doesnt exist, skipping'.format(sub,ses,task,run))
        return None
    
    # load atlas
    atlas = load_atlas(sub, atlas_name=atlas_name)
    if atlas == 0:
        warnings.warn('No atlas found! skipping')
        return None

    if to_psc:
        epi = _make_psc(epi_fn)
        psc_fn = '_psc'
    else:
        epi = nib.load(epi_fn)
        psc_fn = ''
    
    if use_hp:
        output_fn = f'../derivatives/extracted_signals/sub-{sub}/ses-{ses}/func/sub-{sub}_ses-{ses}_task-{task}_run-{run}_desc-{atasl_name}-signals{psc_fn}_hp.tsv'
    else:
        output_fn = f'../derivatives/extracted_signals/sub-{sub}/ses-{ses}/func/sub-{sub}_ses-{ses}_task-{task}_run-{run}_desc-{atlas_name}-signals{psc_fn}.tsv'
    print(output_fn)
    if os.path.exists(output_fn) and not overwrite:
        print(f'{output_fn} already run, loading previous result...')
        return pd.read_csv(output_fn, sep='\t')
    
    # load & reshape
    epi_flat = epi.get_fdata().reshape((np.product(epi.shape[:3]), epi.shape[-1]))

    dfs = []
    for i in np.arange(len(atlas.labels)):
        print('.', end='')
        label = list(atlas.labels)[i]
        mask = image.index_img(atlas.maps, i)
        mask_flat = mask.get_fdata().ravel()
        signal = pd.DataFrame(np.average(epi_flat, weights=mask_flat, axis=0), columns=[label])
        signal.index.name = 'volume'
        dfs.append(signal)

    df = pd.concat(dfs, axis=1)
#     output_fn = f'../derivatives/extracted_signals/sub-{sub}/ses-{ses}/func/sub-{sub}_ses-{ses}_task-{task}_run-{run}_desc-MASSP-signals.tsv'
    if not os.path.exists(os.path.dirname(output_fn)):
        os.makedirs(os.path.dirname(output_fn))
    df.to_csv(output_fn, sep='\t')
    print(output_fn)
    return df

In [5]:
# find all available functional runs, extract sub/ses/task/run info
all_runs = sorted(glob.glob('../derivatives/fmriprep/fmriprep/sub-*/ses-*/func/*space-T1w*_bold.nii.gz'))
regex = re.compile('.*sub-(?P<sub>\d+)_ses-(?P<ses>\S+)_task-(?P<task>\S+)_run-(?P<run>\d)_space-T1w*')
all_combs = [tuple(regex.match(x).groupdict().values()) for x in all_runs]
# all_combs = [x for x in all_combs if x[0] in ['002','003','004','005','006','007','008','009','010','011']]
# all_combs = [x for x in all_combs if x[0] in ['012','013','014','015','016','017','018','019','020','021','022','023','024','025','026']]
#all_combs = [x for x in all_combs if x[0] in ['027','029','030','031','032']]

# all_combs = [x for x in all_combs if x[1] == 'rlsat']
all_combs = [x for x in all_combs if x[1] == 'sstmsit']
# check if 
all_combs

[('002', 'sstmsit', 'msit', '1'),
 ('002', 'sstmsit', 'msit', '2'),
 ('002', 'sstmsit', 'sst', '1'),
 ('002', 'sstmsit', 'sst', '2'),
 ('003', 'sstmsit', 'msit', '1'),
 ('003', 'sstmsit', 'msit', '2'),
 ('003', 'sstmsit', 'sst', '1'),
 ('003', 'sstmsit', 'sst', '2'),
 ('004', 'sstmsit', 'msit', '1'),
 ('004', 'sstmsit', 'msit', '2'),
 ('004', 'sstmsit', 'sst', '1'),
 ('004', 'sstmsit', 'sst', '2'),
 ('005', 'sstmsit', 'msit', '1'),
 ('005', 'sstmsit', 'msit', '2'),
 ('005', 'sstmsit', 'sst', '1'),
 ('005', 'sstmsit', 'sst', '2'),
 ('006', 'sstmsit', 'msit', '1'),
 ('006', 'sstmsit', 'msit', '2'),
 ('006', 'sstmsit', 'sst', '1'),
 ('006', 'sstmsit', 'sst', '2'),
 ('007', 'sstmsit', 'msit', '1'),
 ('007', 'sstmsit', 'msit', '2'),
 ('007', 'sstmsit', 'sst', '1'),
 ('007', 'sstmsit', 'sst', '2'),
 ('008', 'sstmsit', 'msit', '1'),
 ('008', 'sstmsit', 'msit', '2'),
 ('008', 'sstmsit', 'sst', '1'),
 ('008', 'sstmsit', 'sst', '2'),
 ('009', 'sstmsit', 'msit', '1'),
 ('009', 'sstmsit', 'msit', 

In [None]:
# find all available functional runs, extract sub/ses/task/run info
all_runs = sorted(glob.glob('../derivatives/fmriprep/fmriprep/sub-*/ses-*/func/*space-T1w*_bold.nii.gz'))
regex = re.compile('.*sub-(?P<sub>\d+)_ses-(?P<ses>\S+)_task-(?P<task>\S+)_run-(?P<run>\d)_space-T1w*')
all_combs = [tuple(regex.match(x).groupdict().values()) for x in all_runs]
# all_combs = [x for x in all_combs if x[0] in ['002','003','004','005','006','007','008','009','010','011']]
# all_combs = [x for x in all_combs if x[0] in ['012','013','014','015','016','017','018','019','020','021','022','023','024','025','026']]
#all_combs = [x for x in all_combs if x[0] in ['027','029','030','031','032']]

# all_combs = [x for x in all_combs if x[1] == 'rlsat']
# all_combs = [x for x in all_combs if x[1] == 'sstmsit']
# check if 
all_combs

In [7]:
def check_affines(sub):
    sub = str(sub).zfill(3)
    all_funcs = sorted(glob.glob(f'../derivatives/fmriprep/fmriprep/sub-{sub}/ses*/func/sub*_space-T1w_desc-preproc_bold.nii.gz'))
    all_affines = [nib.load(x).affine for x in all_funcs]
    return (np.array(all_affines)[0] == np.array(all_affines)).all()

In [8]:
for i, comb in enumerate(all_combs):
    print(comb)
    sub = comb[0]
    if check_affines(sub):
        do_extract(comb, atlas_name='ATAG',overwrite=False, to_psc=False, use_hp=False)
    else:
        print(f'Affines for sub {sub} not identical')

('002', 'sstmsit', 'msit', '1')
Extracting from sub-002/ses-sstmsit/sub-002_ses-sstmsit_task-msit_run-1../derivatives/extracted_signals/sub-002/ses-sstmsit/func/sub-002_ses-sstmsit_task-msit_run-1_desc-ATAG-signals.tsv
......................../derivatives/extracted_signals/sub-002/ses-sstmsit/func/sub-002_ses-sstmsit_task-msit_run-1_desc-ATAG-signals.tsv
('002', 'sstmsit', 'msit', '2')
Extracting from sub-002/ses-sstmsit/sub-002_ses-sstmsit_task-msit_run-2../derivatives/extracted_signals/sub-002/ses-sstmsit/func/sub-002_ses-sstmsit_task-msit_run-2_desc-ATAG-signals.tsv
......................../derivatives/extracted_signals/sub-002/ses-sstmsit/func/sub-002_ses-sstmsit_task-msit_run-2_desc-ATAG-signals.tsv
('002', 'sstmsit', 'sst', '1')
Extracting from sub-002/ses-sstmsit/sub-002_ses-sstmsit_task-sst_run-1../derivatives/extracted_signals/sub-002/ses-sstmsit/func/sub-002_ses-sstmsit_task-sst_run-1_desc-ATAG-signals.tsv
......................../derivatives/extracted_signals/sub-002/ses-sst