This notebook loads the MPI LEMON MRI_Preprocessed_Derivatives data. We load the preprocessed data in MNI2mm space, parcellate it into regions, and extract the time series for each region.

In [1]:
%reload_ext autoreload
%autoreload 2

from pathlib import Path

from src.acnets.parcellations.dosenbach import load_dosenbach2010_masker
from joblib import Parallel, delayed

In [2]:
# Root folder of the MPI-LEMON preprocessed MRI derivatives (absolute, machine-specific path).
DATASET_ROOT_PATH = Path('/mnt/Lifestream/MPI-LEMON') / 'MRI_Preprocessed_Derivatives'

In [3]:
# Gather every file under a `func/` folder whose name ends in `MNI2mm.nii.gz`,
# searching the dataset root recursively, then order the paths.
t2_mni2mm_files = list(DATASET_ROOT_PATH.glob('**/func/*MNI2mm.nii.gz'))
t2_mni2mm_files.sort()

# DEBUG: lazy iterator over the `sub-*` folders; only needed by the
# commented-out check below, which lists subject folders without a matching scan.
subject_folders = DATASET_ROOT_PATH.glob('**/sub-*/')
# subjects_without_fmri = (
#     set(map(lambda x: x.stem, subject_folders)) - set(map(lambda x: x.parents[1].stem, t2_mni2mm_files))
# )

def extract_timeseries(t2_mni_file):
    """Extract the Dosenbach-2010 regional time series from one scan.

    Parameters
    ----------
    t2_mni_file : pathlib.Path
        Path to the image file. The subject label is taken from the stem of
        the directory two levels above the file (``parents[1]``).

    Returns
    -------
    tuple
        ``(subject, ts)`` where ``ts`` is the transposed masker output,
        laid out as (n_regions, n_timepoints).
    """
    subject_label = t2_mni_file.parents[1].stem

    # A fresh masker is loaded on every call; the second return value is not needed here.
    masker, _ = load_dosenbach2010_masker()

    # The masker output is transposed so that regions come first.
    time_by_region = masker.fit_transform(t2_mni_file)
    return subject_label, time_by_region.T

# Run the extraction on all available cores (n_jobs=-1) and collect the
# (subject, time series) pairs into a dict keyed by subject.
# NOTE: only the first 16 files are processed here.
timeseries = dict(
    Parallel(n_jobs=-1, verbose=8)(
        delayed(extract_timeseries)(nifti_path)
        for nifti_path in t2_mni2mm_files[:16]
    )
)


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   4 out of  16 | elapsed:  2.9min remaining:  8.7min
[Parallel(n_jobs=-1)]: Done   7 out of  16 | elapsed:  3.1min remaining:  3.9min
[Parallel(n_jobs=-1)]: Done  10 out of  16 | elapsed:  5.2min remaining:  3.1min
[Parallel(n_jobs=-1)]: Done  13 out of  16 | elapsed:  5.5min remaining:  1.3min
[Parallel(n_jobs=-1)]: Done  16 out of  16 | elapsed:  5.5min finished


In [5]:
import numpy as np
import xarray as xr

# The second value returned by the masker loader is the region table;
# convert it to xarray and drop its `index` variable.
_, regions = load_dosenbach2010_masker()
regions = regions.to_xarray().drop_vars('index')

# Start from an empty dataset that records the image space as metadata.
dataset = xr.Dataset(attrs={'space': 'MNI2mm'})

# Stack the per-subject arrays along a new leading `subject` axis.
# np.stack requires every subject's array to have the same shape.
dataset['timeseries'] = xr.DataArray(
    data=np.stack([subject_ts for subject_ts in timeseries.values()]),
    coords={'subject': [subject for subject in timeseries]},
    dims=('subject', 'region', 'timepoint'),
)

# Attach the region metadata to the time series.
dataset = xr.merge([dataset, regions])

# Time series are normalized separately for each subject: the min/max are
# taken over every dimension except `subject`.

def normalize_timeseries(x: xr.DataArray):
    """Min-max normalize each subject's data to the [-1, 1] range.

    The minimum and maximum are computed over all dimensions of ``x`` other
    than ``'subject'``, so every subject is rescaled using only its own
    values. (Reducing over ``'subject'`` instead would rescale each
    region/timepoint cell across subjects and mix subjects together.)

    Parameters
    ----------
    x : xr.DataArray
        Data to normalize. If it has a ``'subject'`` dimension, that
        dimension is preserved; all other dimensions are reduced to find
        the per-subject extrema.

    Returns
    -------
    xr.DataArray
        Array with the same dimensions as ``x``. The division by
        ``max - min`` is not guarded, so a subject whose values are all
        identical does not produce finite output.
    """
    # Every dimension except `subject` takes part in the min/max reduction.
    reduce_dims = [dim for dim in x.dims if dim != 'subject']

    x_min = x.min(dim=reduce_dims)
    x_max = x.max(dim=reduce_dims)

    x_norm = (x - x_min) / (x_max - x_min)  # scaled to [0, 1]
    x_norm = x_norm * 2 - 1  # map 0 to -1, and 1 to 1
    return x_norm

# Replace the raw time series with their normalized version.
timeseries_normalized = normalize_timeseries(dataset['timeseries'])
dataset['timeseries'] = timeseries_normalized

# Persist the dataset as NetCDF; the path is relative to the working directory.
output_path = 'data/mpi-lemon/dosenbach2010_timeseries.nc'
dataset.to_netcdf(output_path)