## Data preparation for decoding models

We will use the data from Haxby et al. (2001), which is distributed on OpenNeuro as dataset ds000105.


In [19]:
import os

import datalad.api as dl
import h5py
import matplotlib.pyplot as plt
import nibabel as nib
import numpy as np
import pandas as pd
import templateflow
from bids import BIDSLayout
from nilearn import datasets, plotting
from nilearn.glm.first_level import FirstLevelModel
from nilearn.glm.first_level import make_first_level_design_matrix
from nilearn.glm.second_level import SecondLevelModel
from nilearn.image import load_img, mean_img
from nilearn.maskers import NiftiMasker
from nilearn.masking import intersect_masks
from nilearn.plotting import plot_stat_map, plot_design_matrix
from templateflow import api as tflow

#### Get the data using datalad

In [5]:
# Root of the raw BIDS dataset. Set the DS000105_DIR environment variable to
# use a different location; the default is the original hard-coded path.
data_dir = os.environ.get("DS000105_DIR", "/Users/poldrack/data_unsynced/ds000105")

# get the data
# The clone has to run before anything is created underneath data_dir:
# datalad refuses to clone into a directory that exists and is not empty
# (unless it already is the dataset cloned from this same source).
ds = dl.clone(
    path=data_dir,
    source="https://github.com/OpenNeuroDatasets/ds000105.git",
)
dl.get(dataset=data_dir, recursive=True)

# GLM outputs are written inside the dataset's derivatives folder.
output_dir = os.path.join(data_dir, 'derivatives', 'glm')
os.makedirs(output_dir, exist_ok=True)

# Set to True to also download the fMRIPrep derivatives into fmriprep_dir.
# fmriprep_dir is defined either way because later cells read from it.
get_fmriprep = False
fmriprep_dir = os.path.join(data_dir, 'derivatives', 'fmriprep')

if get_fmriprep:
    dl.clone(
        path=fmriprep_dir,
        source='https://github.com/OpenNeuroDerivatives/ds000105-fmriprep.git')
    dl.get(dataset=fmriprep_dir, recursive=True)

[INFO] Ensuring presence of Dataset(/Users/poldrack/data_unsynced/ds000105) to get /Users/poldrack/data_unsynced/ds000105 


In [24]:
# also get the difumo atlas using templateflow

def get_difumo_mask():
    """Build a binary visual-cortex mask from the 64-component DiFuMo atlas.

    The atlas is requested from TemplateFlow (template MNI152NLin2009cAsym,
    resolution 2). A voxel belongs to the mask when at least one of the
    selected components has a value greater than zero at that voxel.

    Returns
    -------
    nibabel.Nifti1Image
        int32 image (1 inside the mask, 0 elsewhere) that reuses the affine
        of the atlas image.

    Raises
    ------
    FileNotFoundError
        If TemplateFlow returns no 64-component DiFuMo NIfTI file.
    """
    templateflow.api.TF_S3_ROOT = 'https://templateflow.s3.amazonaws.com'
    atlas = tflow.get('MNI152NLin2009cAsym', resolution=2, atlas='DiFuMo')
    # Normalize to a list in case a single path is returned instead of several.
    if not isinstance(atlas, (list, tuple)):
        atlas = [atlas]

    # Match on the file name only: testing the whole path would also accept
    # any atlas stored under a directory whose name happens to contain "64".
    difumo64_candidates = [
        path for path in atlas
        if '64' in path.name and path.name.endswith('.nii.gz')
    ]
    if not difumo64_candidates:
        raise FileNotFoundError(
            'TemplateFlow returned no 64-component DiFuMo atlas file')
    difumo64_file = difumo64_candidates[0]

    # Create a mask for visual cortices using the relevant DiFuMo components:
    # 2, 3, 16, 29, 42, 55, 31. These labels are 1-indexed, so subtract one
    # to obtain the positions along the last axis of the atlas image.
    components = [1, 2, 15, 28, 41, 54, 30]
    difumo_img = nib.load(difumo64_file)
    # Threshold only the selected components, then take their union.
    selected = difumo_img.get_fdata()[..., components] > 0
    mask = selected.any(axis=-1).astype('int32')
    mask_img = nib.Nifti1Image(mask, difumo_img.affine)
    return mask_img


In [7]:
# load the dataset using pybids and get runs for each subject
# pybids can't process derivatives so we use the raw dir to get this info

# Index the raw dataset, then record the run labels of every subject.
layout = BIDSLayout(data_dir)
func_query = dict(datatype='func', extension='nii.gz', return_type='file')

# All functional images in the dataset (re-queried per subject below).
bold_files = layout.get(**func_query)
subjects = layout.get_subjects()

# Maps subject label -> run labels reported by pybids for that subject.
sub_runs = {}
for subject in subjects:
    bold_files = layout.get(subject=subject, **func_query)
    sub_runs[subject] = layout.get_runs(subject=subject)
    n_bold = len(bold_files)
    print(f"Subject {subject} has {n_bold} runs")


Subject 1 has 12 runs
Subject 2 has 12 runs
Subject 3 has 12 runs
Subject 4 has 12 runs
Subject 5 has 11 runs
Subject 6 has 12 runs


### Fit linear models for each subject/run


In [13]:


def get_subject_data(subject, run, layout, fmriprep_dir, save_maps=True):
    """Fit a first-level GLM for one run and extract visual-cortex z-scores.

    Parameters
    ----------
    subject : str
        Subject label without the ``sub-`` prefix.
    run : int or str
        Run label. It is converted with ``int()``, so the file name template
        below receives the run number without zero padding.
    layout : BIDSLayout
        Layout of the raw dataset; used to locate the events file of the run.
    fmriprep_dir : str
        Directory containing the fMRIPrep derivatives.
    save_maps : bool, default True
        When True, the z-score and effect-size map of every condition are
        written to the notebook-level ``output_dir``.

    Returns
    -------
    data : numpy.ndarray
        Array of shape (n_conditions, n_mask_voxels) holding the z-score maps
        restricted to the mask returned by ``get_difumo_mask()``.
    conditions : numpy.ndarray
        Sorted unique trial types of the run; row ``i`` of ``data`` belongs to
        ``conditions[i]``.

    Raises
    ------
    FileNotFoundError
        If pybids finds no events file for this subject and run.
    AssertionError
        If the extracted array does not have the expected shape.
    """
    run = int(run)
    bold_file = os.path.join(
        fmriprep_dir,
        f'sub-{subject}/func/sub-{subject}_task-objectviewing_run-{run}_space-MNI152NLin2009cAsym_res-2_desc-preproc_bold.nii.gz')
    bold_img = nib.load(bold_file)

    events_files = layout.get(subject=subject, run=run, datatype='func', extension='tsv', return_type='file')
    if not events_files:
        raise FileNotFoundError(
            f'No events file found for subject {subject} run {run}')
    events = pd.read_csv(events_files[0], sep='\t')

    n_scans = bold_img.shape[-1]
    # Define the sampling times for the design matrix. They start at 0, i.e.
    # the model is sampled at the beginning of each acquisition.
    t_r = 2.5
    frame_times = np.arange(n_scans) * t_r
    # We use a discrete cosine transform to model signal drifts.
    drift_model = "Cosine"
    # The cutoff for the drift model is 0.01 Hz.
    high_pass = 0.01
    # The hemodynamic response function
    hrf_model = "spm + derivative"

    design_matrix = make_first_level_design_matrix(
        frame_times,
        events,
        hrf_model=hrf_model,
        drift_model=drift_model,
        high_pass=high_pass,
    )

    fmri_glm = FirstLevelModel(t_r, noise_model='ar1', standardize=False, hrf_model=hrf_model)
    fmri_glm = fmri_glm.fit(bold_img, design_matrices=design_matrix)

    # Sorted so that the row order of the returned array is the same for
    # every run.
    conditions = np.sort(events.trial_type.unique())
    z_map = {}
    for condition in conditions:
        contrast = fmri_glm.compute_contrast(condition, output_type='all')
        z_map[condition] = contrast['z_score']
        if save_maps:
            # output_dir is a notebook-level variable set in the download cell.
            contrast['z_score'].to_filename(os.path.join(output_dir, f'sub-{subject}_run-{run}_zmap_{condition}.nii.gz'))
            contrast['effect_size'].to_filename(os.path.join(output_dir, f'sub-{subject}_run-{run}_beta_{condition}.nii.gz'))

    z_maps = nib.concat_images([z_map[condition] for condition in conditions])

    difumo_mask = get_difumo_mask()
    # standardize=True makes the masker standardize each voxel across the
    # rows of the input, which here are the condition maps.
    masker = NiftiMasker(mask_img=difumo_mask, standardize=True)
    data = masker.fit_transform(z_maps)

    # The data are extracted with the DiFuMo mask, so that is the mask whose
    # voxel count determines the number of columns.
    n_mask_voxels = int(np.count_nonzero(difumo_mask.get_fdata()))
    expected_shape = (len(conditions), n_mask_voxels)
    assert data.shape == expected_shape, (
        f'unexpected data shape {data.shape}, expected {expected_shape}')
    return data, conditions




# Write one HDF5 group per subject and, inside it, one group per run holding
# the masked z-score matrix and the matching condition labels.
h5_path = os.path.join(output_dir, 'visctx_data.h5')
with h5py.File(h5_path, 'w') as hf:
    for subject in subjects:
        subject_group = hf.create_group(f'sub-{subject}')
        for run in sub_runs[subject]:
            run_group = subject_group.create_group(f'run-{run}')
            print(f"Processing subject {subject} run {run}")
            data, conditions = get_subject_data(subject, run, layout, fmriprep_dir)
            run_group.create_dataset('voxdata', data=data)
            # Condition names are stored as UTF-8 encoded byte strings.
            encoded_conditions = [c.encode('utf-8') for c in conditions]
            run_group.create_dataset('conditions', data=encoded_conditions)

Processing subject 1 run 01
{'z_score': <nibabel.nifti1.Nifti1Image object at 0x175cded50>, 'stat': <nibabel.nifti1.Nifti1Image object at 0x175cdca40>, 'p_value': <nibabel.nifti1.Nifti1Image object at 0x16b1cadb0>, 'effect_size': <nibabel.nifti1.Nifti1Image object at 0x16bd6a660>, 'effect_variance': <nibabel.nifti1.Nifti1Image object at 0x175cdff20>}
{'z_score': <nibabel.nifti1.Nifti1Image object at 0x16c1794c0>, 'stat': <nibabel.nifti1.Nifti1Image object at 0x16bd68080>, 'p_value': <nibabel.nifti1.Nifti1Image object at 0x175d055e0>, 'effect_size': <nibabel.nifti1.Nifti1Image object at 0x175d06810>, 'effect_variance': <nibabel.nifti1.Nifti1Image object at 0x175d05b50>}
{'z_score': <nibabel.nifti1.Nifti1Image object at 0x16b70ae70>, 'stat': <nibabel.nifti1.Nifti1Image object at 0x16c27b7d0>, 'p_value': <nibabel.nifti1.Nifti1Image object at 0x16c178260>, 'effect_size': <nibabel.nifti1.Nifti1Image object at 0x16c317440>, 'effect_variance': <nibabel.nifti1.Nifti1Image object at 0x16c3167e0

  Y, _ = mean_scaling(Y, self.signal_scaling)


{'z_score': <nibabel.nifti1.Nifti1Image object at 0x173e728d0>, 'stat': <nibabel.nifti1.Nifti1Image object at 0x173e735f0>, 'p_value': <nibabel.nifti1.Nifti1Image object at 0x173e72990>, 'effect_size': <nibabel.nifti1.Nifti1Image object at 0x173adc4a0>, 'effect_variance': <nibabel.nifti1.Nifti1Image object at 0x173adeea0>}
{'z_score': <nibabel.nifti1.Nifti1Image object at 0x173adfc80>, 'stat': <nibabel.nifti1.Nifti1Image object at 0x173adc9e0>, 'p_value': <nibabel.nifti1.Nifti1Image object at 0x173adcf20>, 'effect_size': <nibabel.nifti1.Nifti1Image object at 0x173adf890>, 'effect_variance': <nibabel.nifti1.Nifti1Image object at 0x173add790>}
{'z_score': <nibabel.nifti1.Nifti1Image object at 0x173adccb0>, 'stat': <nibabel.nifti1.Nifti1Image object at 0x173addd30>, 'p_value': <nibabel.nifti1.Nifti1Image object at 0x173adc920>, 'effect_size': <nibabel.nifti1.Nifti1Image object at 0x173adcd40>, 'effect_variance': <nibabel.nifti1.Nifti1Image object at 0x173adf2c0>}
{'z_score': <nibabel.nift

In [17]:
# combine across runs within each subject

# `conditions` is the array returned by the last get_subject_data() call in
# the cell that writes visctx_data.h5, so that cell has to run first. The
# per-run beta maps read here only exist if it ran with save_maps=True.
for subject in subjects:
    runs = sub_runs[subject]
    if not runs:
        # Nothing to combine for a subject without runs.
        continue

    # The mask has to be defined here: no notebook-level `mask_img` exists
    # (the one inside get_subject_data is local to that function). The
    # fMRIPrep brain mask of the subject's first run is used for all runs.
    # NOTE(review): confirm this is the intended mask for this model.
    mask_img = nib.load(os.path.join(
        fmriprep_dir,
        f'sub-{subject}/func/sub-{subject}_task-objectviewing_run-{int(runs[0])}'
        '_space-MNI152NLin2009cAsym_res-2_desc-brain_mask.nii.gz'))

    # Intercept-only design: the estimated effect is the mean across runs.
    design_matrix = pd.DataFrame([1] * len(runs), columns=['intercept'])

    for condition in conditions:
        beta_files = [os.path.join(output_dir, f'sub-{subject}_run-{int(run)}_beta_{condition}.nii.gz') for run in runs]
        model = SecondLevelModel(mask_img=mask_img)
        model.fit(beta_files, design_matrix=design_matrix)
        # The effect-size output is a mean beta map, not a z map.
        mean_beta_img = model.compute_contrast(output_type='effect_size')
        mean_beta_img.to_filename(os.path.join(output_dir, f'sub-{subject}_meanbeta_{condition}.nii.gz'))



In [21]:
# Debugging aid: display the current value of `beta_files`, i.e. the list
# built by whichever cell assigned it most recently.
beta_files

['/Users/poldrack/data_unsynced/ds000105/derivatives/glm/sub-1_meanbeta_bottle.nii.gz',
 '/Users/poldrack/data_unsynced/ds000105/derivatives/glm/sub-2_meanbeta_bottle.nii.gz',
 '/Users/poldrack/data_unsynced/ds000105/derivatives/glm/sub-3_meanbeta_bottle.nii.gz',
 '/Users/poldrack/data_unsynced/ds000105/derivatives/glm/sub-4_meanbeta_bottle.nii.gz',
 '/Users/poldrack/data_unsynced/ds000105/derivatives/glm/sub-5_meanbeta_bottle.nii.gz',
 '/Users/poldrack/data_unsynced/ds000105/derivatives/glm/sub-6_meanbeta_bottle.nii.gz']

In [23]:
# run group model

# The mask has to be defined here: no notebook-level `mask_img` exists (the
# one inside get_subject_data is local to that function). The group mask is
# the set of voxels lying inside the first-run fMRIPrep brain mask of every
# subject.
# NOTE(review): confirm this is the intended mask for the group model.
subject_mask_files = [
    os.path.join(
        fmriprep_dir,
        f'sub-{subject}/func/sub-{subject}_task-objectviewing_run-{int(sub_runs[subject][0])}'
        '_space-MNI152NLin2009cAsym_res-2_desc-brain_mask.nii.gz')
    for subject in subjects
]
# threshold=1 keeps only voxels present in all masks; connected=False keeps
# the plain intersection instead of its largest connected component.
mask_img = intersect_masks(subject_mask_files, threshold=1, connected=False)

# Intercept-only design: a one-sample test across subjects.
group_design = pd.DataFrame([1] * len(subjects), columns=['intercept'])

# `conditions` comes from the cell that writes visctx_data.h5, and the
# per-subject mean beta maps from the within-subject combination cell.
for condition in conditions:
    beta_files = [os.path.join(output_dir, f'sub-{subject}_meanbeta_{condition}.nii.gz') for subject in subjects]
    model = SecondLevelModel(mask_img=mask_img)
    model.fit(beta_files, design_matrix=group_design)
    z_map = model.compute_contrast(output_type='z_score')
    z_map.to_filename(os.path.join(output_dir, f'group_zmap_{condition}.nii.gz'))
