In [1]:
from nilearn.input_data import NiftiMasker, MultiNiftiMasker
from nilearn.datasets import fetch_atlas_basc_multiscale_2015
from fastsrm.fastsrm import FastSRM
from fastsrm import fastsrm
import matplotlib.pyplot as plt
from nilearn import image
from nilearn import plotting
import os
import glob
import numpy as np
from nilearn.image import new_img_like
import ibc_public
import nibabel as nib

%matplotlib inline



In [2]:
# Load the mask image stored in the 'ibc_data' directory that sits
# beside the installed ibc_public package.
_package_directory = os.path.dirname(os.path.abspath(ibc_public.__file__))
_mask_path = os.path.join(_package_directory, '../ibc_data', 'gm_mask_3mm.nii.gz')
mask_gm = nib.load(_mask_path)

In [3]:
# Task whose runs are fed to FastSRM
task = 'clips'

# Glob pattern selecting the image files used for FastSRM: the 'clips'
# task is restricted to files matching '*Trn*', every other task takes
# all NIfTI files.
filepattern = '*Trn*.nii.gz' if task == 'clips' else '*.nii.gz'

In [4]:
# Build (or reload) the atlas as a 1D vector of parcel labels, one label per
# voxel of the mask, which is the form handed to FastSRM as `atlas`.
#
# The computation is only needed for a previously unused atlas. Otherwise a
# .npy file saved by an earlier run exists and is simply loaded.
# The transform() function takes a few minutes to run, so it is only called
# when no cached file is found.
#
# A bit of shape shifting makes the atlas compatible with what
# fastsrm.reduce_data() requires:
# 1. Add a 4th dimension to the 3D atlas. The 4th dimension has one slot per
#    label value: the atlas parcels (444, in this case) plus slot 0, which
#    stays empty and stands for the background.
# 2. The 3D "volume" in slot c contains 1 in the voxels of parcel c and
#    0 everywhere else.
# 3. Apply the atlas masker to transform the new 4D atlas into a 2D matrix
#    with one row per slot and one column per voxel inside the mask.
# 4. Reduce the 2D matrix to 1D with argmax along the slot axis (axis 0).
#    The transformed atlas then has n_voxel elements.

atlas_loc = os.path.join('..', task, '3mm')
atlas_file = os.path.join(atlas_loc, 'atlas_masked.npy')

if os.path.exists(atlas_file):
    # The cache written below is a plain numeric array, so np.load's default
    # (allow_pickle=False) is enough and avoids unpickling arbitrary objects.
    atlas = np.load(atlas_file)
else:
    # Specify the atlas
    basc444 = fetch_atlas_basc_multiscale_2015()['scale444']
    basc_img = image.load_img(basc444)
    # get_fdata() replaces the deprecated get_data(), which recent nibabel
    # releases no longer provide.
    basc_im = basc_img.get_fdata()

    if basc_im.ndim != 3:
        # Fail here with a clear message instead of reaching np.save with
        # `atlas` undefined.
        raise ValueError(
            'Expected a 3D label atlas, got an array with {} dimensions'
            .format(basc_im.ndim))

    atlas_masker = NiftiMasker(mask_img=mask_gm).fit()

    # The first unique value is skipped as background; the remaining values
    # are used directly as indices into the 4th dimension.
    parcel_labels = np.unique(basc_im)[1:].astype(int)
    n_components = len(parcel_labels)

    A = np.zeros(basc_im.shape + (n_components + 1,))
    for c in parcel_labels:
        # Boolean membership map of parcel c, stored as 0./1.
        A[..., c] = basc_im == c

    atlas = atlas_masker.transform(new_img_like(basc_img, A))
    atlas = np.argmax(atlas, axis=0)

    # Save the transformed atlas
    np.save(atlas_file, atlas)

In [5]:
# Masker for the functional images: it standardizes them (0 mean, 1 SD) and
# turns them into the 2D arrays that FastSRM requires. Smoothing, detrending
# and high-pass filtering are applied with the settings gathered here.
_masker_options = dict(
    standardize=True,
    smoothing_fwhm=5,
    detrend=True,
    high_pass=1./128,
    t_r=2.0,
)
img_masker = NiftiMasker(mask_img=mask_gm, **_masker_options).fit()

In [6]:
# Build `movie_arrays`: a flat list with one entry per (subject, session),
# ordered subject by subject, each entry holding the masked runs of that
# session. `nsub` counts the subject directories that were processed.
movie_dir = os.path.join('..', task, '3mm')
subs = sorted(glob.glob(movie_dir + '/sub*'))
nsub = 0

movie_arrays = []

# Number of sessions per subject
# Different tasks have different numbers of sessions.
# Also, all subjects might not have completed all sessions.
if task == 'clips':
    # For the clips task, one subject doesn't have all 4 sessions, and
    # FastSRM requires that all subjects have the same numbers of TRs
    sessn = 3
else:
    sessn = 2

# Create 2D masked arrays from image data and save to file for quick and easy access
for sub in subs:
    if not os.path.isdir(sub):
        continue
    nsub += 1
    sess = sorted(glob.glob(sub + '/ses*'))
    sidx = 0  # number of sessions kept so far for this subject

    for ses in sess:
        # Only the first `sessn` session directories of a subject are used
        if not os.path.isdir(ses) or sidx >= sessn:
            continue
        sidx += 1

        cache_file = os.path.join(ses, 'masked_imgs_preproc.npy')
        if os.path.exists(cache_file):
            # NOTE(review): allow_pickle=True is kept from the original; it
            # is only needed if a cache was stored as an object array.
            # Unpickling can execute arbitrary code, so only load caches
            # written by this notebook.
            masked_imgs = np.load(cache_file, allow_pickle=True)
        else:
            movie_imgs = sorted(glob.glob(ses + '/' + filepattern))
            if not movie_imgs:
                # Avoid writing an empty cache that would fail later on
                raise ValueError(
                    'No files matching {!r} found in {}'
                    .format(filepattern, ses))
            # NiftiMasker.transform handles one 3D/4D image at a time, so
            # every run file is transformed on its own; the per-run 2D
            # arrays are then stacked along a new leading axis.
            masked_imgs = np.asarray(
                [img_masker.transform(run) for run in movie_imgs])
            np.save(cache_file, masked_imgs)

        movie_arrays.append(masked_imgs)

    if sidx != sessn:
        # movie_arrays is later indexed with a fixed stride of `sessn`
        # entries per subject, so a short subject would misalign the data.
        raise ValueError(
            '{} has {} session(s), expected {}'.format(sub, sidx, sessn))

The next few cells check whether the time series sum to zero.

In [7]:
# Shape of `masked_imgs` as left by the last session handled in the loop above
masked_imgs.shape

(3, 325, 46448)

In [8]:
# Take the first entry along the leading axis and drop any length-1 axes
a = np.squeeze(masked_imgs[0])

In [9]:
# Shape of the array extracted from `masked_imgs`
a.shape

(325, 46448)

In [31]:
# Average `a` along axis 1, which yields one value per row of `a`
avg_tc = a.mean(axis=1)
type(avg_tc)

numpy.ndarray

In [35]:
# Turn the averages into a single column so they broadcast against the rows of `a`
avg_tc = np.reshape(avg_tc, (len(avg_tc), 1))

In [36]:
# Remove the per-row average from every element of the corresponding row
mat_dif = np.subtract(a, avg_tc)

In [37]:
# The subtraction must leave the shape of `a` unchanged
mat_dif.shape

(325, 46448)

In [28]:
# NOTE(review): IPython help lookup left over from interactive exploration;
# it does not contribute to the analysis and can be dropped from the final notebook.
np.apply_along_axis?

In [None]:
# Build one array per subject from the flat `movie_arrays` list, which holds
# `sessn` consecutive entries for each of the `nsub` subjects.
# For every subject:
#   - the inner np.concatenate joins the runs of one session,
#   - the outer np.concatenate joins the sessions of that subject,
#   - the result is transposed before being stored.
sub_movie = [
    np.concatenate(
        [np.concatenate(movie_arrays[first + offset]) for offset in range(sessn)]
    ).T
    for first in range(0, nsub * sessn, sessn)
]

In [None]:
# Calculate the average timeseries and subtract it from the data. 
# We want the data to sum to zero in both the temporal and spatial domains.

In [None]:
# Configure FastSRM with the masked atlas and fit it on the per-subject data.
# The remaining settings are collected in one place so they are easy to tune.
_srm_options = dict(
    n_components=20,
    n_iter=10,
    n_jobs=1,
    temp_dir='/tmp',
    low_ram=True,
    aggregate="mean",
)
fast_srm = FastSRM(atlas=atlas, **_srm_options)
fast_srm.fit(sub_movie)

In [None]:
# Check to make sure that the spatial maps sum to zero 
# (i.e., sum of all voxel values is 0)
# Run CanICA on the spatial maps