In [1]:
import glob
import os

import ibc_public
import matplotlib.pyplot as plt
import nibabel as nib
import numpy as np
from nilearn.input_data import NiftiMasker
from sklearn.decomposition import FastICA



In [9]:
# Some input folder and file info
# `task` selects the dataset; it is reused below to pick the file pattern,
# the number of sessions per subject and the output file names.
task = 'clips'
# Folder for this task's data, relative to the notebook's working directory.
datadir = os.path.join('..', task, '3mm')

In [3]:
# Load the mask image shipped alongside the ibc_public package.
# The mask sits in an 'ibc_data' folder one level above the package directory
# (file name suggests a grey-matter mask at 3mm -- presumably; confirm).
_package_directory = os.path.dirname(os.path.abspath(ibc_public.__file__))
mask_gm = nib.load(
    os.path.join(_package_directory, '../ibc_data', 'gm_mask_3mm.nii.gz')
)

In [4]:
# Build the masker used to standardize (0 mean, 1 SD) the image files
# and to turn them into 2D arrays for the decomposition further down.
img_masker = NiftiMasker(
    mask_img=mask_gm,
    standardize=True,
    smoothing_fwhm=5,
    detrend=True,
    high_pass=1./128,
    t_r=2.0,
)
# fit() returns the masker itself, so fitting in place is equivalent to
# chaining the call onto the constructor.
img_masker.fit()

In [None]:
# Glob pattern selecting which image files of a session are used:
# only files containing 'Trn' for the clips task, every NIfTI file otherwise.
filepattern = '*Trn*.nii.gz' if task == 'clips' else '*.nii.gz'

In [10]:
# Now create a list of movie session files
# (`movie_dir` is built from the same path components as `datadir` above)
movie_dir = os.path.join('..', task, '3mm')
subs = sorted(os.listdir(movie_dir))
nsub = 0

# One entry per (subject, session), appended in sorted subject/session order
movie_arrays = []

# Number of sessions per subject
# Different tasks have different numbers of sessions.
# Also, all subjects might not have completed all sessions.
if task == 'clips':
    # For the clips task, one subject doesn't have all 4 sessions, and
    # the downstream analysis needs the same number of TRs for all subjects
    sessn = 3
else:
    sessn = 2

# Name of the per-session cache file holding the masked data
cache_name = 'masked_imgs_preproc.npy'

# Create 2D masked arrays from image data and save to file for quick and easy access
for sub in subs:
    sub_dir = os.path.join(movie_dir, sub)
    if not os.path.isdir(sub_dir):
        # Skip stray files (e.g. previously saved .npy outputs)
        continue
    nsub += 1
    sidx = 0

    for ses in sorted(os.listdir(sub_dir)):
        if sidx >= sessn:
            # Only the first `sessn` session folders of each subject are used
            break
        ses_dir = os.path.join(sub_dir, ses)
        if not os.path.isdir(ses_dir):
            continue
        sidx += 1

        cache_file = os.path.join(ses_dir, cache_name)
        if os.path.exists(cache_file):
            # NOTE: allow_pickle=True unpickles Python objects from the file;
            # only load caches that were written by this notebook.
            masked_imgs = np.load(cache_file, allow_pickle=True)
        else:
            movie_imgs = sorted(glob.glob(os.path.join(ses_dir, filepattern)))
            if not movie_imgs:
                # Fail with a clear message instead of handing an empty
                # list to the masker.
                raise FileNotFoundError(
                    'No files matching %r found in %s' % (filepattern, ses_dir))
            masked_imgs = img_masker.transform(movie_imgs)
            np.save(cache_file, masked_imgs)

        movie_arrays.append(masked_imgs)

In [11]:
# Concatenate all the runs belonging to each subject,
# and then create a list with one transposed array per subject.
#
# `movie_arrays` is a flat list that is read in strides of `sessn`, so it must
# hold exactly `sessn` entries per counted subject. Check that up front: a
# subject with fewer session folders would otherwise shift every following
# subject's data and end in an opaque IndexError.
n_expected = nsub * sessn
if len(movie_arrays) != n_expected:
    raise ValueError(
        'Expected %d session arrays (%d subjects x %d sessions) but found %d'
        % (n_expected, nsub, sessn, len(movie_arrays)))

sub_movie = []
for i in range(0, n_expected, sessn):
    # The inner concatenate joins the runs within one session;
    # the outer concatenate joins this subject's sessions.
    part = [np.concatenate(movie_arrays[i + j]) for j in range(sessn)]
    sub_movie.append(np.concatenate(part).T)

In [12]:
# Join all subjects' arrays along axis 1 and write the result next to the data
np.save(
    os.path.join(movie_dir, task + '_concat_data.npy'),
    np.concatenate(sub_movie, axis=1),
)

In [None]:
# Number of per-subject arrays collected in `sub_movie`
len(sub_movie)

In [None]:
# Shape of the data once all subjects are joined along axis 1
np.concatenate(sub_movie, axis=1).shape

In [None]:
# Set up some parameters for the ICA
# Number of independent components to extract (also the number of plot rows)
n_components = 20
# Fixed seed so repeated runs give the same decomposition
random_state = 0
# NOTE: `tol` and `max_iter` are defined here but are not currently passed to
# the FastICA constructor below (those arguments are commented out there).
tol = 0.005
max_iter = 5000

In [None]:
# Initialize the ICA model.
# Only the number of components and the seed are set explicitly; `tol` and
# `max_iter` are deliberately left out for now, so the library defaults apply:
#   tol=tol, max_iter=max_iter
fast_ica = FastICA(n_components=n_components, random_state=random_state)

In [None]:
# Fit the ICA on the concatenated data and keep the transformed output.
# The input is transposed before fitting and the result is transposed back,
# so `data_transform` has one row per component.
data_transform = fast_ica.fit_transform(
    np.concatenate(sub_movie, axis=1).T
).T

In [None]:
# Persist the ICA output in the task's data folder
np.save(
    os.path.join(datadir, 'fastica_components.npy'),
    data_transform,
)

In [None]:
# Shape of the transposed `fit_transform` output
data_transform.shape

In [None]:
# These are what the ICA components look like: one stacked panel per
# component, all sharing the same x and y axes.
# squeeze=False makes subplots() always return a 2D array of Axes, so the
# indexing below also works when n_components == 1 (where a bare Axes object
# would otherwise be returned and `axs[i]` would fail).
fig, axs = plt.subplots(n_components, sharex=True, sharey=True,
                        figsize=(10,50), squeeze=False)
# Flatten the (n_components, 1) grid to a 1-D array of Axes
axs = axs.ravel()
for i in range(n_components):
    axs[i].plot(data_transform[i,:])
    axs[i].set_title('ICA component #' + str(i+1))