<a href="https://colab.research.google.com/github/nmningmei/Deep_learning_fMRI_EEG/blob/master/10_1_searchlight_representational_similarity_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# The script to illustrate a way to perform searchlight RSA.

# Get the extracted features and the mask files

In [29]:
try:
    !git clone https://github.com/nmningmei/Extracted_features_of_Spanish_image_dataset.git
except:
    !ls Extracted_features_of_Spanish_image_dataset

fatal: destination path 'Extracted_features_of_Spanish_image_dataset' already exists and is not an empty directory.


# Get the fMRI data

In [30]:
# Import PyDrive and associated libraries.
# This only needs to be done once per notebook.
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
# This only needs to be done once per notebook.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Download a file based on its file ID.
#
# A file ID looks like: laggVyWshwcyP6kEI-y_W3P8D26sz
file_id = '1vLWSdXpOxqp3jOCypsWA27AEptsGdwav'
downloaded = drive.CreateFile({'id': file_id})


In [31]:
downloaded.GetContentFile('whole_bran.tar.gz')

In [32]:
!tar -xvf whole_bran.tar.gz

whole_brain_conscious.csv
whole_brain_unconscious.csv
whole_brain_unconscious.nii.gz
whole_brain_conscious.nii.gz


In [33]:
ls

adc.json                                      [0m[01;32mwhole_brain_conscious.nii.gz[0m*
[01;34mExtracted_features_of_Spanish_image_dataset[0m/  [01;32mwhole_brain_unconscious.csv[0m*
[01;34msample_data[0m/                                  [01;32mwhole_brain_unconscious.nii.gz[0m*
[01;32mwhole_brain_conscious.csv[0m*                    whole_bran.tar.gz


# install and update some of the libraries if not 

In [38]:
try:
    from nilearn.input_data import NiftiMasker
    from brainiak.searchlight.searchlight import Searchlight
    from brainiak.searchlight.searchlight import Ball
except:
    !pip install nilearn
    !python3 -m pip install -U brainiak
    from nilearn.input_data import NiftiMasker
    from brainiak.searchlight.searchlight import Searchlight
    from brainiak.searchlight.searchlight import Ball

import os,gc

import pandas as pd
import numpy as np

from nibabel import load as load_fmri
from joblib  import Parallel,delayed

In [39]:
def normalize(data,axis = 1):
    return data - data.mean(axis).reshape(-1,1)
# Define voxel function
def sfn(l, msk, myrad, bcast_var):
    """
    l: BOLD
    msk: mask array
    myrad: not use
    bcast_var: label -- CNN features
    """
    X = l[0][msk,:].T.copy() # vectorize the voxel values in the sphere
    y = bcast_var.copy() # vectorize the RDM
    # pearson correlation
    RDM_X   = distance.pdist(normalize(BOLD),'correlation')
    RDM_y   = distance.pdist(normalize(model),'correlation')
    D,p     = spearmanr(RDM_X,RDM_y)
    return D
def process_csv(file_name = 'whole_brain_conscious.csv'):
    """
    to add some info to the event files to create better cross-validation folds
    """
    df_data = pd.read_csv(file_name)
    df_data['id'] = df_data['session'] * 1000 + df_data['run'] * 100 + df_data['trials']
    df_data = df_data[df_data.columns[1:]]
    return df_data

In [42]:
radius              = 3 # in mm, the data has voxel size of 2.4mm x 2.4mm x 2.4mm
feature_dir         = 'Extracted_features_of_Spanish_image_dataset/computer_vision_features_no_background'
model_name          = 'VGG19'
label_map           = {'Nonliving_Things':[0,1],'Living_Things':[1,0]}
whole_brain_mask    = 'Extracted_features_of_Spanish_image_dataset/combine_BOLD.nii.gz'
average             = True
n_splits            = 10 # recommend to perform the resampling for more than 500 times
n_jobs              = -1

In [43]:
for conscious_state in ['unconscious','conscious']:
    np.random.seed(12345)

    df_data         = process_csv(f'whole_brain_{conscious_state}.csv')

    BOLD_image      = load_fmri(f'whole_brain_{conscious_state}.nii.gz')
    print(f'{conscious_state}\tfMRI in {BOLD_image.shape} events in {df_data.shape}')
    targets         = np.array([label_map[item] for item in df_data['targets']])[:,-1]
    images          = df_data['paths'].apply(lambda x: x.split('.')[0] + '.npy').values
    CNN_feature     = np.array([np.load(os.path.join(feature_dir,
                                                     model_name,
                                                     item)) for item in images])
    groups          = df_data['labels'].values

    # define a function to create the folds first
    def _proc(df_data):
        """
        This is useful when the number of folds are thousands
        """
        df_picked = df_data.groupby('labels').apply(lambda x: x.sample(n = 1).drop('labels',axis = 1)).reset_index()
        df_picked = df_picked.sort_values(['targets','subcategory','labels'])
        idx_test  = df_picked['level_1'].values
        return idx_test
    print(f'partitioning data for {n_splits} folds')
    idxs = Parallel(n_jobs = -1, verbose = 1)(delayed(_proc)(**{
                'df_data':df_data,}) for _ in range(n_splits))
    gc.collect() # free memory that is occupied by garbage

    # define a function to run the RSA
    def _searchligh_RSA(idx,
                        sl_rad = radius, 
                        max_blk_edge = radius - 1, 
                        shape = Ball,
                        min_active_voxels_proportion = 0,
                        ):
        # Brainiak function
        sl = Searchlight(sl_rad = sl_rad, 
                        max_blk_edge = max_blk_edge, 
                        shape = shape,
                        min_active_voxels_proportion = min_active_voxels_proportion,
                        )
        # distribute the data based on the sphere
        ## the first input is usually the BOLD signal, and it is in the form of 
        ## lists not arrays, representing each subject
        ## the second input is usually the mask, and it is in the form of array
        sl.distribute([np.asanyarray(BOLD_image.dataobj)[:,:,:,idx]], 
                        np.asanyarray(load_fmri(whole_brain_mask).dataobj) == 1)
        # broadcasted data is the data that remains the same during RSA
        sl.broadcast(CNN_feature[idx])
        # run searchlight algorithm
        global_outputs = sl.run_searchlight(sfn,
                                            pool_size = 1, # we run each RSA using a single CPU
                                            )
        return global_outputs
    for _ in range(10):
        gc.collect()
    res = Parallel(n_jobs = -1,verbose = 1,)(delayed(_searchligh_RSA)(**{
                'idx':idx}) for idx in idxs)
    # save the data
    results_to_save = np.zeros(np.concatenate([BOLD_image.shape[:3],[n_splits]]))
    for ii,item in enumerate(res):
        results_to_save[:,:,:,ii] = np.array(item, dtype=np.float)
    results_to_save = new_img_like(BOLD_image,results_to_save,)
    results_to_save.to_filename(os.path.join(output_dir,f'{conscious_state}.nii.gz'))

unconscious	fMRI in (88, 88, 66, 782) events in (782, 20)
partitioning data for 10 folds


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:    3.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.


NameError: ignored