In [2]:
%matplotlib inline
import matplotlib.pyplot as plt

import numpy as np

from nibabel.loadsave import ImageFileError

from nilearn.image import load_img
from nilearn import datasets
from nilearn.maskers import NiftiMasker, NiftiLabelsMasker
from nilearn.connectome import ConnectivityMeasure

from sklearn.preprocessing import StandardScaler

from nilearn import plotting

from itertools import combinations
from tqdm.notebook import tqdm
import itertools

from scipy.stats import spearmanr, entropy

from scipy.ndimage import gaussian_filter1d, uniform_filter1d, maximum_filter1d, minimum_filter1d

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
from scipy.linalg import logm
import os.path

**Getting timeseries from fMRI scans**

In [2]:
atlas = datasets.fetch_atlas_aal()
atlas_filename = atlas.maps
atlas_labels = atlas.labels
masker = NiftiLabelsMasker(atlas_filename, detrend=False)

def get_time_series(path):
    # print(path)
    if not os.path.isfile(path):
        return None
    try:
        fmri_img = load_img(path)
    except ImageFileError:  # no run for session
        return None
    
    time_series = masker.fit_transform(fmri_img)
    time_series = StandardScaler().fit_transform(time_series[:, :])
    return time_series

In [3]:
def get_ts_by_info(subj_id, session_id, run_id):
    return get_time_series(f'./raw/timeseries/subj0{subj_id}/timeseries_session{session_id:02d}_run{run_id:02d}.nii.gz')

In [4]:
SUBJ_IDS = (2,)  # need range(1, 8+1) --- download
SESSION_IDS = range(1, 37+1)  # need range(1, 37+1) --- download, or use range(1, 23+1)
RUN_IDS = range(1, 14+1)  # need range(1, 14+1) --- check 13 and 14 - may be empty

trend_size = 50

T = []
X = []
y = []

for subj_id, session_id, run_id in tqdm(itertools.product(SUBJ_IDS, SESSION_IDS, RUN_IDS),
                                        total=len(SUBJ_IDS) * len(SESSION_IDS) * len(RUN_IDS),
                                        desc='Getting timeseries'):
    time_series = get_ts_by_info(subj_id, session_id, run_id)
    if time_series is None:
        continue
    
    # get trend and subtract it
    time_series_trend = uniform_filter1d(time_series, trend_size, axis=0)
    time_series_filtered = time_series - time_series_trend
                        
    # standardize
    time_series_std = StandardScaler().fit_transform(time_series)
    time_series_filtered_std = StandardScaler().fit_transform(time_series_filtered)
                        
    # get connectivity matrices
    A_raw = ConnectivityMeasure(kind="correlation").fit_transform([time_series_std])[0]
    A_filtered = ConnectivityMeasure(kind="correlation").fit_transform([time_series_filtered_std])[0]
    
    # set data
    T.append([time_series_std, time_series_filtered_std])
    X.append([A_raw, A_filtered])
    y.append(subj_id)
T = np.array(T)
X = np.array(X)
y = np.array(y)

Getting timeseries:   0%|          | 0/518 [00:00<?, ?it/s]

In [5]:
np.save("T_subjects_2_nsd_vs_rest.npy", T)
np.save("X_subjects_2_nsd_vs_rest.npy", X)
np.save("y_subjects_2_nsd_vs_rest.npy", y)

In [5]:
n_trials = 478
n_steps = 226
n_rois = 116

# T = np.zeros((2 * n_trials, 2, n_steps, n_rois))
# X = np.zeros((2 * n_trials, 2, n_rois, n_rois)) # subject, raw/filtered, matrix
y_subjects = np.repeat([0, 1], n_trials)
y_sessions = np.tile(np.concatenate([np.repeat(0, 240), np.repeat(1, 238)]), 2)
y = np.concatenate([np.expand_dims(y_subjects, 0), np.expand_dims(y_sessions, 0)], axis=0)

In [6]:
T.shape, X.shape, y.shape

((956, 2, 226, 116), (956, 2, 116, 116), (2, 956))

In [7]:
np.save("T_subjects_1,5_nsd_vs_rest.npy", T)
np.save("X_subjects_1,5_nsd_vs_rest.npy", X)
np.save("y_subjects_1,5_nsd_vs_rest.npy", y)

In [3]:
T15 = np.load("T_subjects_1,5_nsd_vs_rest.npy")
X15 = np.load("X_subjects_1,5_nsd_vs_rest.npy")
T2 = np.load("T_subjects_2_nsd_vs_rest.npy")
X2 = np.load("X_subjects_2_nsd_vs_rest.npy")
T15.shape, X15.shape, T2.shape, X2.shape

((956, 2, 226, 116),
 (956, 2, 116, 116),
 (464, 2, 226, 116),
 (464, 2, 116, 116))

In [10]:
T152 = np.concatenate((T15, T2))
X152 = np.concatenate((X15, X2))
y152 = np.concatenate((y15, y2))
T152.shape, X152.shape, y152.shape

((1420, 2, 226, 116), (1420, 2, 116, 116), (1420,))

In [11]:
np.save("T_subjects_152.npy", T152)
np.save("X_subjects_152.npy", X152)
np.save("y_subjects_152.npy", y152)

In [8]:
y15 = np.load("y_subjects_1,5_nsd_vs_rest.npy")[0]
y2 = np.load("y_subjects_2_nsd_vs_rest.npy")
y15.shape, y2.shape

((956,), (464,))

In [16]:
np.save("y_subjects_152.npy", y152)

In [13]:
y15[y15 == 1] = 5
y15[y15 == 0] = 1
y15

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

In [3]:
y = np.load("y_subjects_152.npy")

In [4]:
y

array([1, 1, 1, ..., 2, 2, 2])

In [26]:
SUBJ_IDS = (1, 5, 2)  # need range(1, 8+1) --- download
SESSION_IDS = range(1, 37+1)  # need range(1, 37+1) --- download, or use range(1, 23+1)

y0 = []
y1 = []
y2 = []

for subj_id in SUBJ_IDS:
    for session_id in SESSION_IDS:
        if (subj_id in (1, 5) and session_id in range(21, 38+1)) or (subj_id not in (1, 5) and session_id in range(21, 30+1)):
            y0 += [subj_id] * 14
            y1 += [1] * 14
            y2 += [1] + [0] * 12 + [1]
        else:
            y0 += [subj_id] * 12
            y1 += [(0 if session_id <= 20 else 1)] * 12
            y2 += [0] * 12
y0 = np.array(y0)
y1 = np.array(y1)
y2 = np.array(y2)

In [30]:
Y = np.array([y0, y1, y2])

In [31]:
Y.shape

(3, 1420)

In [32]:
# visual cortex
visual = list(range(42, 53+1))
# sensorimotor network, somatomotor network (SMN)
smn = [0, 1, 6, 7, 18, 19, 56, 57, 62, 63, 68, 69]
# ventral attention network (VAN), ventral frontoparietal network (VFN), ventral attention system (VAS)
van = [32, 33, 34, 35, 36, 37, 52, 53, 62, 63, 64, 65]
# default mode network (DMN), default network, default state network, medial frontoparietal network (M-FPN)
dmn = [22, 23, 34, 35, 36, 37, 38, 39, 64, 65, 66, 67]

In [34]:
X_raw = np.load("X_subjects_152.npy")[:, 1]
X_raw.shape

(1420, 116, 116)

In [35]:
X = []
for brain_network in (visual, smn, van, dmn):
    bn_idx = np.ix_(np.arange(0, X_raw.shape[0]), brain_network, brain_network)
    X_bn = X_raw[bn_idx]
    X.append(X_bn)
X = np.array(X)
X.shape

(4, 1420, 12, 12)

In [37]:
np.save("corrs.npy", X)
np.save("labels.npy", Y)

In [39]:
X.shape, Y.shape

((4, 1420, 12, 12), (3, 1420))

In [42]:
(Y[1] == 0).sum()

720

In [29]:
y.shape, y0.shape

((1420,), (1420,))

In [12]:
yz = np.load("y_subjects_1,5_nsd_vs_rest.npy")

In [16]:
yz.shape

(2, 956)

In [20]:
y1.shape, yz[1].shape

((956,), (956,))

In [25]:
(y1 != yz[1]).sum()

0