In [3]:
from nilearn import datasets
from nilearn.input_data import NiftiLabelsMasker
from nilearn.connectome import ConnectivityMeasure
from argparse import ArgumentParser
import numpy as np
from sklearn.decomposition import PCA
import os
import pandas as pd

In [1]:
# CHANGE PATH (data_dir) TO WHERE YOU WANT TO STORE THE DATA
# Fetch the CPAC-preprocessed ABIDE data (quality-checked subjects only);
# n_subjects=3 keeps the download small.
abide = datasets.fetch_abide_pcp(data_dir="/Users/htamvada/tmp_data",
                                 pipeline="cpac",
                                 quality_checked=True, n_subjects=3)

# Transform phenotypic data into dataframe
abide_pheno = pd.DataFrame(abide.phenotypic)

# Extract group info: one acquisition-site label per subject.
# Depending on the nilearn version, SITE_ID values arrive either as bytes
# or as str; decode only the bytes so the cell works in both cases
# instead of raising AttributeError on str.
groups = [s.decode() if isinstance(s, bytes) else s
          for s in abide_pheno.SITE_ID]


Dataset created in /Users/htamvada/tmp_data/ABIDE_pcp

Downloading data from https://s3.amazonaws.com/fcp-indi/data/Projects/ABIDE_Initiative/Phenotypic_V1_0b_preprocessed1.csv ...


Downloaded 449443 of 449443 bytes (100.0%,    0.0s remaining) ...done. (1 seconds, 0 min)
  output = genfromtxt(fname, **kwargs)


Downloading data from https://s3.amazonaws.com/fcp-indi/data/Projects/ABIDE_Initiative/Outputs/cpac/nofilt_noglobal/func_preproc/Pitt_0050003_func_preproc.nii.gz ...


Downloaded 104419884 of 104419884 bytes (100.0%,    0.0s remaining) ...done. (6 seconds, 0 min)


Downloading data from https://s3.amazonaws.com/fcp-indi/data/Projects/ABIDE_Initiative/Outputs/cpac/nofilt_noglobal/func_preproc/Pitt_0050004_func_preproc.nii.gz ...


Downloaded 107986683 of 107986683 bytes (100.0%,    0.0s remaining) ...done. (6 seconds, 0 min)


Downloading data from https://s3.amazonaws.com/fcp-indi/data/Projects/ABIDE_Initiative/Outputs/cpac/nofilt_noglobal/func_preproc/Pitt_0050005_func_preproc.nii.gz ...


Downloaded 110518334 of 110518334 bytes (100.0%,    0.0s remaining) ...done. (6 seconds, 0 min)


In [4]:
# Preprocessed functional images, one path per fetched subject
fmri_filenames = abide.func_preproc

# BASC multiscale parcellation; the 64-parcel scale is used as the atlas
multiscale = datasets.fetch_atlas_basc_multiscale_2015()
atlas_filename = multiscale["scale064"]

# Connectivity estimator: correlation between regions, returned as a flat
# vector (vectorize=True) without the diagonal (discard_diagonal=True)
correlation_measure = ConnectivityMeasure(
    kind='correlation',
    vectorize=True,
    discard_diagonal=True,
)

# Masker that turns a 4D image into standardized per-region signals;
# intermediate results are cached on disk under 'nilearn_cache'
masker = NiftiLabelsMasker(
    labels_img=atlas_filename,
    standardize=True,
    memory='nilearn_cache',
    verbose=0,
)

In [5]:
# Show the estimator's repr to confirm the settings it was built with
correlation_measure

ConnectivityMeasure(cov_estimator=LedoitWolf(assume_centered=False,
                                             block_size=1000,
                                             store_precision=False),
                    discard_diagonal=True, kind='correlation', vectorize=True)

In [6]:
# Show where the 64-parcel atlas image was stored on disk
atlas_filename

'/Users/htamvada/nilearn_data/basc_multiscale_2015/template_cambridge_basc_multiscale_nii_sym/template_cambridge_basc_multiscale_sym_scale064.nii.gz'

In [7]:
# List the local paths of the downloaded functional images (one per subject)
fmri_filenames

['/Users/htamvada/tmp_data/ABIDE_pcp/cpac/nofilt_noglobal/Pitt_0050003_func_preproc.nii.gz',
 '/Users/htamvada/tmp_data/ABIDE_pcp/cpac/nofilt_noglobal/Pitt_0050004_func_preproc.nii.gz',
 '/Users/htamvada/tmp_data/ABIDE_pcp/cpac/nofilt_noglobal/Pitt_0050005_func_preproc.nii.gz']

In [20]:
# Accumulator for the per-subject connectivity feature vectors.
# NOTE: re-running this cell resets the list, so the append cells that
# follow must be re-run afterwards.
X_features = []

In [8]:
# Subject 0: extract the ROI time series, i.e. one standardized signal per
# atlas region, from the first functional image
time_series = masker.fit_transform(fmri_filenames[0])

In [9]:
# Inspect the extracted signals for subject 0
time_series

array([[ 0.34946893,  0.21255069,  0.32162783, ...,  0.04455612,
        -0.74631306, -0.74398049],
       [ 1.25591628,  1.68573687,  1.23214702, ...,  1.99766476,
         2.28793563,  1.3609068 ],
       [ 0.79581354,  0.44042242, -0.64273411, ...,  1.16094275,
         1.29602784, -0.12425872],
       ...,
       [-1.22488812, -1.44788828, -1.02308577, ..., -1.50944423,
        -1.40493041, -1.35589217],
       [ 0.15812634, -1.17902071, -0.2163547 , ..., -0.22004795,
         0.20060848, -1.18520845],
       [ 0.91473484,  1.32615049,  0.51178034, ...,  0.96631393,
         0.59919195,  0.367358  ]])

In [10]:
# Sanity-check the dimensions; presumably (n_timepoints, n_regions), with
# the second axis matching the 64 parcels of the atlas
time_series.shape

(196, 64)

In [11]:
# Subject 0: compute the region-to-region correlations. Because the measure
# was built with vectorize=True and discard_diagonal=True, the result is a
# flat 1D feature vector rather than a square matrix (despite the name).
# fit_transform takes a list of subjects, hence the [..] wrapper and the [0].
correlation_matrix = correlation_measure.fit_transform([time_series])[0]

In [12]:
# Length of the vectorized connectivity; 2016 == 64 * 63 / 2, i.e. one entry
# per unordered pair of distinct regions
correlation_matrix.shape

(2016,)

In [21]:
# Store subject 0's feature vector.
# NOTE: not idempotent; re-running this cell appends a duplicate row.
X_features.append(correlation_matrix)

In [18]:
# Inspect subject 0's vectorized correlation values
correlation_matrix

array([0.651508  , 0.36743073, 0.31826976, ..., 0.56326686, 0.53300758,
       0.54327946])

In [19]:
# Site label of each subject, in the same order as the phenotypic table
groups

['PITT', 'PITT', 'PITT']

In [22]:
# Subject 1: same ROI time-series extraction as for subject 0.
# TODO: this per-subject copy-paste would be better as a loop over
# fmri_filenames.
time_series1 = masker.fit_transform(fmri_filenames[1])

In [23]:
# Subject 1: vectorized correlation features (flat vector, not a matrix)
correlation_matrix1 = correlation_measure.fit_transform([time_series1])[0]

In [24]:
# Store subject 1's feature vector.
# NOTE: not idempotent; re-running this cell appends a duplicate row.
X_features.append(correlation_matrix1)

In [25]:
# Check the accumulated feature vectors (two subjects at this point)
X_features

[array([0.651508  , 0.36743073, 0.31826976, ..., 0.56326686, 0.53300758,
        0.54327946]),
 array([0.53258726, 0.3377011 , 0.49584742, ..., 0.40197062, 0.18363887,
        0.457722  ])]

In [26]:
# Subject 2: extract ROI time series, then the vectorized correlation
# features (flat vector, not a matrix), exactly as for the other subjects
time_series2 = masker.fit_transform(fmri_filenames[2])
correlation_matrix2 = correlation_measure.fit_transform([time_series2])[0]

In [27]:
# Store subject 2's feature vector.
# NOTE: not idempotent; re-running this cell appends a duplicate row.
X_features.append(correlation_matrix2)

In [28]:
# Check the accumulated feature vectors (all three subjects)
X_features

[array([0.651508  , 0.36743073, 0.31826976, ..., 0.56326686, 0.53300758,
        0.54327946]),
 array([0.53258726, 0.3377011 , 0.49584742, ..., 0.40197062, 0.18363887,
        0.457722  ]),
 array([0.4190768 , 0.2041985 , 0.33218378, ..., 0.78088126, 0.34119973,
        0.56367589])]

In [30]:
# Dimensionality reduction: a fractional n_components asks PCA to keep
# as many components as needed to retain 99% of the variance.
pca = PCA(n_components=0.99)
pca.fit(X_features)

# Project every subject's feature vector onto the retained components
X_features_pca = pca.transform(X_features)


In [31]:
# Inspect the PCA-projected features (one row per subject)
X_features_pca

array([[ 5.33061056, -1.37587697],
       [-1.3725236 ,  4.94287055],
       [-3.95808696, -3.56699358]])

In [32]:
# (n_subjects, n_components_kept) after the projection
X_features_pca.shape

(3, 2)

In [33]:
# Build the phenotypic table from the fetched dataset
abide_pheno = pd.DataFrame(data=abide.phenotypic)

# Target vector: the DX_GROUP column, one value per subject
y_target = abide_pheno.loc[:, 'DX_GROUP']

In [34]:
# Inspect the target labels (DX_GROUP value of each subject)
y_target

0    1
1    1
2    1
Name: DX_GROUP, dtype: int64

In [35]:
# Display the full phenotypic table for the fetched subjects
abide_pheno

Unnamed: 0,i,Unnamed_0,SUB_ID,X,subject,SITE_ID,FILE_ID,DX_GROUP,DSM_IV_TR,AGE_AT_SCAN,...,qc_notes_rater_1,qc_anat_rater_2,qc_anat_notes_rater_2,qc_func_rater_2,qc_func_notes_rater_2,qc_anat_rater_3,qc_anat_notes_rater_3,qc_func_rater_3,qc_func_notes_rater_3,SUB_IN_SMP
0,1,2,50003,2,50003,b'PITT',b'Pitt_0050003',1,1,24.45,...,b'',b'OK',b'',b'OK',b'',b'OK',b'',b'OK',b'',1
1,2,3,50004,3,50004,b'PITT',b'Pitt_0050004',1,1,19.09,...,b'',b'OK',b'',b'OK',b'',b'OK',b'',b'OK',b'',1
2,3,4,50005,4,50005,b'PITT',b'Pitt_0050005',1,1,13.73,...,b'',b'OK',b'',b'maybe',b'ic-parietal-cerebellum',b'OK',b'',b'OK',b'',0
