In [2]:
import sys
sys.path.insert(0, '/Users/matty_gee/Dropbox/Projects/Code/utilities')
sys.path.insert(0, '/Users/matty_gee/Dropbox/Projects/Code/snt_behavior/preprocessing')
sys.path.insert(0, '..') 

from py_standard_modules import *
from circ_stats import *

from sklearn.preprocessing import LabelEncoder
from nilearn.maskers import NiftiMapsMasker
from sklearn.model_selection import GridSearchCV, StratifiedShuffleSplit
from sklearn.pipeline import Pipeline
from nilearn.connectome import ConnectivityMeasure
from sklearn.svm import LinearSVC
from sklearn.dummy import DummyClassifier
from sklearn.model_selection import GridSearchCV

# Labels of the six character roles.
character_roles = [
    'first', 'second', 'assistant',
    'newcomb', 'hayworth', 'neutral',
]

# Load the task details spreadsheet and order its rows by decision number.
task_details = pd.read_excel('task_details.xlsx').sort_values(by='decision_num')

# Keep only the rows whose trial type is 'Decision'.
decision_details = task_details.loc[task_details['trial_type'] == 'Decision']



In [1]:
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline

In [3]:
# Pull the onset and offset columns of the decision trials as arrays.
onsets, offsets = (decision_details[column].values for column in ('onset', 'offset'))

# Print one "onset offset" pair per decision trial.
for window in zip(onsets, offsets):
    print(*window)

55.905 67.896
71.885 83.875
91.864 103.854
119.819 131.809
139.786 151.776
161.753 173.743
181.72 193.71
193.711 205.701
221.666 233.656
245.633 257.623
293.528 305.518
333.459 345.449
365.414 377.404
385.381 397.371
405.348 417.338
423.315 435.305
455.246 467.236
479.213 491.203
503.156 515.146
545.051 557.041
561.03 573.02
584.997 596.987
608.952 620.942
626.931 638.921
648.898 660.888
666.877 678.867
692.82 704.81
730.763 742.753
762.706 774.696
814.565 826.555
840.52 852.51
860.487 872.477
888.454 900.444
904.433 916.423
926.388 938.379
946.356 958.346
968.323 980.313
988.29 1000.28
1006.269 1018.259
1028.236 1040.226
1048.203 1060.193
1064.182 1076.172
1086.149 1098.139
1102.128 1114.118
1118.107 1130.097
1138.074 1150.064
1168.017 1180.007
1180.008 1191.998
1201.975 1213.965
1225.93 1237.919
1249.896 1261.886
1275.863 1287.853
1297.83 1309.82
1323.797 1335.787
1339.776 1351.766
1361.755 1373.745
1379.734 1391.724
1395.713 1407.703
1417.692 1429.682
1439.659 1451.649
1459.638 1471


# Functional connectivity

This example compares different kinds of functional connectivity between
regions of interest: correlation, partial correlation, and tangent space
embedding.

The resulting connectivity coefficients can be used to
discriminate children from adults. In general, the tangent space embedding
**outperforms** the standard correlations: see [Dadi et al 2019](https://www.sciencedirect.com/science/article/pii/S1053811919301594)
for a careful study.

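> **Note:** If you are using Nilearn with a version older than `0.9.0`, either
> upgrade your version or import maskers from the `input_data` module instead
> of the `maskers` module.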


## Load fMRI dataset and MSDL atlas

In [None]:
from nilearn import datasets

# Fetch the development fMRI dataset, restricted to 60 subjects.
development_dataset = datasets.fetch_development_fmri(
    n_subjects=60,
)

We use probabilistic regions of interest (ROIs) from the MSDL atlas.



In [None]:
# Fetch the MSDL atlas and keep the coordinates of its regions.
msdl_data = datasets.fetch_atlas_msdl()
msdl_coords = msdl_data.region_coords

# Masker that extracts one time series per atlas map.
masker = NiftiMapsMasker(
    msdl_data.maps,
    resampling_target="data",
    t_r=2,
    detrend=True,
    low_pass=.1,
    high_pass=.01,
    memory='nilearn_cache',
    memory_level=1,
)
masker.fit()

# Extract the region time series of every subject, passing each subject's
# confounds alongside the functional image.
masked_data = [
    masker.transform(func_img, subject_confounds)
    for func_img, subject_confounds in zip(
        development_dataset.func, development_dataset.confounds
    )
]

## What kind of connectivity is most powerful for classification?
We will use connectivity matrices as features to distinguish children from
adults. We use cross-validation and measure classification accuracy to
compare the different kinds of connectivity matrices.



In [None]:
# Classification pipeline: connectivity features followed by a classifier.
kinds = ['correlation', 'partial correlation', 'tangent']

pipe = Pipeline(steps=[
    # Vectorized connectivity matrices serve as the feature vectors.
    ('connectivity', ConnectivityMeasure(vectorize=True)),
    # Linear SVM whose C is chosen by an inner 5-fold grid search.
    ('classifier', GridSearchCV(estimator=LinearSVC(),
                                param_grid={'C': [.1, 1., 10.]},
                                cv=5)),
])

# Two sub-grids: a most-frequent-class baseline that replaces the classifier
# step, and the pipeline evaluated once per connectivity kind.
param_grid = [
    dict(classifier=[DummyClassifier(strategy='most_frequent')]),
    dict(connectivity__kind=kinds),
]

We use random splits of the subjects into training/testing sets.
StratifiedShuffleSplit allows preserving the proportion of children in the
test set.



In [None]:
# Child/adult label of every subject. Indexing `phenotypic` by the field /
# column name works whether it is a NumPy record array or a pandas DataFrame
# (the container type depends on the nilearn version). Iterating over it
# row by row only works for the record array: iterating a DataFrame yields
# column names, and subscripting those raises a TypeError.
groups  = list(development_dataset.phenotypic['Child_Adult'])
# Encode the string labels as integer class ids.
classes = LabelEncoder().fit_transform(groups)

# 30 random stratified train/test splits, each holding out 10 subjects.
cv = StratifiedShuffleSplit(n_splits=30, random_state=0, test_size=10)
# Outer grid search over the candidates in `param_grid`, scored by accuracy.
# refit=False: only the cross-validation scores are used afterwards, so no
# final model is refit on the full data.
gs = GridSearchCV(pipe, param_grid, scoring='accuracy', cv=cv, verbose=1,
                  refit=False, n_jobs=8)
gs.fit(masked_data, classes)

# Mean and standard deviation of the test accuracy of each candidate.
mean_scores = gs.cv_results_['mean_test_score']
scores_std  = gs.cv_results_['std_test_score']

Display the results.



In [None]:
from matplotlib import pyplot as plt

# One horizontal bar per grid-search candidate. Bar positions are derived
# from the number of scores (0.1, 0.2, ...) instead of being hard-coded for
# exactly four candidates, so the plot keeps working if the list of
# connectivity kinds changes.
positions = [round(0.1 * (rank + 1), 1) for rank in range(len(mean_scores))]

fig, ax = plt.subplots(figsize=(6, 4))
ax.barh(positions, mean_scores, align='center', height=.05, xerr=scores_std)

# Assumes the first candidate is the dummy baseline (the first entry of the
# parameter grid), which has no connectivity kind; label it 'dummy' and take
# the remaining labels from the grid-search results.
yticks = ['dummy'] + list(gs.cv_results_['param_connectivity__kind'].data[1:])
# Put each word of a label on its own line to keep the tick labels narrow.
yticks = [t.replace(' ', '\n') for t in yticks]
ax.set_yticks(positions)
ax.set_yticklabels(yticks)

ax.set_xlabel('Classification accuracy')
ax.grid(True)
ax.set_axisbelow(True)  # draw the grid lines behind the bars
fig.tight_layout()

This is a small example to showcase nilearn features. In practice such
comparisons need to be performed on much larger cohorts and several
datasets.
[Dadi et al 2019](https://www.sciencedirect.com/science/article/pii/S1053811919301594)
showed that across many cohorts and clinical questions, the tangent
kind should be preferred.



In [None]:
# Display all open figures.
plt.show()