# Connectivity Classifier


## Inputs

Connectivity matrices

## Outputs

Participant's label, either AVGP or NVGP.

## Requirements

To run this notebook, you need to have the rapids packages installed:

```
mamba create -n rapids-22.04 -c rapidsai -c nvidia  \
    rapids=22.04 python=3.9 cudatoolkit=11.5 \
    jupyterlab nilearn xarray netcdf4
pip install "ray[tune]" tune-sklearn scikit-optimize
```

In [12]:
import os

# Restrict CUDA to the first GPU. This is done before any GPU-backed library
# (cuml, ray) is imported: CUDA_VISIBLE_DEVICES is read when the CUDA runtime
# initializes, so setting it after those imports may have no effect.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

from cuml.model_selection import GridSearchCV

import numpy as np

from pathlib import Path
from python.acnets.pipeline import ConnectivityPipeline, ConnectivityVectorizer

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.feature_selection import SelectFromModel

from cuml import UMAP, SVC, LinearSVC

from IPython.display import display, clear_output

from ray.tune.sklearn import TuneSearchCV

In [13]:

# Parameters
#
# Both locations are resolved against the current user's home directory.
# WARNING: Ray Tune requires these params to be absolute paths.
BIDS_DIR = Path.home().joinpath('workspace/acnets/data/julia2018')
PARCELLATION_CACHE_DIR = Path.home().joinpath('workspace/acnets/data/julia2018_resting')

In [14]:
# Subject identifiers are read from the 'subject' coordinate of the output of a
# default-configured ConnectivityPipeline.
subjects = ConnectivityPipeline().transform('all').coords['subject'].values

# The first four characters of each identifier name the group (AVGP or NVGP).
groups = [subject_id[:4] for subject_id in subjects]

# Model input: the subject identifiers arranged as a single column.
X = subjects.reshape(-1, 1)

# Model target: group names encoded as integer class labels.
y_encoder = LabelEncoder().fit(groups)
y = y_encoder.transform(groups)

In [25]:
# End-to-end pipeline from the subject-ID column to a predicted label.
# The 'passthrough' steps are no-op placeholders; the parameter search in this
# notebook offers real transformers for 'scale' and 'select'.
pipe = Pipeline(steps=[
    (
        'connectivity',
        ConnectivityPipeline(
            agg_networks=True,
            bids_dir=BIDS_DIR,
            parcellation_cache_dir=PARCELLATION_CACHE_DIR,
        ),
    ),
    ('vectorize', ConnectivityVectorizer()),
    ('scale', 'passthrough'),
    ('zerovar', 'passthrough'),
    ('select', 'passthrough'),
    ('reduce', UMAP(n_neighbors=2)),
    ('clf', LinearSVC(probability=True)),
])

# DEBUG
# Sanity check only: fit on the full dataset, then score on that same data.
pipe.fit(X, y)
pipe.score(X, y)

0.40625

In [26]:
# Hyper-parameter search space, keyed by `<step>` or `<step>__<param>` of `pipe`.
# Lists enumerate discrete choices; 3-tuples are presumably (low, high, prior)
# ranges as understood by the tuner -- TODO confirm against the tuner docs.
param_space = dict(
    scale=[StandardScaler(), 'passthrough'],
    select=[SelectFromModel(LinearSVC()), 'passthrough'],
    reduce__n_components=(2, 8, 'uniform'),
    clf__C=(1e-2, 1e3, 'log-uniform'),
    connectivity__kind=['partial correlation', 'chatterjee', 'correlation'],
    # Currently disabled alternatives:
    # reduce=['passthrough', FastICA(n_components=5, max_iter=1000)],
    # zerovar=[VarianceThreshold(threshold=0.0), 'passthrough'],
    # clf=[LinearSVC(C=.1, max_iter=100000), RandomForestClassifier(), SVC(kernel='linear', probability=True)],
    # clf__kernel=['linear', 'rbf'],
    # connectivity__atlas=['dosenbach2010'],
    # connectivity__agg_networks=[True],
)

In [27]:
# Parameter Tuning + Model Fitting

# An integer seed makes every call to `cv.split` yield the same partitions, so
# all trials are scored on identical train/test splits and the search can be
# re-run with the same folds. The seed value itself is arbitrary.
cv = StratifiedShuffleSplit(n_splits=10, test_size=0.25, random_state=0)

# Search over `param_space` for the `pipe` configuration with the best
# cross-validated accuracy.
opt = TuneSearchCV(
    pipe,
    param_space,
    cv=cv,
    n_trials=10,
    # early_stopping=True,
    # max_iters=10,
    search_optimization='bayesian',
    verbose=1,
    scoring='accuracy',
    use_gpu=True,
    n_jobs=-1)

opt.fit(X, y)

print('best estimator:', opt.best_estimator_)


2022-06-13 17:53:07,251	INFO tune.py:747 -- Total run time: 29.95 seconds (29.70 seconds for the tuning loop).


best estimator: Pipeline(steps=[('connectivity',
                 ConnectivityPipeline(bids_dir=PosixPath('/home/morteza/workspace/acnets/data/julia2018'), kind='partial correlation', parcellation_cache_dir=PosixPath('/home/morteza/workspace/acnets/data/julia2018_resting'))),
                ('vectorize', ConnectivityVectorizer()),
                ('scale', StandardScaler()), ('zerovar', 'passthrough'),
                ('select', 'passthrough'), ('reduce', UMAP()),
                ('clf', LinearSVC())])


In [29]:
from sklearn.model_selection import cross_val_score

# Seeded so that the reported mean/std can be reproduced; the seed value is arbitrary.
cv = StratifiedShuffleSplit(n_splits=10, test_size=0.25, random_state=1)

# NOTE(review): `opt.best_estimator_` was selected using this same X and y, so
# these scores are likely optimistic -- consider a held-out set or nested CV.
scores = cross_val_score(opt.best_estimator_, X, y, cv=cv, scoring='accuracy')

# Mean and standard deviation of accuracy across the 10 splits (cell output).
scores.mean(), scores.std()

(0.5375, 0.12562344526401112)

In [30]:
# Show the results recorded by the search (e.g. the parameters tried in each
# trial) as the cell output.
opt.cv_results_

{'params': [{'scale': 'passthrough',
   'select': 'passthrough',
   'reduce__n_components': 2,
   'clf__C': 0.013361734828006523,
   'connectivity__kind': 'chatterjee'},
  {'scale': StandardScaler(),
   'select': 'passthrough',
   'reduce__n_components': 4,
   'clf__C': 0.014967197778815907,
   'connectivity__kind': 'correlation'},
  {'scale': StandardScaler(),
   'select': SelectFromModel(estimator=LinearSVC()),
   'reduce__n_components': 6,
   'clf__C': 130.2879611867519,
   'connectivity__kind': 'partial correlation'},
  {'scale': StandardScaler(),
   'select': SelectFromModel(estimator=LinearSVC()),
   'reduce__n_components': 4,
   'clf__C': 0.011137517967898945,
   'connectivity__kind': 'chatterjee'},
  {'scale': StandardScaler(),
   'select': SelectFromModel(estimator=LinearSVC()),
   'reduce__n_components': 5,
   'clf__C': 913.1330245412576,
   'connectivity__kind': 'correlation'},
  {'scale': 'passthrough',
   'select': 'passthrough',
   'reduce__n_components': 5,
   'clf__C': 