# Connectivity Classifier


## Inputs

Connectivity matrices

## Outputs

Participant's label, either AVGP or NVGP.

## Requirements

To run this notebook, you need to have the rapids packages installed:

```
mamba create -n rapids-22.04 -c rapidsai -c nvidia  \
    rapids=22.04 python=3.9 cudatoolkit=11.5 \
    jupyterlab nilearn xarray netcdf4
pip install ray[tune] tune-sklearn scikit-optimize
```

In [8]:
import os

# Restrict this process to the first GPU. The variable is set BEFORE any
# CUDA-backed library (cuml) is imported because the CUDA runtime reads
# CUDA_VISIBLE_DEVICES when it initializes; setting it afterwards may have no
# effect on the already-initialized process.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

from cuml.model_selection import GridSearchCV

import numpy as np

from pathlib import Path
from python.acnets.pipeline import ConnectivityPipeline, ConnectivityVectorizer

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedShuffleSplit

from cuml import UMAP, SVC

from IPython.display import display, clear_output

from ray.tune.sklearn import TuneSearchCV

In [9]:

# Data locations
#
# NOTE: Ray Tune requires both of these to be absolute paths, so they are
# anchored at the current user's home directory.
BIDS_DIR = Path.home().joinpath('workspace/acnets/data/julia2018')
PARCELLATION_CACHE_DIR = Path.home().joinpath('workspace/acnets/data/julia2018_resting')

In [10]:
# Collect every subject ID exposed by the connectivity pipeline.
# NOTE(review): this instance is built with the constructor defaults rather
# than BIDS_DIR / PARCELLATION_CACHE_DIR -- confirm both resolve to the same data.
subjects = (
    ConnectivityPipeline()
    .transform('all')
    .coords['subject']
    .values
)

# The first four characters of a subject ID give the group: AVGP or NVGP.
groups = [subject_id[:4] for subject_id in subjects]

# The model input is just the subject IDs, shaped as a single column.
X = subjects.reshape(-1, 1)

# Integer-encode the group labels; the fitted encoder is kept so predictions
# can be mapped back to group names.
y_encoder = LabelEncoder()
y_encoder.fit(groups)
y = y_encoder.transform(groups)

In [25]:
# Classification pipeline: subject IDs -> connectivity -> feature vector -> SVC.
# The four middle steps are 'passthrough' placeholders; they exist so that a
# parameter search can swap real transformers into those slots by name.
placeholder_steps = [
    (step_name, 'passthrough')
    for step_name in ('scale', 'zerovar', 'select', 'reduce')
]

pipe = Pipeline(steps=[
    (
        'connectivity',
        ConnectivityPipeline(
            agg_networks=True,
            bids_dir=BIDS_DIR,
            parcellation_cache_dir=PARCELLATION_CACHE_DIR,
        ),
    ),
    ('vectorize', ConnectivityVectorizer()),
    *placeholder_steps,
    ('clf', SVC(probability=True)),
])

# DEBUG: smoke test -- fit on all subjects and score on that same data
# (i.e. this is a training-set score, not a generalization estimate).
pipe.fit(X, y).score(X, y)

0.4375

In [26]:
# Search space for the pipeline's hyperparameters.
#
# Keys are either a step name (replace the whole step) or `<step>__<param>`
# (set one parameter of that step). List values are discrete choices; the
# `(low, high, 'log-uniform')` tuple describes a continuous range sampled on a
# log scale by the Bayesian search backend.
#
# Each key must appear only once: Python silently keeps the last value of a
# repeated key in a dict literal, which hides accidental edits.
param_space = {
    # 'scale': [StandardScaler(), 'passthrough'],
    # 'select': [SelectFromModel(SVC(kernel='linear')),
    #            SelectFromModel(LinearSVC(C=.1, max_iter=100000)),
    #            'passthrough'],
    # 'reduce': ['passthrough', FastICA(n_components=5, max_iter=1000)],
    'reduce': ['passthrough', UMAP(n_components=2, n_neighbors=2)],
    # 'zerovar': [VarianceThreshold(threshold=0.0), 'passthrough'],
    # 'clf': [LinearSVC(C=.1, max_iter=100000), RandomForestClassifier(), SVC(kernel='linear', probability=True)],
    'clf__kernel': ['linear', 'rbf'],
    'clf__C': (1e-2, 1e3, 'log-uniform'),
    'connectivity__atlas': ['dosenbach2010'],
    'connectivity__kind': ['correlation', 'chatterjee'],
    'connectivity__agg_networks': [True],
}

In [27]:
# Parameter Tuning + Model Fitting

# Fixed seed so the analysis is reproducible across kernel restarts.
RANDOM_SEED = 42

# 100 stratified random splits, each holding out 25% of the subjects.
# With an integer `random_state` every call to `cv.split` yields the same
# folds, so all trials are scored on identical splits and their accuracies
# are directly comparable. Without it each trial would get different folds.
cv = StratifiedShuffleSplit(n_splits=100, test_size=0.25, random_state=RANDOM_SEED)

opt = TuneSearchCV(
    pipe,
    param_space,
    cv=cv,
    n_trials=100,
    # early_stopping=True,
    # max_iters=10,
    search_optimization='bayesian',
    verbose=1,
    scoring='accuracy',
    use_gpu=True,
    # search_kwargs={'resources_per_trial': {'gpu':1}},
    # NOTE(review): how much of the Bayesian search this seed controls depends
    # on the tune-sklearn version/backend -- confirm against its documentation.
    random_state=RANDOM_SEED,
    n_jobs=-1)

opt.fit(X, y)

# clear_output()
print('best estimator:', opt.best_estimator_)
