
## Requirements

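To run this notebook, you need the RAPIDS packages installed. The commands below create a dedicated environment with `mamba`, activate it, and then install the Ray Tune dependencies with `pip`: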

```bash
mamba create -n rapids-22.04 \
    -c rapidsai -c nvidia -y \
    rapids=22.04 python=3.9 cudatoolkit=11.5 \
    jupyterlab ipykernel nilearn xarray netcdf4 seaborn shap

mamba activate rapids-22.04

pip install "ray[tune]" tune-sklearn scikit-optimize
```

In [None]:
import os
from pathlib import Path

from python.acnets.pipeline import ConnectivityPipeline, ConnectivityVectorizer
from ray import tune
from ray.tune.sklearn import TuneSearchCV
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectFromModel, VarianceThreshold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import LinearSVC

# Optional GPU path: when cuML is importable, the names PCA, SVC, UMAP and
# GridSearchCV are (re)bound to the cuML implementations.
try:
    from cuml import PCA, SVC, UMAP
    from cuml.model_selection import GridSearchCV
except ImportError:
    print('cuML not installed. Skipping GPU and falling back to scikit.')
else:
    # Reached only when both cuML imports above succeeded.
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'


In [None]:
# WARNING: Ray Tune requires data paths to be absolute, so both directories
# are anchored at the current user's home directory.
BIDS_DIR = Path.home().joinpath('workspace/acnets/data/julia2018')
CACHE_DIR = Path.home().joinpath('workspace/acnets/data/julia2018_resting')

In [None]:
# 1. DATA
# Subject identifiers come from the 'subject' coordinate of the connectivity
# pipeline output for the 'all' selection.
subjects = (
    ConnectivityPipeline()
    .transform('all')
    .coords['subject']
    .values
)

# The first four characters of each subject id give its group (AVGP or NVGP).
groups = [subject_id[:4] for subject_id in subjects]

# Features: the subject ids arranged as a single column.
X = subjects.reshape((-1, 1))

# Targets: group names encoded as integer class labels.
y_encoder = LabelEncoder().fit(groups)
y = y_encoder.transform(groups)

In [None]:
# Settings shared by the feature-selection SVM and the final classifier.
# Two separate LinearSVC instances are still created below.
l1_svc_settings = dict(penalty='l1', dual=False, max_iter=10000)

# Step names matter: the hyper-parameter keys address steps by name
# (e.g. 'clf__C', 'connectivity__atlas').
pipe = Pipeline(steps=[
    ('connectivity',
     ConnectivityPipeline(bids_dir=BIDS_DIR, parcellation_cache_dir=CACHE_DIR)),
    ('vectorize', ConnectivityVectorizer()),
    ('scale', StandardScaler()),
    ('zerovar', VarianceThreshold()),
    ('select', SelectFromModel(LinearSVC(**l1_svc_settings))),
    ('reduce', PCA()),
    ('clf', LinearSVC(**l1_svc_settings)),
])

In [None]:
# Hyper-parameter search space, keyed by '<pipeline step>__<parameter>'.
#
# Currently disabled candidates, kept here for reference:
#   'scale':  tune.choice([StandardScaler(), 'passthrough'])
#   'select': tune.choice([SelectFromModel(LinearSVC(penalty='l2')), 'passthrough'])
#   additional atlases: 'gordon2014_2mm', 'difumo_64_2mm', 'seitzman2018'
atlas_choices = ['dosenbach2010']
kind_choices = ['partial correlation', 'chatterjee', 'correlation', 'tangent']

param_space = {}
param_space['clf__C'] = tune.loguniform(1e-2, 1e3)
param_space['connectivity__atlas'] = tune.choice(atlas_choices)
param_space['connectivity__kind'] = tune.choice(kind_choices)

In [None]:
# HPO: Bayesian Optimization
# The keyword settings are gathered first and then passed to TuneSearchCV
# together with the pipeline and its search space.
#
# early_stopping is not passed, so the TuneSearchCV default applies.
# NOTE(review): max_iters > 1 may have no effect without early stopping —
# confirm against the tune-sklearn documentation.
# NOTE(review): use_gpu=True is set regardless of whether the cuML import
# succeeded — confirm this is intended on CPU-only machines.
search_settings = dict(
    cv=4,
    n_trials=10,
    max_iters=2,
    search_optimization='bayesian',
    verbose=1,
    scoring='accuracy',
    use_gpu=True,
    n_jobs=-1,
)

opt = TuneSearchCV(pipe, param_space, **search_settings)
opt.fit(X, y)

print('best estimator:', opt.best_estimator_)
