# MultiScale Classifier

Sections:

1. Data
2. Hyper-parameter space
3. HPO
4. Cross-validation scores


## Setup

In [1]:
%reload_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import json
import ray
from IPython.display import clear_output
import matplotlib.pyplot as plt
import seaborn as sns
from joblib import Parallel, delayed
from ray import tune
from ray.tune.search.hyperopt import HyperOptSearch
from scipy import stats
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedShuffleSplit, cross_val_score, StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import LinearSVC
from xgboost import XGBClassifier

import skexplain
from skexplain.common.importance_utils import to_skexplain_importance

from src.acnets.pipeline import MultiScaleClassifier, Parcellation
from functools import partial
from ray import train

In [2]:
N_RUNS = 100      # number of independent train/test runs used for the final cross-validation
TEST_SIZE = .25  # proportion of subjects held out as the test set (out of 32 subjects)

## Prepare data

In [3]:
# Subject identifiers are taken from the 'subject' coordinate of the parcellated dataset
parcellation = Parcellation(
    atlas_name='dosenbach2010',
    bids_dir='/Users/morteza/workspace/ACNets/data/julia2018/',
    cache_dir='/Users/morteza/workspace/ACNets/data/julia2018/derivatives/resting_timeseries/',
)
subjects = parcellation.fit_transform(None).coords['subject'].values

# Inputs: a single column holding the subject ids, shape: (n_subjects, 1)
X = subjects.reshape(-1, 1)

# Targets: the first four characters of each subject id name its group;
# the encoder maps them to integers (AVGP=0, NVGP=1), shape: (n_subjects,)
y_encoder = LabelEncoder()
group_names = [subject_id[:4] for subject_id in subjects]
y = y_encoder.fit_transform(group_names)
y_mapping = {
    group: code
    for group, code in zip(y_encoder.classes_, y_encoder.transform(y_encoder.classes_))
}

# DEBUG (report label mapping)
print('[DEBUG] label mapping:', y_mapping)

# DEBUG (scoring on the training data itself; expected to overfit, i.e., accuracy is 1)
overfit_score = MultiScaleClassifier().fit(X, y).score(X, y)
print(f'[DEBUG] overfit accuracy: {overfit_score:.3f}')

[DEBUG] label mapping: {'AVGP': 0, 'NVGP': 1}
[DEBUG] overfit accuracy: 1.000


## Hyper-parameter tuning

### Parameter space

In [4]:
# Search space handed to Ray Tune. Entries wrapped in `tune.choice` with a single
# option are effectively fixed; every `tune.grid_search` entry is enumerated, so the
# classifier settings below are tried in all combinations.
param_space = dict(
    # other atlases considered: 'gordon2014_2mm', 'difumo_64_2mm'
    atlas=tune.choice(['dosenbach2010']),
    kind=tune.choice(['partial correlation']),
    # feature-extraction switches (all enabled)
    extract_h1_features=tune.grid_search([True]),
    extract_h2_features=tune.grid_search([True]),
    extract_h3_features=tune.grid_search([True]),
    # classifier hyper-parameters ('clf__subsample' in [.5, .8, 1] is currently not searched)
    clf__n_estimators=tune.grid_search([100, 200, 500, 1000]),
    clf__max_depth=tune.grid_search([2, 4, 5, 8, 10, 20, 100]),
    clf__learning_rate=tune.grid_search([.01, .1, .3, .5, 1]),
)


In [5]:
# Objective function: scores one hyper-parameter configuration and reports the results to Ray

def eval_multiscale_model(config, classifier, X, y, n_iter=10):
    """Score one hyper-parameter configuration and report the results to Ray.

    Each of the `n_iter` repetitions draws a fresh stratified train/test split
    (test proportion `TEST_SIZE`), cross-validates the pipeline on the training
    part with 8 shuffled stratified folds, then refits on the whole training
    part and scores the held-out test part. Both scores are reported once per
    repetition; the test score is informational only and is not meant to drive
    the hyper-parameter search.

    Args:
        config: Parameter names and values applied to the pipeline via `set_params`.
        classifier: Estimator passed to `MultiScaleClassifier`.
        X: Input samples.
        y: Labels; also used to stratify the train/test split.
        n_iter: Number of repetitions (and therefore of reports).
    """

    model = MultiScaleClassifier(classifier=classifier).set_params(**config)

    for _ in range(n_iter):
        # outer split yields the test set; the inner folds yield the validation sets
        X_fit, X_holdout, y_fit, y_holdout = train_test_split(
            X, y, test_size=TEST_SIZE, stratify=y)
        folds = StratifiedKFold(n_splits=8, shuffle=True)

        # validation accuracy: mean over the inner folds
        fold_scores = cross_val_score(model, X_fit, y_fit, scoring='accuracy', cv=folds)
        val_score = fold_scores.mean()

        # test accuracy: refit on the full training part, score the held-out part
        model.fit(X_fit, y_fit)
        test_score = model.score(X_holdout, y_holdout)

        train.report({'val_accuracy': val_score, 'test_score': test_score})

### HPO

In [6]:
# Run the hyper-parameter search and keep the configuration whose validation
# accuracy, averaged over all reported iterations, is the highest.
output_name = 'models/multiscale_classifier-XGBClassifier-hpo.json'

clf = XGBClassifier(base_score=.5, objective='binary:logistic')
objective_func = partial(eval_multiscale_model, classifier=clf, X=X, y=y)

# restart Ray so the search runs in a fresh session
ray.shutdown(); ray.init()

tuner = tune.Tuner(
    objective_func,
    param_space=param_space,
    tune_config=tune.TuneConfig(
        metric='val_accuracy',
        mode='max'
    )
)

tuner.fit()
ray.shutdown()

clear_output()
best_result = tuner.get_results().get_best_result(scope='avg')
best_params = best_result.config

# The best trial is selected by its *average* validation accuracy (scope='avg'),
# so report that same average. `best_result.metrics` only holds the last reported
# iteration and is used solely as a fallback when no history is available.
val_history = best_result.metrics_dataframe
if val_history is not None and 'val_accuracy' in val_history:
    best_score = val_history['val_accuracy'].mean()
else:
    best_score = best_result.metrics['val_accuracy']

# store the best hyper-parameters; the classifier name is added to the saved copy
# only, so `best_params` stays directly usable with `set_params` even if saving fails
with open(output_name, 'w') as f:
    json.dump({**best_params, 'classifier': 'XGBClassifier'}, f, indent=2)

# report best score and best model
print('[DEBUG] Best HPO validation score:', best_score)
MultiScaleClassifier(classifier=clf).set_params(**best_params)

0,1
Current time:,2024-03-03 23:08:44
Running for:,00:05:49.51
Memory:,9.5/16.0 GiB

Trial name,status,loc,atlas,clf__learning_rate,clf__max_depth,clf__n_estimators,extract_h1_features,extract_h2_features,extract_h3_features,kind,iter,total time (s),val_accuracy,test_score
eval_multiscale_model_c9638_00012,RUNNING,127.0.0.1:13345,dosenbach2010,0.3,5,100,True,True,True,partial correlation,6.0,118.664,0.458333,0.5
eval_multiscale_model_c9638_00013,RUNNING,127.0.0.1:13342,dosenbach2010,0.5,5,100,True,True,True,partial correlation,6.0,119.876,0.5,0.75
eval_multiscale_model_c9638_00014,RUNNING,127.0.0.1:13339,dosenbach2010,1.0,5,100,True,True,True,partial correlation,5.0,100.263,0.333333,0.75
eval_multiscale_model_c9638_00015,RUNNING,127.0.0.1:13340,dosenbach2010,0.01,8,100,True,True,True,partial correlation,5.0,103.918,0.5,0.375
eval_multiscale_model_c9638_00016,RUNNING,127.0.0.1:13344,dosenbach2010,0.1,8,100,True,True,True,partial correlation,5.0,101.23,0.666667,0.5
eval_multiscale_model_c9638_00017,RUNNING,127.0.0.1:13341,dosenbach2010,0.3,8,100,True,True,True,partial correlation,5.0,101.743,0.416667,0.625
eval_multiscale_model_c9638_00018,RUNNING,127.0.0.1:13346,dosenbach2010,0.5,8,100,True,True,True,partial correlation,5.0,102.143,0.625,0.25
eval_multiscale_model_c9638_00019,RUNNING,127.0.0.1:13338,dosenbach2010,1.0,8,100,True,True,True,partial correlation,5.0,99.9065,0.5,0.5
eval_multiscale_model_c9638_00020,RUNNING,127.0.0.1:13349,dosenbach2010,0.01,10,100,True,True,True,partial correlation,5.0,103.54,0.291667,0.5
eval_multiscale_model_c9638_00021,RUNNING,127.0.0.1:13348,dosenbach2010,0.1,10,100,True,True,True,partial correlation,5.0,100.833,0.5,0.75


2024-03-03 23:08:54,584	INFO tune.py:1042 -- Total run time: 359.94 seconds (349.50 seconds for the tuning loop).
Resume experiment with: Tuner.restore(path="/Users/morteza/ray_results/eval_multiscale_model_2024-03-03_23-02-54", trainable=...)
- eval_multiscale_model_c9638_00023: FileNotFoundError('Could not fetch metrics for eval_multiscale_model_c9638_00023: both result.json and progress.csv were not found at /Users/morteza/ray_results/eval_multiscale_model_2024-03-03_23-02-54/eval_multiscale_model_c9638_00023_23_atlas=dosenbach2010,clf__learning_rate=0.5000,clf__max_depth=10,clf__n_estimators=100,extract_2024-03-03_23-03-21')
- eval_multiscale_model_c9638_00024: FileNotFoundError('Could not fetch metrics for eval_multiscale_model_c9638_00024: both result.json and progress.csv were not found at /Users/morteza/ray_results/eval_multiscale_model_2024-03-03_23-02-54/eval_multiscale_model_c9638_00024_24_atlas=dosenbach2010,clf__learning_rate=1,clf__max_depth=10,clf__n_estimators=100,extra

## Cross-validation accuracy and CI

In [None]:
# Rebuild the pipeline with the tuned hyper-parameters
clf = XGBClassifier(base_score=.5, objective='binary:logistic')
tuned_pipeline = MultiScaleClassifier(classifier=clf).set_params(**best_params)

# Score it on N_RUNS stratified random train/test splits (all available cores)
outer_cv = StratifiedShuffleSplit(n_splits=N_RUNS, test_size=TEST_SIZE)
cv_scores = cross_val_score(tuned_pipeline, X, y, cv=outer_cv, verbose=3, n_jobs=-1)

# Bootstrap confidence interval of the mean score
# (`stats.bootstrap` uses a 95% confidence level by default);
# the scores are passed as a single sample, i.e. a (1, n_runs) array
bootstrap_ci = stats.bootstrap(cv_scores[np.newaxis, :], np.mean)

# Replace the verbose cross-validation log with the summary
clear_output(wait=True)
print(f'Test accuracy (mean ± std): {cv_scores.mean():.3f} ± {cv_scores.std():.3f}')
print(bootstrap_ci.confidence_interval)

Test accuracy (mean ± std): 0.711 ± 0.157
ConfidenceInterval(low=0.68, high=0.74125)
