# MultiScale Classifier

Sections:

1. Data
2. Hyper-parameter space
3. HPO
4. Cross-validation scores


## Setup

In [1]:
%reload_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import json
import ray
from IPython.display import clear_output
import matplotlib.pyplot as plt
import seaborn as sns
from joblib import Parallel, delayed
from ray import tune
from ray.tune.search.hyperopt import HyperOptSearch
from scipy import stats
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedShuffleSplit, cross_val_score, StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import LinearSVC
from xgboost import XGBClassifier

import skexplain
from skexplain.common.importance_utils import to_skexplain_importance

from src.acnets.pipeline import MultiScaleClassifier, Parcellation
from functools import partial

In [2]:
N_RUNS = 100      # number of independent train/test runs (splits) used for the final cross-validation
TEST_SIZE = .25  # proportion of test subjects out of 32 subjects

## Prepare data

In [3]:
# Inputs (X): subject identifiers read from the 'subject' coordinate of the parcellation output
subjects = (
    Parcellation(atlas_name='difumo_64_2mm')
    .fit_transform(None)
    .coords['subject']
    .values
)
X = subjects.reshape(-1, 1)  # one subject id per row, shape: (n_subjects, 1)

# Targets (y): the first four characters of each subject id serve as the class label
y_encoder = LabelEncoder()
y = y_encoder.fit_transform(
    [subject_id[:4] for subject_id in subjects]
)  # encoded labels (AVGP=0, NVGP=1 according to the printed mapping), shape: (n_subjects,)
y_mapping = {
    label: code
    for label, code in zip(y_encoder.classes_, y_encoder.transform(y_encoder.classes_))
}

# DEBUG: show which integer each label was encoded to
print('[DEBUG] label mapping:', y_mapping)

# DEBUG: fitting and scoring on the very same data is expected to overfit (accuracy of 1)
overfit_score = (
    MultiScaleClassifier()
    .fit(X, y)
    .score(X, y)
)
print(f'[DEBUG] overfit accuracy: {overfit_score:.3f}')

[DEBUG] label mapping: {'AVGP': 0, 'NVGP': 1}
[DEBUG] overfit accuracy: 1.000


## Hyper-parameter tuning

### Parameter space

In [None]:
# Search space for the XGBoost classifier. The estimator itself is stored under
# 'clf'; every other entry is a hyper-parameter (grid-searched or sampled by Ray Tune).
xgb_param_space = dict(
    clf=XGBClassifier(base_score=.5, objective='binary:logistic'),
    # other atlases considered: ['dosenbach2010', 'gordon2014_2mm', 'difumo_64_2mm']
    atlas=tune.choice(['dosenbach2010']),
    # disabled options, kept for reference:
    # extract_h1_features=tune.grid_search([False, True]),
    # extract_h2_features=tune.grid_search([False, True]),
    # extract_h3_features=tune.grid_search([True]),
    # clf__subsample=tune.choice([.5, .8, 1]),
    clf__n_estimators=tune.grid_search([100, 200]),
    clf__max_depth=tune.grid_search([2, 4, 6, 8]),
    clf__learning_rate=tune.grid_search([.1, .3]),
)

# Search space for the random-forest classifier. The estimator itself is stored
# under 'clf'; every other entry is a hyper-parameter sampled by Ray Tune.
rfc_param_space = dict(
    clf=RandomForestClassifier(),
    # other atlases considered: tune.choice(['dosenbach2010', 'gordon2014_2mm', 'difumo_64_2mm'])
    atlas=tune.choice(['dosenbach2010']),
    clf__n_estimators=tune.randint(100, 500),
    clf__max_depth=tune.randint(1, 8),
    clf__min_samples_split=tune.randint(2, 8),
    clf__min_samples_leaf=tune.randint(1, 5),
    clf__criterion=tune.choice(['gini', 'entropy']),
    clf__max_features=tune.choice([None, 'sqrt']),
)

# Search space for the linear SVM. The estimator itself is stored under 'clf';
# every other entry is a hyper-parameter.
svm_param_space = {
    'clf': LinearSVC(max_iter=100000),
    # 'atlas': tune.choice(['dosenbach2010', 'gordon2014_2mm', 'difumo_64_2mm']),
    'atlas': tune.choice(['dosenbach2010']),
    # Fixed settings are given as plain scalars: Ray Tune passes non-sampler
    # values through unchanged, so a list such as ['l1'] would reach the
    # estimator as a list instead of the intended string/bool.
    'clf__penalty': 'l1',
    'clf__dual': False,
    'clf__C': tune.choice([.01, .1, 1, 10, 100, 1000]),
    # 'clf__kernel': ['linear','poly','rbf','sigmoid'],
    # 'clf__gamma': tune.choice(['scale'])
}


In [None]:
# now we define the objective function

def eval_multiscale_model(config,
                          classifier,
                          X=X, y=y,
                          n_runs=1,
                          test_size=None,
                          n_splits=8,
                          random_state=None):
    """Objective function: score one hyper-parameter configuration.

    For each of ``n_runs`` runs the data is split into a stratified train/test
    partition. The model is cross-validated on the training part (validation
    accuracy), then refit on the whole training part and scored on the
    held-out test part (test accuracy).

    Parameters
    ----------
    config : dict
        Parameters forwarded to ``MultiScaleClassifier.set_params``.
    classifier : estimator
        Classifier handed to ``MultiScaleClassifier(classifier=...)``.
    X, y : array-like
        Inputs and labels; default to the notebook-level ``X`` and ``y``.
    n_runs : int, default=1
        Number of independent train/test runs to average over.
    test_size : float or int, optional
        Size of the held-out test split. ``None`` (default) uses the
        notebook-level ``TEST_SIZE`` as it is at call time.
    n_splits : int, default=8
        Number of folds of the inner stratified K-fold.
    random_state : int, numpy.random.RandomState or None, default=None
        Seed (or generator) shared by all splits, for reproducible scores.
        ``None`` keeps scikit-learn's default unseeded behaviour.

    Returns
    -------
    dict
        ``{'val_accuracy': ..., 'test_accuracy': ...}``, each the mean over
        the ``n_runs`` runs.

    Raises
    ------
    ValueError
        If ``n_runs`` is smaller than 1 (the means would be undefined).
    """

    if n_runs < 1:
        raise ValueError(f'n_runs must be a positive integer, got {n_runs!r}')

    # resolved at call time so that edits to the notebook constant take effect
    if test_size is None:
        test_size = TEST_SIZE

    # a single generator is threaded through every split so that one seed
    # fixes the whole sequence of runs; None is passed through untouched
    if random_state is None or isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    model = MultiScaleClassifier(classifier=classifier).set_params(**config)

    val_scores = []
    test_scores = []

    for _ in range(n_runs):

        # outer split: hold out a stratified test set
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, stratify=y, random_state=rng)

        # inner CV (stratified K-fold) on the training part only
        inner_cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=rng)

        # validation score: mean accuracy across the inner folds
        val_score = cross_val_score(model, X_train, y_train, scoring='accuracy', cv=inner_cv).mean()

        # test score (we only report this and do not use it during HPO)
        test_score = model.fit(X_train, y_train).score(X_test, y_test)

        val_scores.append(val_score)
        test_scores.append(test_score)

    return {'val_accuracy': np.mean(val_scores),
            'test_accuracy': np.mean(test_scores)}

# DEBUG: smoke-test the objective function on one hand-picked configuration
debug_config = {
    'atlas': 'dosenbach2010',
    'extract_h1_features': False,
    'extract_h2_features': True,
}
('DEBUG', eval_multiscale_model(debug_config, classifier=XGBClassifier(), n_runs=10))

### HPO

In [29]:
# prep the hyper-parameter space and init the objective function
# (the copy keeps the original space intact when the classifier is popped off)
param_space = xgb_param_space.copy()
clf = param_space.pop('clf')
output_name = f'models/multiscale_classifier-{clf.__class__.__name__}-hpo.json'

objective_func = partial(eval_multiscale_model, classifier=clf, n_runs=10)

# restart Ray so the tuning starts from a clean runtime
ray.shutdown()
ray.init()

try:
    tuner = tune.Tuner(
        objective_func,
        param_space=param_space,
        tune_config=tune.TuneConfig(
            metric='val_accuracy',
            mode='max',
        )
    )

    tuning_results = tuner.fit()
finally:
    # always release the Ray runtime, even if tuning fails
    ray.shutdown()

clear_output()

# look the best trial up once, so the score and the parameters are guaranteed
# to come from the same trial
best_result = tuning_results.get_best_result(metric='val_accuracy', mode='max')
best_score = best_result.metrics['val_accuracy']
best_params = best_result.config

# store the best hyper-parameters
with open(output_name, 'w') as f:
    json.dump(best_params, f, indent=2)

print('[DEBUG] Best HPO score:', best_score)

# create a tuned model using the best hyper-parameters
tuned_model = MultiScaleClassifier(classifier=clf).set_params(**best_params)
tuned_model

[DEBUG] Best HPO score: 0.7291666666666666


## Cross-validation accuracy and CI

In [30]:
# score the tuned model on N_RUNS random stratified train/test splits
cv_scores = cross_val_score(
    tuned_model, X, y,
    cv=StratifiedShuffleSplit(n_splits=N_RUNS, test_size=TEST_SIZE),
    n_jobs=-1,
    verbose=3,
)

# bootstrap confidence interval of the mean score
# (95% is the default confidence level of scipy.stats.bootstrap)
bootstrap_ci = stats.bootstrap((cv_scores,), np.mean)

# replace the verbose cross-validation log with the summary
clear_output(wait=True)
print(f'Test accuracy (mean ± std): {cv_scores.mean():.2f} ± {cv_scores.std():.2f}')
print(bootstrap_ci.confidence_interval)

Test accuracy (mean ± std): 0.71 ± 0.14
ConfidenceInterval(low=0.68625, high=0.74125)
