In [19]:
import numpy as np
import scipy as scp
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.model_selection import (permutation_test_score, learning_curve, LeaveOneGroupOut,
                                     KFold, cross_val_score, cross_val_predict, cross_validate)
from sklearn.utils import parallel_backend
from sklearn.base import clone
from sklearn import datasets
from joblib.parallel import Parallel, delayed

## Setup

In [20]:
## IRIS dataset
# Disabled alternative dataset: loads Iris and keeps only the samples whose
# label is < 2. Uncomment these lines to define X_iris / y_iris.
# X_iris, y_iris = datasets.load_iris(return_X_y=True)
# mask = y_iris < 2
# X_iris, y_iris = X_iris[mask], y_iris[mask]

In [26]:
# Read the inputs (X) and targets (y) from .npy files; the paths are relative
# to the notebook's working directory.
X = np.load(file="random_data_X.npy")
y = np.load(file="random_data_y.npy")

In [27]:
# Outer cross-validation scheme shared by the permutation cells below:
# 3 shuffled folds with a fixed seed so the splits are repeatable.
cv = KFold(
    n_splits=3,
    shuffle=True,
    random_state=0,
)

# Logistic regression that tunes its own regularisation strength internally;
# class_weight='balanced' and Cs=4 are the only non-default settings.
estimator = LogisticRegressionCV(
    Cs=4,
    class_weight='balanced',
)

## Pre-training permutation

In [23]:
# Pre-training permutation test: the estimator is re-evaluated with `cv` for
# each of the 500 permutations and scored with ROC AUC.
# NOTE(review): the recorded run of this cell was interrupted by hand, so it is
# presumably slow with this estimator -- confirm before relying on Run All.
score, null, p = permutation_test_score(
    estimator,
    X,
    y,
    cv=cv,
    scoring='roc_auc',
    n_permutations=500,
    n_jobs=-1,
    verbose=True,
)

KeyboardInterrupt: 

## Post-training permutation

In [24]:
from permutation_helpers import post_hoc_permuation

In [29]:
# Out-of-fold class-probability predictions from an unfitted clone of the
# estimator, using the same `cv` splitter as the pre-training test.
y_pred = cross_val_predict(
    clone(estimator),
    X,
    y,
    cv=cv,
    method='predict_proba',
)

# Only the second probability column is handed to the helper (presumably the
# positive-class probability -- TODO confirm against estimator.classes_).
# NOTE(review): post_hoc_permuation lives in permutation_helpers; it is assumed
# to return (score, null distribution, p-value) like permutation_test_score.
score, null, p = post_hoc_permuation(y, y_pred[:, 1], n_jobs=-1, verbose=True)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 40 concurrent workers.
[Parallel(n_jobs=-1)]: Done 104 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 976 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Done 3776 tasks      | elapsed:    2.2s
[Parallel(n_jobs=-1)]: Done 7376 tasks      | elapsed:    4.0s
[Parallel(n_jobs=-1)]: Done 9921 out of 10000 | elapsed:    4.9s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done 10000 out of 10000 | elapsed:    4.9s finished


The approach above permutes across CV folds. To permute within CV folds instead, we need something like the cell below: take each fold's test indices, run the permutation function on each fold separately, and then aggregate the results.

In [10]:
# Test-set indices of each CV fold. Split on X, the dataset used by every
# other cell: X_iris is only assigned in a commented-out cell, so referencing
# it raises NameError on a fresh kernel.
[test_idx for _, test_idx in cv.split(X)]

[array([ 2,  3,  6,  7,  8, 13, 16, 22, 24, 26, 30, 33, 43, 45, 48, 53, 54,
        55, 60, 62, 68, 71, 73, 75, 76, 78, 80, 82, 86, 90, 92, 93, 95, 99]),
 array([ 0,  1,  4,  5, 10, 11, 15, 17, 18, 23, 27, 28, 31, 32, 34, 35, 38,
        40, 41, 42, 50, 51, 52, 56, 57, 59, 61, 63, 66, 74, 79, 85, 91]),
 array([ 9, 12, 14, 19, 20, 21, 25, 29, 36, 37, 39, 44, 46, 47, 49, 58, 64,
        65, 67, 69, 70, 72, 77, 81, 83, 84, 87, 88, 89, 94, 96, 97, 98])]