In [2]:
import numpy as np
import pandas as pd
from imblearn.ensemble import BalancedRandomForestClassifier
from imblearn.over_sampling import RandomOverSampler 
import optuna
from sklearn.model_selection import StratifiedKFold, train_test_split, cross_val_score
from sklearn import datasets

In [14]:

diabetes = datasets.load_diabetes()
X = diabetes.data[:200]
y = np.round(np.random.uniform(0,1, 200))

seed = 42
n_splits = 5
n_splits_nested = 2
n_trials = 5

class oversampled_Kfold():
    def __init__(self, n_splits, random_state, n_repeats=1):
        self.n_splits = n_splits
        self.n_repeats = n_repeats
        self.random_state = random_state

    def split(self, X, y):
        splits = np.array_split(np.random.choice(len(X), len(X),replace=False), self.n_splits)
        train, test = [], []
        for repeat in range(self.n_repeats):
            for fold in range(len(splits)):
                train_splits = splits[:]
                test_splits = train_splits.pop(fold)
                ros = RandomOverSampler(random_state=self.random_state)
                train_idx = np.concatenate(train_splits)
                Xidx_r, y_r = ros.fit_resample(train_idx.reshape(-1, 1), y[train_idx])
                train.append(Xidx_r.flatten())
                test.append(test_splits)
        return list(zip(train, test))

# output = cross_validate(clf,x,y, scoring=metrics,cv=rkf)


def objective(trial):
    n_estimators = trial.suggest_int("n_estimators", 10, 1000)
    max_depth = trial.suggest_int("max_depth", 2, 500, log=True)
    trial_config = {'n_estimators': n_estimators, 'max_depth': max_depth}
    
    
    model = BalancedRandomForestClassifier(**trial_config)

    score = cross_val_score(model, X_train, y_train, n_jobs=-1, cv=n_splits_nested)
    accuracy = score.mean()
    return accuracy


kf = oversampled_Kfold(n_splits=5, random_state=42, n_repeats=2)

kf_ = kf.split(X, y)

f = 0
preds = []
probs = []
GTs = []
for train_index, test_index in kf_:
    print('Running fold number: ', f)
    
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    print((y_train == 0).sum(), (y_train==1).sum())
    print((y_test == 0).sum(), (y_test==1).sum())
    if f == 0: 
        print('running optuna in first fold')
        study = optuna.create_study(study_name='test', direction='maximize')
        study.optimize(objective, n_trials=n_trials)
        config = study.best_trial.params
        print('bested optuna params:' , config)
        
    model = BalancedRandomForestClassifier(**config)
    model.fit(X_train, y_train)
    preds.append(model.predict(X_test))
    probs.append(model.predict_proba(X_test))
    GTs.append(y_test)
    f += 1
        
preds = np.concatenate(preds)
probs = np.concatenate(probs)
GTs = np.concatenate(GTs)

[32m[I 2021-08-17 12:44:07,703][0m A new study created in memory with name: test[0m


Running fold number:  0
89 89
21 19
running optuna in first fold


[32m[I 2021-08-17 12:44:08,535][0m Trial 0 finished with value: 0.550561797752809 and parameters: {'n_estimators': 597, 'max_depth': 19}. Best is trial 0 with value: 0.550561797752809.[0m
[32m[I 2021-08-17 12:44:09,699][0m Trial 1 finished with value: 0.550561797752809 and parameters: {'n_estimators': 806, 'max_depth': 7}. Best is trial 0 with value: 0.550561797752809.[0m
[32m[I 2021-08-17 12:44:10,095][0m Trial 2 finished with value: 0.5449438202247191 and parameters: {'n_estimators': 283, 'max_depth': 115}. Best is trial 0 with value: 0.550561797752809.[0m
[32m[I 2021-08-17 12:44:10,137][0m Trial 3 finished with value: 0.5674157303370786 and parameters: {'n_estimators': 26, 'max_depth': 30}. Best is trial 3 with value: 0.5674157303370786.[0m
[32m[I 2021-08-17 12:44:11,033][0m Trial 4 finished with value: 0.5393258426966292 and parameters: {'n_estimators': 648, 'max_depth': 9}. Best is trial 3 with value: 0.5674157303370786.[0m


bested optuna params: {'n_estimators': 26, 'max_depth': 30}
Running fold number:  1
87 87
23 17
Running fold number:  2
93 93
17 23
Running fold number:  3
87 87
23 17
Running fold number:  4
84 84
26 14
Running fold number:  5
89 89
21 19
Running fold number:  6
87 87
23 17
Running fold number:  7
93 93
17 23
Running fold number:  8
87 87
23 17
Running fold number:  9
84 84
26 14


Traceback (most recent call last):
  File "/home/matthewvowels/GitHub/PhD_part_1/anaconda3/envs/my-torch/lib/python3.7/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/matthewvowels/GitHub/PhD_part_1/anaconda3/envs/my-torch/lib/python3.7/site-packages/imblearn/ensemble/_forest.py", line 425, in fit
    accept_sparse="csc", dtype=DTYPE)
  File "/home/matthewvowels/GitHub/PhD_part_1/anaconda3/envs/my-torch/lib/python3.7/site-packages/sklearn/base.py", line 432, in _validate_data
    X, y = check_X_y(X, y, **check_params)
  File "/home/matthewvowels/GitHub/PhD_part_1/anaconda3/envs/my-torch/lib/python3.7/site-packages/sklearn/utils/validation.py", line 73, in inner_f
    return f(**kwargs)
  File "/home/matthewvowels/GitHub/PhD_part_1/anaconda3/envs/my-torch/lib/python3.7/site-packages/sklearn/utils/validation.py", line 803, in check_X_y
    estimator=estimator)
  File "/home/matthewvowels/Gi

Traceback (most recent call last):
  File "/home/matthewvowels/GitHub/PhD_part_1/anaconda3/envs/my-torch/lib/python3.7/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/matthewvowels/GitHub/PhD_part_1/anaconda3/envs/my-torch/lib/python3.7/site-packages/imblearn/ensemble/_forest.py", line 425, in fit
    accept_sparse="csc", dtype=DTYPE)
  File "/home/matthewvowels/GitHub/PhD_part_1/anaconda3/envs/my-torch/lib/python3.7/site-packages/sklearn/base.py", line 432, in _validate_data
    X, y = check_X_y(X, y, **check_params)
  File "/home/matthewvowels/GitHub/PhD_part_1/anaconda3/envs/my-torch/lib/python3.7/site-packages/sklearn/utils/validation.py", line 73, in inner_f
    return f(**kwargs)
  File "/home/matthewvowels/GitHub/PhD_part_1/anaconda3/envs/my-torch/lib/python3.7/site-packages/sklearn/utils/validation.py", line 803, in check_X_y
    estimator=estimator)
  File "/home/matthewvowels/Gi

Traceback (most recent call last):
  File "/home/matthewvowels/GitHub/PhD_part_1/anaconda3/envs/my-torch/lib/python3.7/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/matthewvowels/GitHub/PhD_part_1/anaconda3/envs/my-torch/lib/python3.7/site-packages/imblearn/ensemble/_forest.py", line 425, in fit
    accept_sparse="csc", dtype=DTYPE)
  File "/home/matthewvowels/GitHub/PhD_part_1/anaconda3/envs/my-torch/lib/python3.7/site-packages/sklearn/base.py", line 432, in _validate_data
    X, y = check_X_y(X, y, **check_params)
  File "/home/matthewvowels/GitHub/PhD_part_1/anaconda3/envs/my-torch/lib/python3.7/site-packages/sklearn/utils/validation.py", line 73, in inner_f
    return f(**kwargs)
  File "/home/matthewvowels/GitHub/PhD_part_1/anaconda3/envs/my-torch/lib/python3.7/site-packages/sklearn/utils/validation.py", line 803, in check_X_y
    estimator=estimator)
  File "/home/matthewvowels/Gi

Traceback (most recent call last):
  File "/home/matthewvowels/GitHub/PhD_part_1/anaconda3/envs/my-torch/lib/python3.7/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/matthewvowels/GitHub/PhD_part_1/anaconda3/envs/my-torch/lib/python3.7/site-packages/imblearn/ensemble/_forest.py", line 425, in fit
    accept_sparse="csc", dtype=DTYPE)
  File "/home/matthewvowels/GitHub/PhD_part_1/anaconda3/envs/my-torch/lib/python3.7/site-packages/sklearn/base.py", line 432, in _validate_data
    X, y = check_X_y(X, y, **check_params)
  File "/home/matthewvowels/GitHub/PhD_part_1/anaconda3/envs/my-torch/lib/python3.7/site-packages/sklearn/utils/validation.py", line 73, in inner_f
    return f(**kwargs)
  File "/home/matthewvowels/GitHub/PhD_part_1/anaconda3/envs/my-torch/lib/python3.7/site-packages/sklearn/utils/validation.py", line 803, in check_X_y
    estimator=estimator)
  File "/home/matthewvowels/Gi