In [1]:
from sporboost.forest import *
from get_data import load
from sklearn.metrics import roc_auc_score
from sklearn.datasets import load_iris
# Dataset selection: this run uses the Iris data bundled with scikit-learn.
# Disabled alternative: read the sparse-parity parquet file through the local loader.
# X, y = load("data/sparse_parity.parquet")
X, y = load_iris(return_X_y=True)  # returned as a (features, labels) pair
# Disabled: keep the raw labels in `y_` and replace `y` with a one-hot encoding.
# NOTE(review): `onehot_encode` is not imported by name in this cell; presumably it
# comes from the star import of `sporboost.forest` -- confirm before re-enabling.
# y_ = y
# y = onehot_encode(y)

In [2]:
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from functools import partial
import pandas as pd

# Indices for k-fold validation: 10 stratified, shuffled folds with a fixed seed.
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=20220329)

# Grid-search factory with the settings shared by every search pre-bound.
cv_func = partial(
    GridSearchCV,
    cv=kfold,
    verbose=3,
    n_jobs=-1,
    return_train_score=True,
)

# Number of feature columns; the 'd' grids below are expressed relative to it.
p = X.shape[1]

# One (estimator, parameter grid) pair per classifier label.
search_specs = {
    'rf': (RandomForest(), {'max_depth': [10]}),
    'ada': (AdaBoost(), {'max_depth': [1, 2]}),
    'sporf': (SPORF(), {'max_depth': [10], 'd': [p // 2, p], 's': [3, 4]}),
    'sporboost': (SPORBoost(), {'max_depth': [1, 2], 'd': [p // 2, p], 's': [3, 4]}),
    'rrf': (RotationalForest(), {'max_depth': [10], 'K': [2, 3]}),
    # NOTE(review): 'rotboost' is built on the same RotationalForest estimator as
    # 'rrf' and differs only in its max_depth grid -- confirm that a boosted
    # estimator was not intended here.
    'rotboost': (RotationalForest(), {'max_depth': [1, 2], 'K': [2, 3]}),
}

# Hyperparameter searches, keyed by classifier label.
cv = {
    label: cv_func(estimator=model, param_grid=grid)
    for label, (model, grid) in search_specs.items()
}

In [3]:
# Perform fitting and evaluation: run every hyperparameter search on the data.
# The fitted state lives on the search objects held in `cv`, so nothing has to
# be collected here. A plain loop replaces the throwaway comprehension bound to
# `_`, because assigning `_` in IPython stops it from tracking the last output.
for search in cv.values():
    search.fit(X, y)

Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 2 candidates, totalling 20 fits
Fitting 10 folds for each of 4 candidates, totalling 40 fits
Fitting 10 folds for each of 8 candidates, totalling 80 fits
Fitting 10 folds for each of 2 candidates, totalling 20 fits
Fitting 10 folds for each of 4 candidates, totalling 40 fits


In [47]:
from pathlib import Path

# Combine results into a single dataframe. Concatenating a dict yields a
# two-level index: the classifier label, then the row number within that search.
results = pd.concat({k: pd.DataFrame(m.cv_results_) for k, m in cv.items()})

# Sort results within each group. Both sort keys are descending, so classifier
# labels appear in reverse alphabetical order and, within each classifier, the
# best mean test score comes first.
results = (
    results.reset_index(drop=False)
    .rename(columns={'level_0': 'classifier'})  # outer index level -> named column
    .drop(['level_1'], axis=1)                  # per-search row number is not needed
    .sort_values(['classifier', 'mean_test_score'], ascending=False)
    .reset_index(drop=True)
)

# Create the output directory first: to_csv does not create missing parent
# directories, so a fresh checkout without `report/` would otherwise fail here.
report_path = Path("report/iris.csv")
report_path.parent.mkdir(parents=True, exist_ok=True)
results.to_csv(report_path)