In [3]:
# Predict whether income is >50K on the OpenML "adult" dataset, tuning XGBoost hyperparameters step by step.

from hyperopt import hp
import sk_stepwise as sw
import xgboost as xgb
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

# Download version 2 of the OpenML "adult" dataset; as_frame=True asks for
# pandas objects rather than NumPy arrays.
adult = fetch_openml("adult", version=2, as_frame=True)
X = adult["data"]
# Boolean target: True where the label equals ">50K", False otherwise.
y = adult["target"] == ">50K"

# Hold out a test split, stratified on y so both parts keep the same class
# balance.
# NOTE(review): no random_state is passed, so the split (and therefore the
# scores printed below) will differ from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

# Hyperopt search spaces, one dict per optimization step. The run output
# reports "step 1/4" .. "step 4/4" and the best-parameter dict grows by one
# group per step, so each group appears to be tuned in list order with earlier
# winners carried forward — confirm against the sk_stepwise docs.
rounds = [
    {
        # Tree-structure parameters.
        # quniform samples floats on a step-1 grid between 1 and 8; the output
        # shows an integer max_depth, so the optimizer presumably casts it —
        # TODO confirm.
        "max_depth": hp.quniform("max_depth", 1, 8, 1),
        # loguniform bounds are exponents: values range over exp(-2)..exp(3).
        "min_child_weight": hp.loguniform("min_child_weight", -2, 3),
    },
    {
        # Row and column sampling fractions, each in [0.5, 1].
        "subsample": hp.uniform("subsample", 0.5, 1),
        "colsample_bytree": hp.uniform("colsample_bytree", 0.5, 1),
    },
    {
        # L1 (alpha) and L2 (lambda) regularization strengths.
        "reg_alpha": hp.uniform("reg_alpha", 0, 10),
        "reg_lambda": hp.uniform("reg_lambda", 1, 10),
    },
    # Learning rate searched on a log scale: exp(-7)..exp(0) = ~0.0009..1.
    {"learning_rate": hp.loguniform("learning_rate", -7, 0)},
]

# enable_categorical=True turns on XGBoost's native categorical-feature
# handling — presumably required because X contains category-dtype columns;
# verify against the loaded frame.
swopt = sw.StepwiseHyperoptOptimizer(xgb.XGBClassifier(enable_categorical=True), rounds)
# Run the stepwise search on the training split only; X_test / y_test are not
# used in this cell.
swopt.fit(X_train, y_train)
print(swopt.best_params_)

Optimizing step 1/4
100%|█| 100/100 [00:36<00:00,  2.76trial/s, best loss: 0.127869542
Best parameters after step 1: {'max_depth': 3, 'min_child_weight': 0.1690802441977019}
Best score after step 1: -0.1278695423092857
Optimizing step 2/4
100%|█| 100/100 [00:34<00:00,  2.90trial/s, best loss: 0.128142505
Best parameters after step 2: {'max_depth': 3, 'min_child_weight': 0.1690802441977019, 'colsample_bytree': 0.6797680005304337, 'subsample': 0.9690716727179199}
Best score after step 2: -0.12814250532279742
Optimizing step 3/4
100%|█| 100/100 [00:33<00:00,  3.00trial/s, best loss: 0.127787560
Best parameters after step 3: {'max_depth': 3, 'min_child_weight': 0.1690802441977019, 'colsample_bytree': 0.6797680005304337, 'subsample': 0.9690716727179199, 'reg_alpha': 2, 'reg_lambda': 1.0201255435343553}
Best score after step 3: -0.12778756025651072
Optimizing step 4/4
100%|█| 100/100 [00:34<00:00,  2.87trial/s, best loss: 0.127487364
Best parameters after step 4: {'max_depth': 3, 'min_child_