In [9]:
import optuna
import xgboost as xgb
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

In [10]:
# 3.1 Load data
X, y = load_breast_cancer(return_X_y=True)
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [11]:
# 3.2 Define objective
def objective(trial):
    # 3.2.1 Suggest hyperparameters
    param = {
        "verbosity": 0,
        "objective": "binary:logistic",
        "eval_metric": "auc",
        "booster": trial.suggest_categorical("booster", ["gbtree", "dart"]),
        "lambda": trial.suggest_loguniform("lambda", 1e-8, 10.0),
        "alpha": trial.suggest_loguniform("alpha", 1e-8, 10.0),
        "max_depth": trial.suggest_int("max_depth", 3, 12),
        "eta": trial.suggest_loguniform("eta", 1e-3, 0.3),
        "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
        "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
    }

    # 3.2.2 Create DMatrix
    dtrain = xgb.DMatrix(X_train, label=y_train)
    dvalid = xgb.DMatrix(X_valid, label=y_valid)

    # 3.2.3 Train with pruning callback
    pruning_callback = optuna.integration.XGBoostPruningCallback(
        trial, "validation-auc"
    )

    bst = xgb.train(
        params=param,
        dtrain=dtrain,
        num_boost_round=1000,
        evals=[(dvalid, "validation")],
        early_stopping_rounds=50,
        callbacks=[pruning_callback],
        verbose_eval=False
    )

    # 3.2.4 Evaluate
    preds = bst.predict(dvalid)
    auc = roc_auc_score(y_valid, preds)
    return auc

In [12]:
# 3.3 Create and run study
if __name__ == "__main__":
    study = optuna.create_study(
        direction="maximize",
        pruner=optuna.pruners.MedianPruner(n_warmup_steps=10)
    )
    study.optimize(objective, n_trials=50, timeout=600)

    print("Best AUC:", study.best_value)
    print("Best params:", study.best_params)


[I 2025-06-27 07:53:19,432] A new study created in memory with name: no-name-07891832-6264-4311-86d1-9b233fca8b7d
  "lambda": trial.suggest_loguniform("lambda", 1e-8, 10.0),
  "alpha": trial.suggest_loguniform("alpha", 1e-8, 10.0),
  "eta": trial.suggest_loguniform("eta", 1e-3, 0.3),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0),
[I 2025-06-27 07:53:19,632] Trial 0 finished with value: 0.9924664264657713 and parameters: {'booster': 'dart', 'lambda': 0.056028079951429666, 'alpha': 0.027968585420110256, 'max_depth': 10, 'eta': 0.045112587350101784, 'subsample': 0.5388769380666425, 'colsample_bytree': 0.8627364489306262}. Best is trial 0 with value: 0.9924664264657713.
  "lambda": trial.suggest_loguniform("lambda", 1e-8, 10.0),
  "alpha": trial.suggest_loguniform("alpha", 1e-8, 10.0),
  "eta": trial.suggest_loguniform("eta", 1e-3, 0.3),
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "c

Best AUC: 0.99737962659679
Best params: {'booster': 'dart', 'lambda': 0.011916378192625601, 'alpha': 0.15082104794986245, 'max_depth': 3, 'eta': 0.008406615320225208, 'subsample': 0.5263587853974492, 'colsample_bytree': 0.585494791048681}


In [14]:
import optuna.visualization as vis


vis.plot_optimization_history(study)

In [15]:
vis.plot_param_importances(study)

In [16]:
vis.plot_slice(study)