In [None]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score

import numpy as np
import pandas as pd
import optuna
import xgboost as xgb

In [None]:
class Objective:
    """Optuna objective: train an XGBoost model and score it on a hold-out split.

    The data is split once in ``__init__`` and wrapped in ``xgb.DMatrix``
    objects that every trial reuses. When ``use_gpus`` is true, each trial
    reserves one GPU slot for the duration of training/prediction and hands it
    back afterwards, so the pool is never exhausted by sequential trials.

    Parameters
    ----------
    X, y
        Features and labels, passed straight to ``split_func``.
    split_func
        Callable invoked as ``split_func(X, y, test_size=0.25)`` that returns
        ``(train_x, valid_x, train_y, valid_y)``.
    use_gpus : bool, default False
        If true, each trial sets ``gpu_id`` and ``tree_method="gpu_hist"``.
    n_gpus : int, default 8
        Number of GPU slots tracked in ``gpu_status``.
    """

    def __init__(self, X, y, split_func, use_gpus=False, n_gpus=8):
        train_x, valid_x, train_y, valid_y = split_func(X, y, test_size=0.25)

        self.dtrain = xgb.DMatrix(train_x, label=train_y)
        self.dvalid = xgb.DMatrix(valid_x, label=valid_y)
        self.valid_y = valid_y
        self.use_gpus = use_gpus
        # One entry per GPU ordinal: 0 = free, 1 = reserved by a running trial.
        self.gpu_status = [0] * n_gpus

    def get_next_gpu(self):
        """Reserve and return the lowest-numbered free GPU id.

        Raises
        ------
        RuntimeError
            If every slot in ``gpu_status`` is already reserved.
        """
        # NOTE: the check-and-mark below is not guarded by a lock.
        for gpu_id, stat in enumerate(self.gpu_status):
            if stat == 0:
                self.gpu_status[gpu_id] = 1
                return gpu_id
        raise RuntimeError(
            "No free GPU available: all {} slots are in use".format(len(self.gpu_status))
        )

    def release_gpu(self, gpu_id):
        """Mark ``gpu_id`` as free again so a later trial can reserve it."""
        self.gpu_status[gpu_id] = 0

    def __call__(self, trial):
        """Run one trial and return the validation accuracy.

        Hyperparameters are sampled from ``trial``; predictions are rounded to
        the nearest integer before being compared with ``valid_y``.
        """
        # No "objective" key is set, so XGBoost's default objective is used.
        params = {
            "alpha": trial.suggest_float("alpha", 1e-8, 1.0, log=True),
            "max_depth": trial.suggest_int("max_depth", 1, 9),
            "eta": trial.suggest_float("eta", 1e-8, 1.0, log=True),
            "gamma": trial.suggest_float("gamma", 1e-8, 1.0, log=True),
            "grow_policy": trial.suggest_categorical("grow_policy", ["depthwise", "lossguide"]),
        }

        gpu_id = None
        if self.use_gpus:
            gpu_id = self.get_next_gpu()
            # NOTE(review): newer XGBoost releases may prefer a `device`
            # parameter over gpu_id/gpu_hist - confirm against the installed version.
            params["gpu_id"] = gpu_id
            params["tree_method"] = "gpu_hist"

        try:
            bst = xgb.train(params, self.dtrain)
            preds = bst.predict(self.dvalid)
        finally:
            # Always give the slot back, even if training fails; otherwise the
            # pool drains after len(gpu_status) trials and later trials crash.
            if gpu_id is not None:
                self.release_gpu(gpu_id)

        pred_labels = np.rint(preds)
        accuracy = accuracy_score(self.valid_y, pred_labels)
        return accuracy

In [None]:
from sklearn.datasets import make_classification

# Synthetic binary-classification problem: 100k rows, 100 features,
# a 75/25 class balance, heavy label noise (flip_y) and a fixed seed.
dataset_kwargs = {
    "n_samples": 100_000,
    "n_features": 100,
    "weights": [0.75, 0.25],
    "flip_y": 0.75,
    "random_state": 123,
}
X, y = make_classification(**dataset_kwargs)

# CPU-only objective; the study maximizes the accuracy it returns.
objective = Objective(X, y, split_func=train_test_split)
study = optuna.create_study(direction="maximize")

In [None]:
%%time
# Run the search on CPU: at most 100 trials, bounded by timeout=600.
study.optimize(objective, n_trials=100, timeout=600)

In [None]:
# Summarize the study: how many trials ran, the best score, and its parameters.
print("Number of finished trials: ", len(study.trials))
print("Best trial:")
trial = study.best_trial

print(f"  Value: {trial.value}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

# Use multiple GPUs
This approach only works if the whole dataset fits into the memory of a single GPU, because each trial trains on exactly one GPU.

In [None]:
# Same data and split function as before, but each trial now trains on a GPU.
objective = Objective(X, y, split_func=train_test_split, use_gpus=True)
study = optuna.create_study(direction="maximize")

In [None]:
%%time
# Run the search with the GPU-enabled objective: at most 100 trials, bounded by timeout=600.
study.optimize(objective, n_trials=100, timeout=600)

In [None]:
# Summarize the GPU study: how many trials ran, the best score, and its parameters.
print("Number of finished trials: ", len(study.trials))
print("Best trial:")
trial = study.best_trial

print(f"  Value: {trial.value}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")