In [1]:
import pandas as pd
import numpy as np

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from catboost import CatBoostClassifier
import optuna

import matplotlib.pyplot as plt

In [2]:
X, y = load_breast_cancer(return_X_y=True)

In [3]:
def objective(trial):
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.3)

    param = {
        "iterations": trial.suggest_int("iterations", 10, 100),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.1),
        "depth": trial.suggest_int("depth", 1, 16),
        "l2_leaf_reg": trial.suggest_int("l2_leaf_reg", 1, 10)
    }
    
    model = CatBoostClassifier(**param, verbose=False)
    model.fit(X_train, y_train, eval_set=(X_valid, y_valid))
    preds = model.predict(X_valid)
    return accuracy_score(y_valid, preds)

In [4]:
study = optuna.create_study(direction="maximize")

[I 2024-04-23 14:45:55,703] A new study created in memory with name: no-name-6478f920-4d11-45ac-b4ae-b0a306b48df2


In [6]:
study.optimize(func=objective, n_trials=25, show_progress_bar=True)

  0%|          | 0/25 [00:00<?, ?it/s]

[I 2024-04-23 15:52:50,550] Trial 25 finished with value: 0.9824561403508771 and parameters: {'iterations': 91, 'learning_rate': 0.08597910231927507, 'depth': 9, 'l2_leaf_reg': 6}. Best is trial 19 with value: 0.9883040935672515.
[I 2024-04-23 15:56:51,373] Trial 26 finished with value: 0.9532163742690059 and parameters: {'iterations': 92, 'learning_rate': 0.08781548950648851, 'depth': 15, 'l2_leaf_reg': 6}. Best is trial 19 with value: 0.9883040935672515.
[I 2024-04-23 15:57:22,272] Trial 27 finished with value: 0.9707602339181286 and parameters: {'iterations': 93, 'learning_rate': 0.0872719273853698, 'depth': 12, 'l2_leaf_reg': 6}. Best is trial 19 with value: 0.9883040935672515.
[I 2024-04-23 15:59:08,653] Trial 28 finished with value: 0.9415204678362573 and parameters: {'iterations': 43, 'learning_rate': 0.08727982071553801, 'depth': 15, 'l2_leaf_reg': 9}. Best is trial 19 with value: 0.9883040935672515.
[I 2024-04-23 15:59:11,168] Trial 29 finished with value: 0.9707602339181286 a

In [7]:
study.best_params

{'iterations': 48,
 'learning_rate': 0.07051942642058556,
 'depth': 14,
 'l2_leaf_reg': 7}

In [10]:
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.3)

In [11]:
model = CatBoostClassifier(iterations=48, learning_rate=0.07051942642058556, depth=14, l2_leaf_reg=7)

In [12]:
model.fit(X_train, y_train, eval_set=(X_valid, y_valid), verbose=False, plot=True);

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

In [13]:
preds = model.predict(X_valid)

In [14]:
accuracy_score(y_valid, preds)

0.9590643274853801