Click [here]() to access the associated Medium article.

# Setup

In [3]:
%pip install -qU pip optuna scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [6]:
import optuna
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Seed passed as random_state to train_test_split and RandomForestClassifier below.
SEED = 77

# Data

In [14]:
# Load the Iris dataset and pull out the feature matrix and the class labels.
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples for validation; SEED makes the shuffle repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    random_state=SEED,
    test_size=0.2,
)

# Preview the first three entries of each split.
print(f"X_train:\n{X_train[:3]}...\n")
print(f"y_train:\n{y_train[:3]}...\n")
print(f"X_val:\n{X_val[:3]}...\n")
print(f"y_val:\n{y_val[:3]}...\n")

X_train:
[[6.3 2.5 4.9 1.5]
 [7.2 3.  5.8 1.6]
 [5.3 3.7 1.5 0.2]]...

y_train:
[1 2 0]...

X_val:
[[5.8 2.7 3.9 1.2]
 [6.3 2.8 5.1 1.5]
 [5.7 2.5 5.  2. ]]...

y_val:
[1 2 2]...



# Objective function

In [15]:
def objective(trial):
    """Train a random forest with trial-suggested hyperparameters and score it.

    The trial is asked, in this order, for:
      * ``n_estimators``      - integer in [10, 100]
      * ``max_depth``         - integer in [3, 20]
      * ``min_samples_split`` - float in [0.1, 1.0]

    The model is fitted on the notebook-level ``X_train`` / ``y_train`` and
    evaluated on ``X_val`` / ``y_val``; ``SEED`` is used as its random_state.

    Args:
        trial: Object providing ``suggest_int`` and ``suggest_float``
            (an Optuna trial when called through ``study.optimize``).

    Returns:
        The accuracy of the fitted model on the validation split. The studies
        in this notebook are created with direction="maximize".
    """
    # Dict literals evaluate top to bottom, so the suggest calls keep their order.
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 10, 100),
        "max_depth": trial.suggest_int("max_depth", 3, 20),
        "min_samples_split": trial.suggest_float("min_samples_split", 0.1, 1.0),
    }

    forest = RandomForestClassifier(random_state=SEED, **params)
    forest.fit(X_train, y_train)

    # Score the fitted forest on the held-out validation data.
    return accuracy_score(y_val, forest.predict(X_val))

# Study

In [19]:
# Seed the sampler so that re-running this cell reproduces the same trials.
# Without a sampler argument Optuna uses an unseeded TPESampler for
# single-objective studies, so this keeps the same algorithm and only fixes its seed.
study = optuna.create_study(
    direction="maximize",  # Maximize accuracy
    sampler=optuna.samplers.TPESampler(seed=SEED),
)
study.optimize(objective, n_trials=100)

[I 2024-02-12 11:56:10,802] A new study created in memory with name: no-name-8134c441-bdcf-452a-9ed5-ae0c8722c345
[I 2024-02-12 11:56:10,877] Trial 0 finished with value: 0.8666666666666667 and parameters: {'n_estimators': 65, 'max_depth': 12, 'min_samples_split': 0.3146680813827415}. Best is trial 0 with value: 0.8666666666666667.
[I 2024-02-12 11:56:10,968] Trial 1 finished with value: 0.3 and parameters: {'n_estimators': 93, 'max_depth': 11, 'min_samples_split': 0.7188358544224039}. Best is trial 0 with value: 0.8666666666666667.
[I 2024-02-12 11:56:11,068] Trial 2 finished with value: 0.8666666666666667 and parameters: {'n_estimators': 97, 'max_depth': 3, 'min_samples_split': 0.17517568716403698}. Best is trial 0 with value: 0.8666666666666667.
[I 2024-02-12 11:56:11,160] Trial 3 finished with value: 0.8666666666666667 and parameters: {'n_estimators': 86, 'max_depth': 8, 'min_samples_split': 0.45606563440411874}. Best is trial 0 with value: 0.8666666666666667.
[I 2024-02-12 11:56:1

# TPE (Tree-structured Parzen Estimator)

In [17]:
# Explicit TPE sampler, seeded so that Restart & Run All yields the same
# sequence of suggested hyperparameters (and therefore the same plots).
study_tpe = optuna.create_study(
    direction="maximize", sampler=optuna.samplers.TPESampler(seed=SEED)
)
study_tpe.optimize(objective, n_trials=100)

[I 2024-02-12 11:50:04,668] A new study created in memory with name: no-name-406e0cce-60e6-4420-8aeb-82c4bef5c2a8
[I 2024-02-12 11:50:04,735] Trial 0 finished with value: 0.26666666666666666 and parameters: {'n_estimators': 37, 'max_depth': 7, 'min_samples_split': 0.7885613884209568}. Best is trial 0 with value: 0.26666666666666666.
[I 2024-02-12 11:50:04,754] Trial 1 finished with value: 0.26666666666666666 and parameters: {'n_estimators': 15, 'max_depth': 5, 'min_samples_split': 0.6731251336910595}. Best is trial 0 with value: 0.26666666666666666.
[I 2024-02-12 11:50:04,849] Trial 2 finished with value: 0.3 and parameters: {'n_estimators': 86, 'max_depth': 12, 'min_samples_split': 0.7460270374607401}. Best is trial 2 with value: 0.3.
[I 2024-02-12 11:50:04,942] Trial 3 finished with value: 0.26666666666666666 and parameters: {'n_estimators': 90, 'max_depth': 13, 'min_samples_split': 0.716405481517271}. Best is trial 2 with value: 0.3.
[I 2024-02-12 11:50:04,968] Trial 4 finished with

# Random Search

In [18]:
# Random-search baseline, seeded so that the sampled hyperparameters are
# the same on every run of the notebook.
study_random = optuna.create_study(
    direction="maximize", sampler=optuna.samplers.RandomSampler(seed=SEED)
)
study_random.optimize(objective, n_trials=100)

[I 2024-02-12 11:54:04,344] A new study created in memory with name: no-name-6a1f6b6b-86c3-480a-ae3b-ae341ab5fca1
[I 2024-02-12 11:54:04,370] Trial 0 finished with value: 0.8333333333333334 and parameters: {'n_estimators': 18, 'max_depth': 3, 'min_samples_split': 0.5323455027233865}. Best is trial 0 with value: 0.8333333333333334.
[I 2024-02-12 11:54:04,448] Trial 1 finished with value: 0.8333333333333334 and parameters: {'n_estimators': 75, 'max_depth': 7, 'min_samples_split': 0.5407936894092948}. Best is trial 0 with value: 0.8333333333333334.
[I 2024-02-12 11:54:04,499] Trial 2 finished with value: 0.8666666666666667 and parameters: {'n_estimators': 47, 'max_depth': 17, 'min_samples_split': 0.6854362685752783}. Best is trial 2 with value: 0.8666666666666667.
[I 2024-02-12 11:54:04,589] Trial 3 finished with value: 0.8666666666666667 and parameters: {'n_estimators': 87, 'max_depth': 4, 'min_samples_split': 0.14004470465709454}. Best is trial 2 with value: 0.8666666666666667.
[I 2024-

# Visualization

In [22]:
# Optimization-history plot for the TPE study; left as the cell's last expression so the figure is displayed.
optuna.visualization.plot_optimization_history(study_tpe)

In [23]:
# Hyperparameter-importance plot for the TPE study.
optuna.visualization.plot_param_importances(study_tpe)

In [24]:
# Parallel-coordinate plot of the TPE study's trials.
optuna.visualization.plot_parallel_coordinate(study_tpe)

In [25]:
# Slice plot of the TPE study's trials.
optuna.visualization.plot_slice(study_tpe)