In [1]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.2.1-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.1-py3-none-any.whl.metadata (7.2 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.2.1-py3-none-any.whl (383 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m383.6/383.6 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.15.1-py3-none-any.whl (231 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.8/231.8 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.15.1 colorlog-6.9.0 optuna-4.2.1


In [2]:
import optuna
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score,train_test_split
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

In [3]:
data = load_iris()
x = pd.DataFrame(data.data , columns=data.feature_names)
y = data.target

In [5]:
def objective(trial):
    n_estimators = trial.suggest_int("n_estimators",50,300,step=50)
    max_depth=trial.suggest_int("max_depth",3,20)
    min_samples_split = trial.suggest_int("min_samples_split",2,10)
    max_features = trial.suggest_categorical("max_features",['sqrt','log2',None])
    model = RandomForestClassifier(
    n_estimators = n_estimators,
    max_depth = max_depth,
    min_samples_split = min_samples_split,
    max_features = max_features,
    random_state = 42,
)
    score = cross_val_score(model,x,y,cv=5,scoring="accuracy").mean()
    return score

In [6]:
study=optuna.create_study(direction="maximize")
study.optimize(objective,n_trials=30)

[I 2025-03-28 11:55:35,671] A new study created in memory with name: no-name-228ea7c1-bebc-47d2-a5b2-78be9d6ffa44
[I 2025-03-28 11:55:37,067] Trial 0 finished with value: 0.9666666666666668 and parameters: {'n_estimators': 150, 'max_depth': 8, 'min_samples_split': 10, 'max_features': 'sqrt'}. Best is trial 0 with value: 0.9666666666666668.
[I 2025-03-28 11:55:37,760] Trial 1 finished with value: 0.9666666666666668 and parameters: {'n_estimators': 50, 'max_depth': 5, 'min_samples_split': 7, 'max_features': 'sqrt'}. Best is trial 0 with value: 0.9666666666666668.
[I 2025-03-28 11:55:40,521] Trial 2 finished with value: 0.96 and parameters: {'n_estimators': 250, 'max_depth': 20, 'min_samples_split': 9, 'max_features': 'sqrt'}. Best is trial 0 with value: 0.9666666666666668.
[I 2025-03-28 11:55:42,098] Trial 3 finished with value: 0.9666666666666668 and parameters: {'n_estimators': 200, 'max_depth': 9, 'min_samples_split': 3, 'max_features': 'log2'}. Best is trial 0 with value: 0.966666666

In [7]:
print("Best hyperparameters:",study.best_params)
print("Best accuracy:",study.best_value)

Best hyperparameters: {'n_estimators': 150, 'max_depth': 8, 'min_samples_split': 10, 'max_features': 'sqrt'}
Best accuracy: 0.9666666666666668


In [8]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)
best_params = study.best_params
best_model = RandomForestClassifier(**best_params,random_state=42)
best_model.fit(x_train,y_train)
y_pred=best_model.predict(x_test)
accuracy = accuracy_score(y_test,y_pred)
print(f"Test accuracy: {accuracy: .4f}")

Test accuracy:  1.0000
