In [6]:
import optuna
import pandas as pd

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier

In [7]:
# Pima Indians Diabetes data, fetched from a public GitHub mirror.
# Column names are passed explicitly via `names=`, so pandas reads every
# line of the file as a data row.
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
columns = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
    'Outcome',
]
df = pd.read_csv(url, names=columns)

In [8]:
# Display the loaded frame; the rendered output elides the middle rows.
df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [9]:
# Features: every column except the label.
X = df.drop(columns="Outcome")
# Target: the "Outcome" label column.
y = df.loc[:, "Outcome"]

In [12]:
# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [16]:
def objective(trial):
    """Optuna objective: mean cross-validated accuracy of a random forest.

    Asks ``trial`` for ``n_estimators`` (integer in 50..200) and ``max_depth``
    (integer in 3..10), builds a ``RandomForestClassifier`` with those values
    and ``random_state=42``, and scores it with ``cross_val_score`` on the
    notebook-level ``X_train`` / ``y_train`` using the "accuracy" scorer.

    Returns:
        The mean of the per-fold scores returned by ``cross_val_score``.
    """
    # Dict literals evaluate in order, so n_estimators is still suggested
    # before max_depth.
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 50, 200),
        "max_depth": trial.suggest_int("max_depth", 3, 10),
    }

    forest = RandomForestClassifier(random_state=42, **params)

    fold_scores = cross_val_score(forest, X_train, y_train, scoring="accuracy")
    return fold_scores.mean()

In [17]:
# Seed the TPE sampler so the search proposes the same sequence of trials
# (and therefore reports the same best parameters and score) on every
# Restart & Run All. The split and the forest already pin random_state=42;
# the sampler was the remaining unseeded source of randomness.
# Optimisation runs sequentially (default n_jobs), which the seeded sampler
# needs in order to be repeatable.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=50)

[32m[I 2026-02-06 17:29:57,849][0m A new study created in memory with name: no-name-5905f949-c77a-4dfb-aa0c-e229605482a4[0m
[32m[I 2026-02-06 17:29:58,790][0m Trial 0 finished with value: 0.7606424097027855 and parameters: {'n_estimators': 91, 'max_depth': 4}. Best is trial 0 with value: 0.7606424097027855.[0m
[32m[I 2026-02-06 17:30:00,077][0m Trial 1 finished with value: 0.7687858190057311 and parameters: {'n_estimators': 130, 'max_depth': 6}. Best is trial 1 with value: 0.7687858190057311.[0m
[32m[I 2026-02-06 17:30:01,444][0m Trial 2 finished with value: 0.7590030654404905 and parameters: {'n_estimators': 151, 'max_depth': 4}. Best is trial 1 with value: 0.7687858190057311.[0m
[32m[I 2026-02-06 17:30:03,099][0m Trial 3 finished with value: 0.7622550979608157 and parameters: {'n_estimators': 194, 'max_depth': 4}. Best is trial 1 with value: 0.7687858190057311.[0m
[32m[I 2026-02-06 17:30:03,769][0m Trial 4 finished with value: 0.7720245235239237 and parameters: {'n_e

In [18]:
# Hyperparameters of the best-scoring trial in the study.
study.best_params

{'n_estimators': 50, 'max_depth': 7}

In [19]:
# Objective value (mean cross-validated accuracy) of the best trial.
study.best_value

0.7817939490870319