In [1]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.6.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.17.2-py3-none-any.whl.metadata (7.2 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)
Collecting sqlalchemy>=1.4.2 (from optuna)
  Downloading sqlalchemy-2.0.45-cp310-cp310-win_amd64.whl.metadata (9.8 kB)
Collecting tqdm (from optuna)
  Downloading tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading mako-1.3.10-py3-none-any.whl.metadata (2.9 kB)
Collecting tomli (from alembic>=1.5.0->optuna)
  Downloading tomli-2.3.0-py3-none-any.whl.metadata (10 kB)
Collecting greenlet>=1 (from sqlalchemy>=1.4.2->optuna)
  Downloading greenlet-3.3.0-cp310-cp310-win_amd64.whl.metadata (4.2 kB)
Downloading optuna-4.6.0-py3-none-any.whl (404 kB)
Downloading alembic-1.17.2-py3-none-any.whl (248 kB)
Downloading sqlalchemy-2.0.45-cp310-cp310-win_amd64.whl (2.1 MB

In [2]:
import optuna 
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Remote location of the Pima Indians diabetes CSV used throughout the notebook.
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"

# Column labels supplied by hand when the file is read; 'Outcome' is later
# split off as the prediction target.
columns = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
    'Outcome',
]

In [4]:
# Read the CSV with the hand-written column labels (no header row is taken
# from the file) and preview the first rows.
df = pd.read_csv(url, header=None, names=columns)
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [5]:
import numpy as np

# Columns in which a value of 0 is treated as a missing measurement.
cols_with_missing_vals = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']

# Turn the placeholder zeros into NaN so they can be imputed.
df[cols_with_missing_vals] = df[cols_with_missing_vals].replace(0, np.nan)

# Fill every NaN with the mean of its column. The result is assigned back
# instead of using fillna(..., inplace=True), which keeps the data flow explicit.
# NOTE(review): the means are computed on the whole dataset, i.e. before the
# train/test split done later, so held-out rows influence the imputed values.
df = df.fillna(df.mean())

# Sanity check: every column should now report zero missing values.
print(df.isnull().sum())

Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64


In [6]:
# Separate the predictors from the 'Outcome' target column.
X = df.drop(columns='Outcome')
y = df['Outcome']

# Hold out 30% of the rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [7]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

def Objective(trial):
    """Score one Optuna trial of a random forest.

    Samples ``n_estimators`` (50-200) and ``max_depth`` (3-20) from ``trial``
    and returns the mean 3-fold cross-validated accuracy of the resulting
    ``RandomForestClassifier`` on ``X_train`` / ``y_train``.
    """
    # The dict literal is evaluated top to bottom, so n_estimators is
    # requested from the trial before max_depth.
    sampled = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 200),
        'max_depth': trial.suggest_int('max_depth', 3, 20),
    }
    forest = RandomForestClassifier(random_state=42, **sampled)

    fold_scores = cross_val_score(forest, X_train, y_train, cv=3, scoring='accuracy')
    return fold_scores.mean()

In [8]:
# Maximise the cross-validated accuracy with the TPE sampler. The sampler is
# seeded so that the sequence of suggested hyperparameters is repeatable
# from one run of the notebook to the next.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(Objective, n_trials=50)

[I 2025-12-25 20:07:14,280] A new study created in memory with name: no-name-2dba80f7-73f0-4835-a767-73b3bcab40f0
[I 2025-12-25 20:07:14,931] Trial 0 finished with value: 0.7728119180633147 and parameters: {'n_estimators': 185, 'max_depth': 13}. Best is trial 0 with value: 0.7728119180633147.
[I 2025-12-25 20:07:15,328] Trial 1 finished with value: 0.7783985102420856 and parameters: {'n_estimators': 135, 'max_depth': 7}. Best is trial 1 with value: 0.7783985102420856.
[I 2025-12-25 20:07:15,743] Trial 2 finished with value: 0.7616387337057727 and parameters: {'n_estimators': 144, 'max_depth': 9}. Best is trial 1 with value: 0.7783985102420856.
[I 2025-12-25 20:07:16,092] Trial 3 finished with value: 0.7746741154562384 and parameters: {'n_estimators': 115, 'max_depth': 20}. Best is trial 1 with value: 0.7783985102420856.
[I 2025-12-25 20:07:16,550] Trial 4 finished with value: 0.7709497206703911 and parameters: {'n_estimators': 149, 'max_depth': 17}. Best is trial 1 with value: 0.778398

In [9]:
# Report the best cross-validated accuracy found by the study and the
# hyperparameters that produced it, one per line.
print(
    f'Best trial accuracy : {study.best_trial.value}',
    f'Best hyperparameters : {study.best_trial.params}',
    sep='\n',
)

Best trial accuracy : 0.7802607076350093
Best hyperparameters : {'n_estimators': 118, 'max_depth': 16}


In [12]:
from sklearn.metrics import accuracy_score

# Refit a forest on the whole training split with the study's best hyperparameters.
best_model = RandomForestClassifier(random_state=42, **study.best_trial.params)
best_model.fit(X_train, y_train)

# Measure accuracy on the held-out test split.
y_pred = best_model.predict(X_test)
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Test accuracy with the best hyperparameters : {test_accuracy:.2f}')

Test accuracy with the best hyperparameters : 0.74


## Samplers in Optuna 

In [14]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

def Objective(trial):
    """Return the mean 3-fold CV accuracy of a random forest for one trial.

    The trial supplies ``n_estimators`` from the range 50-200 and
    ``max_depth`` from the range 3-20; the forest is evaluated on
    ``X_train`` / ``y_train``.
    """
    tree_count = trial.suggest_int('n_estimators', 50, 200)  # number of trees
    depth_limit = trial.suggest_int('max_depth', 3, 20)      # maximum depth of each tree

    candidate = RandomForestClassifier(
        n_estimators=tree_count, max_depth=depth_limit, random_state=42
    )
    return cross_val_score(candidate, X_train, y_train, scoring='accuracy', cv=3).mean()


In [15]:
# RandomSampler draws each trial's hyperparameters at random from the search
# space, independently of earlier trials (plain random search). The seed makes
# the drawn configurations repeatable across runs.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.RandomSampler(seed=42),
)
study.optimize(Objective, n_trials=50)

[I 2025-12-25 20:17:19,118] A new study created in memory with name: no-name-d6db9a2b-48b3-4898-87ef-94dc6ed6c8aa
[I 2025-12-25 20:17:19,627] Trial 0 finished with value: 0.7597765363128491 and parameters: {'n_estimators': 194, 'max_depth': 3}. Best is trial 0 with value: 0.7597765363128491.
[I 2025-12-25 20:17:20,046] Trial 1 finished with value: 0.7635009310986964 and parameters: {'n_estimators': 150, 'max_depth': 8}. Best is trial 1 with value: 0.7635009310986964.
[I 2025-12-25 20:17:20,279] Trial 2 finished with value: 0.7635009310986964 and parameters: {'n_estimators': 78, 'max_depth': 20}. Best is trial 1 with value: 0.7635009310986964.
[I 2025-12-25 20:17:20,752] Trial 3 finished with value: 0.7728119180633147 and parameters: {'n_estimators': 161, 'max_depth': 19}. Best is trial 3 with value: 0.7728119180633147.
[I 2025-12-25 20:17:21,051] Trial 4 finished with value: 0.7690875232774674 and parameters: {'n_estimators': 96, 'max_depth': 19}. Best is trial 3 with value: 0.77281191

In [19]:

# Show the best cross-validated accuracy of the study, followed by the
# hyperparameters of the trial that achieved it.
print(
    f'Best trial accuracy: {study.best_trial.value}',
    f'Best hyperparameters: {study.best_trial.params}',
    sep='\n',
)

Best trial accuracy: 0.7821229050279331
Best hyperparameters: {'n_estimators': 73, 'max_depth': 20}


In [20]:
from sklearn.metrics import accuracy_score

# Rebuild the forest with the winning hyperparameters and fit it on the
# training split (fit returns the fitted estimator itself).
best_model = RandomForestClassifier(random_state=42, **study.best_trial.params).fit(X_train, y_train)

# Predict the held-out rows and compute the share of correct predictions.
y_pred = best_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)

print(f'Test Accuracy with best hyperparameters: {test_accuracy:.2f}')


Test Accuracy with best hyperparameters: 0.75


In [17]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

def Objective(trial):
    """Evaluate a random forest configuration proposed by ``trial``.

    Requests ``n_estimators`` (50-200) and ``max_depth`` (3-20) from the
    trial, then returns the accuracy of the forest averaged over 3
    cross-validation folds of ``X_train`` / ``y_train``.
    """
    n_trees = trial.suggest_int('n_estimators', 50, 200)
    depth = trial.suggest_int('max_depth', 3, 20)

    rf = RandomForestClassifier(max_depth=depth, n_estimators=n_trees, random_state=42)

    accuracies = cross_val_score(rf, X_train, y_train, cv=3, scoring='accuracy')
    return accuracies.mean()

In [21]:
# Discrete candidate values per hyperparameter; the grid sampler built from
# this mapping pairs every n_estimators value with every max_depth value.
search_space = dict(
    n_estimators=[50, 100, 150, 200],
    max_depth=[5, 10, 15, 20],
)

In [22]:
# Exhaustive search over the grid defined in `search_space`. The seed fixes
# the order in which the grid points are visited, so trial numbering is
# repeatable across runs.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.GridSampler(search_space, seed=42),
)

# Run exactly one trial per grid combination rather than asking for more
# trials than the grid contains.
study.optimize(
    Objective,
    n_trials=len(search_space['n_estimators']) * len(search_space['max_depth']),
)

[I 2025-12-25 20:37:26,061] A new study created in memory with name: no-name-4632d733-8e46-45d7-8f5b-24d28c970713


[I 2025-12-25 20:37:26,354] Trial 0 finished with value: 0.7690875232774674 and parameters: {'n_estimators': 100, 'max_depth': 5}. Best is trial 0 with value: 0.7690875232774674.
[I 2025-12-25 20:37:26,814] Trial 1 finished with value: 0.7672253258845437 and parameters: {'n_estimators': 150, 'max_depth': 10}. Best is trial 0 with value: 0.7690875232774674.
[I 2025-12-25 20:37:26,986] Trial 2 finished with value: 0.7728119180633147 and parameters: {'n_estimators': 50, 'max_depth': 15}. Best is trial 2 with value: 0.7728119180633147.
[I 2025-12-25 20:37:27,292] Trial 3 finished with value: 0.7653631284916201 and parameters: {'n_estimators': 100, 'max_depth': 15}. Best is trial 2 with value: 0.7728119180633147.
[I 2025-12-25 20:37:27,604] Trial 4 finished with value: 0.7690875232774674 and parameters: {'n_estimators': 100, 'max_depth': 20}. Best is trial 2 with value: 0.7728119180633147.
[I 2025-12-25 20:37:27,787] Trial 5 finished with value: 0.7579143389199254 and parameters: {'n_estima

In [23]:

# Summarise the grid search: best cross-validated accuracy first, then the
# grid point that achieved it.
print(
    f'Best trial accuracy: {study.best_trial.value}',
    f'Best hyperparameters: {study.best_trial.params}',
    sep='\n',
)

Best trial accuracy: 0.7746741154562384
Best hyperparameters: {'n_estimators': 50, 'max_depth': 5}


In [24]:
from sklearn.metrics import accuracy_score

# Instantiate the forest with the best grid point, then train it on the
# training split.
best_model = RandomForestClassifier(random_state=42, **study.best_trial.params)
best_model.fit(X_train, y_train)

# Compare predictions for the held-out split against the true labels.
y_pred = best_model.predict(X_test)
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print(f'Test Accuracy with best hyperparameters: {test_accuracy:.2f}')


Test Accuracy with best hyperparameters: 0.74


## Optimize Multiple ML Models 

In [25]:
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
from sklearn.svm import SVC

In [32]:
def Objective(trial):
    """Score one Optuna trial that picks a model family and its hyperparameters.

    The trial first chooses between an SVM, a random forest and a gradient
    boosting classifier, then samples the hyperparameters that belong to the
    chosen family. The parameter names registered with the trial match the
    estimator keyword arguments (e.g. ``min_samples_leaf``).

    Args:
        trial: The Optuna trial used to sample the configuration.

    Returns:
        Mean accuracy over 3 cross-validation folds of ``X_train`` / ``y_train``.

    Raises:
        ValueError: If the sampled classifier name has no matching branch.
    """
    classifier_name = trial.suggest_categorical(
        'classifier',
        ['SVM', 'RandomForestClassifier', 'GradientBoostingClassifier'],
    )

    if classifier_name == 'SVM':
        c = trial.suggest_float('C', 0.1, 100, log=True)
        kernel = trial.suggest_categorical('kernel', ['linear', 'rbf', 'poly', 'sigmoid'])
        gamma = trial.suggest_categorical('gamma', ['scale', 'auto'])

        model = SVC(C=c, kernel=kernel, gamma=gamma, random_state=42)

    elif classifier_name == 'RandomForestClassifier':
        n_estimators = trial.suggest_int('n_estimators', 50, 100)
        max_depth = trial.suggest_int('max_depth', 3, 20)
        min_samples_split = trial.suggest_int('min_samples_split', 2, 10)
        # Registered as 'min_samples_leaf' (previously misspelled 'min_sample_leaf')
        # so the recorded parameter name matches the estimator argument.
        min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 10)
        bootstrap = trial.suggest_categorical('bootstrap', [True, False])

        model = RandomForestClassifier(
            n_estimators=n_estimators,
            max_depth=max_depth,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            bootstrap=bootstrap,
            random_state=42,
        )

    elif classifier_name == 'GradientBoostingClassifier':
        n_estimators = trial.suggest_int('n_estimators', 50, 300)
        learning_rate = trial.suggest_float('learning_rate', 0.01, 0.3, log=True)
        max_depth = trial.suggest_int('max_depth', 3, 20)
        min_samples_split = trial.suggest_int('min_samples_split', 2, 10)
        min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 10)

        model = GradientBoostingClassifier(
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            max_depth=max_depth,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            random_state=42,
        )

    else:
        # Guards against a choice being added above without a matching branch,
        # which would otherwise leave `model` undefined.
        raise ValueError(f'Unsupported classifier: {classifier_name}')

    score = cross_val_score(model, X_train, y_train, cv=3, scoring='accuracy').mean()
    return score


In [33]:
# The default sampler is made explicit and seeded so that the sequence of
# sampled classifiers and hyperparameters is repeatable across runs.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(Objective, n_trials=100)

[I 2025-12-25 21:59:27,158] A new study created in memory with name: no-name-8996fa7a-1c4a-4655-8ff8-ac7be1608ea7
[I 2025-12-25 21:59:27,899] Trial 0 finished with value: 0.7411545623836128 and parameters: {'classifier': 'GradientBoostingClassifier', 'n_estimators': 103, 'learning_rate': 0.1220674306138426, 'max_depth': 13, 'min_samples_split': 7, 'min_samples_leaf': 9}. Best is trial 0 with value: 0.7411545623836128.
[I 2025-12-25 21:59:28,584] Trial 1 finished with value: 0.74487895716946 and parameters: {'classifier': 'GradientBoostingClassifier', 'n_estimators': 81, 'learning_rate': 0.08941275888021147, 'max_depth': 12, 'min_samples_split': 9, 'min_samples_leaf': 5}. Best is trial 1 with value: 0.74487895716946.
[I 2025-12-25 21:59:28,741] Trial 2 finished with value: 0.7523277467411545 and parameters: {'classifier': 'RandomForestClassifier', 'n_estimators': 58, 'max_depth': 3, 'min_samples_split': 7, 'min_sample_leaf': 10, 'bootstrap': True}. Best is trial 2 with value: 0.75232774

In [34]:
# Fetch the winning trial once, then report its configuration and its
# cross-validated accuracy.
best_trial = study.best_trial
print('{} {}'.format('Best trial parameters : ', best_trial.params))
print('{} {}'.format('Best trial accuracy : ', best_trial.value))

Best trial parameters :  {'classifier': 'SVM', 'C': 97.96497825940004, 'kernel': 'linear', 'gamma': 'scale'}
Best trial accuracy :  0.7858472998137801


In [40]:
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
from sklearn.svm import SVC

In [43]:
def Objective(trial):
    """Score one Optuna trial that picks a model family and its hyperparameters.

    The trial chooses between 'SVM', 'RandomForest' and 'GradientBoosting',
    then samples the hyperparameters of the chosen family. The parameter names
    registered with the trial match the estimator keyword arguments
    (``min_samples_split``, ``min_samples_leaf``).

    Args:
        trial: The Optuna trial used to sample the configuration.

    Returns:
        Mean accuracy over 3 cross-validation folds of ``X_train`` / ``y_train``.

    Raises:
        ValueError: If the sampled classifier name has no matching branch.
    """
    classifier_name = trial.suggest_categorical(
        'classifier', ['SVM', 'RandomForest', 'GradientBoosting']
    )

    if classifier_name == 'SVM':
        c = trial.suggest_float('C', 0.1, 100, log=True)
        kernel = trial.suggest_categorical('kernel', ['linear', 'rbf', 'poly', 'sigmoid'])
        gamma = trial.suggest_categorical('gamma', ['scale', 'auto'])
        model = SVC(C=c, kernel=kernel, gamma=gamma, random_state=42)

    elif classifier_name == 'RandomForest':
        n_estimators = trial.suggest_int('n_estimators', 50, 300)
        max_depth = trial.suggest_int('max_depth', 3, 20)
        # Registered under the estimator's argument names (previously misspelled
        # 'min_sample_split' / 'min_sample_leaf').
        min_samples_split = trial.suggest_int('min_samples_split', 2, 10)
        min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 10)
        bootstrap = trial.suggest_categorical('bootstrap', [True, False])

        model = RandomForestClassifier(
            n_estimators=n_estimators,
            max_depth=max_depth,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            bootstrap=bootstrap,
            random_state=42,
        )

    elif classifier_name == 'GradientBoosting':
        n_estimators = trial.suggest_int('n_estimators', 50, 300)
        learning_rate = trial.suggest_float('learning_rate', 0.01, 0.3, log=True)
        max_depth = trial.suggest_int('max_depth', 3, 20)
        min_samples_split = trial.suggest_int('min_samples_split', 2, 10)
        min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 10)

        model = GradientBoostingClassifier(
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            max_depth=max_depth,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            random_state=42,
        )

    else:
        # Guards against a choice being added above without a matching branch,
        # which would otherwise leave `model` undefined.
        raise ValueError(f'Unsupported classifier: {classifier_name}')

    score = cross_val_score(model, X_train, y_train, cv=3, scoring='accuracy').mean()
    return score

In [44]:
# The default sampler is made explicit and seeded so that the sequence of
# sampled classifiers and hyperparameters is repeatable across runs.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(Objective, n_trials=100)

[I 2025-12-25 22:21:31,170] A new study created in memory with name: no-name-0dca27ce-c757-459a-a264-6e5de5b0de0a
[I 2025-12-25 22:21:31,187] Trial 0 finished with value: 0.7094972067039106 and parameters: {'classifier': 'SVM', 'C': 9.261452250274756, 'kernel': 'poly', 'gamma': 'scale'}. Best is trial 0 with value: 0.7094972067039106.
[I 2025-12-25 22:21:31,200] Trial 1 finished with value: 0.7672253258845437 and parameters: {'classifier': 'SVM', 'C': 1.4405067007202756, 'kernel': 'rbf', 'gamma': 'scale'}. Best is trial 1 with value: 0.7672253258845437.
[I 2025-12-25 22:21:31,247] Trial 2 finished with value: 0.7858472998137801 and parameters: {'classifier': 'SVM', 'C': 19.23573364140885, 'kernel': 'linear', 'gamma': 'auto'}. Best is trial 2 with value: 0.7858472998137801.
[I 2025-12-25 22:21:31,857] Trial 3 finished with value: 0.7467411545623835 and parameters: {'classifier': 'GradientBoosting', 'n_estimators': 64, 'learning_rate': 0.051715951373239706, 'max_depth': 19, 'min_samples_

In [45]:
# Look up the winning trial once, then print its configuration followed by
# its cross-validated accuracy.
best_trial = study.best_trial
print('{} {}'.format('Best trial parameters :', best_trial.params))
print('{} {}'.format('Best trial accuracy :', best_trial.value))

Best trial parameters : {'classifier': 'SVM', 'C': 0.13728435446507645, 'kernel': 'linear', 'gamma': 'auto'}
Best trial accuracy : 0.7895716945996275
