<a href="https://colab.research.google.com/github/mahesh-ml/MLBasics/blob/main/Optuna.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import optuna

from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score , roc_auc_score
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.ensemble import RandomForestClassifier

In [5]:
breast_cancer_X , breast_cancer_y = load_breast_cancer(return_X_y=True)
X = pd.DataFrame(breast_cancer_X)
y = pd.Series(breast_cancer_y).map({0:1, 1:0})

In [7]:
X.head()
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3, random_state=0)

In [11]:
def objective(trial):
  rf_n_estimators = trial.suggest_int("rf_n_estimators",100,1000)
  rf_criterion = trial.suggest_categorical("rf_criterion",['gini','entropy'])
  rf_max_depth= trial.suggest_int("rf_max_depth",1,4)
  rf_min_samples_split=trial.suggest_float('rf_min_samples_split',0.01,1)

  model = RandomForestClassifier(n_estimators=rf_n_estimators,
                                 criterion=rf_criterion,
                                 max_depth=rf_max_depth,
                                 min_samples_split=rf_min_samples_split
                                 )
  score = cross_val_score(model,X_train,y_train,cv=3)
  accuracy = score.mean()
  return accuracy

In [12]:
#randomize search
import optuna
study = optuna.create_study(
        direction="maximize",
        sampler=optuna.samplers.RandomSampler()
        
)

study.optimize(objective, n_trials=5)

[32m[I 2021-11-10 18:38:22,278][0m A new study created in memory with name: no-name-b3969216-690f-4ad4-9c0c-38402ae08903[0m
[32m[I 2021-11-10 18:38:25,741][0m Trial 0 finished with value: 0.9221538695222905 and parameters: {'rf_n_estimators': 724, 'rf_criterion': 'entropy', 'rf_max_depth': 1, 'rf_min_samples_split': 0.42318248158936195}. Best is trial 0 with value: 0.9221538695222905.[0m
[32m[I 2021-11-10 18:38:30,583][0m Trial 1 finished with value: 0.9397357028935976 and parameters: {'rf_n_estimators': 988, 'rf_criterion': 'gini', 'rf_max_depth': 3, 'rf_min_samples_split': 0.12283458070851351}. Best is trial 1 with value: 0.9397357028935976.[0m
[32m[I 2021-11-10 18:38:33,929][0m Trial 2 finished with value: 0.9171223513328776 and parameters: {'rf_n_estimators': 754, 'rf_criterion': 'gini', 'rf_max_depth': 3, 'rf_min_samples_split': 0.49188205221237097}. Best is trial 1 with value: 0.9397357028935976.[0m
[32m[I 2021-11-10 18:38:35,909][0m Trial 3 finished with value: 0.6

In [13]:
study.best_params

{'rf_criterion': 'gini',
 'rf_max_depth': 3,
 'rf_min_samples_split': 0.12283458070851351,
 'rf_n_estimators': 988}

In [14]:
study.best_value

0.9397357028935976

In [16]:
study.trials_dataframe()

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_rf_criterion,params_rf_max_depth,params_rf_min_samples_split,params_rf_n_estimators,state
0,0,0.922154,2021-11-10 18:38:22.281093,2021-11-10 18:38:25.741189,0 days 00:00:03.460096,entropy,1,0.423182,724,COMPLETE
1,1,0.939736,2021-11-10 18:38:25.743109,2021-11-10 18:38:30.582887,0 days 00:00:04.839778,gini,3,0.122835,988,COMPLETE
2,2,0.917122,2021-11-10 18:38:30.590528,2021-11-10 18:38:33.928795,0 days 00:00:03.338267,gini,3,0.491882,754,COMPLETE
3,3,0.625636,2021-11-10 18:38:33.931218,2021-11-10 18:38:35.908808,0 days 00:00:01.977590,gini,3,0.742761,476,COMPLETE
4,4,0.625636,2021-11-10 18:38:35.912133,2021-11-10 18:38:38.237779,0 days 00:00:02.325646,gini,2,0.649263,556,COMPLETE


In [17]:
#TPE sampler

study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler()
)

study.optimize(objective, n_trials=10)

[32m[I 2021-11-10 18:43:38,568][0m A new study created in memory with name: no-name-7eecfe99-8b1f-406c-895c-f4e960ccd13e[0m
[32m[I 2021-11-10 18:43:42,365][0m Trial 0 finished with value: 0.9171223513328776 and parameters: {'rf_n_estimators': 841, 'rf_criterion': 'gini', 'rf_max_depth': 2, 'rf_min_samples_split': 0.5486477781715494}. Best is trial 0 with value: 0.9171223513328776.[0m
[32m[I 2021-11-10 18:43:43,404][0m Trial 1 finished with value: 0.9121098200045568 and parameters: {'rf_n_estimators': 229, 'rf_criterion': 'gini', 'rf_max_depth': 1, 'rf_min_samples_split': 0.505327162989641}. Best is trial 0 with value: 0.9171223513328776.[0m
[32m[I 2021-11-10 18:43:47,978][0m Trial 2 finished with value: 0.9321409584567478 and parameters: {'rf_n_estimators': 869, 'rf_criterion': 'entropy', 'rf_max_depth': 3, 'rf_min_samples_split': 0.3216802347515902}. Best is trial 2 with value: 0.9321409584567478.[0m
[32m[I 2021-11-10 18:43:49,590][0m Trial 3 finished with value: 0.92217

In [18]:
study.best_params

{'rf_criterion': 'entropy',
 'rf_max_depth': 3,
 'rf_min_samples_split': 0.23844726104926994,
 'rf_n_estimators': 126}

In [19]:
study.best_value

0.9447292473608263