<a href="https://colab.research.google.com/github/mahesh-ml/MLBasics/blob/main/Optuna_logitRF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install optuna
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import optuna

from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score , roc_auc_score
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression

breast_cancer_X , breast_cancer_y = load_breast_cancer(return_X_y=True)
X = pd.DataFrame(breast_cancer_X)
y = pd.Series(breast_cancer_y).map({0:1, 1:0})

X.head()
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3, random_state=0)

def objective(trial):
    classifier_name = trial.suggest_categorical("classifier",["logit","RF"])
    if classifier_name =='logit':
              logit_penalty = trial.suggest_categorical('logit_penalty',['l1','l2'])
              logit_c= trial.suggest_float('logit_c',0.001,10)
              logit_solver='saga'
          
              model = LogisticRegression(penalty=logit_penalty,
                                     C = logit_c,
                                     solver=logit_solver
                                     )
    elif classifier_name =='RF':
              rf_n_estimators = trial.suggest_int("rf_n_estimators",100,1000)
              rf_criterion = trial.suggest_categorical("rf_criterion",['gini','entropy'])
              rf_max_depth= trial.suggest_int("rf_max_depth",1,4)
              rf_min_samples_split=trial.suggest_float('rf_min_samples_split',0.01,1)
    
              model = RandomForestClassifier(n_estimators=rf_n_estimators,
                                     criterion=rf_criterion,
                                     max_depth=rf_max_depth,
                                     min_samples_split=rf_min_samples_split
                                     )

    score = cross_val_score(model,X_train,y_train,cv=3)
    accuracy = score.mean()
    return accuracy

#randomize search
import optuna
study = optuna.create_study(
        direction="maximize",
        sampler=optuna.samplers.RandomSampler()
        
)

study.optimize(objective, n_trials=5)









Collecting optuna
  Downloading optuna-2.10.0-py3-none-any.whl (308 kB)
[K     |████████████████████████████████| 308 kB 7.6 MB/s 
[?25hCollecting colorlog
  Downloading colorlog-6.6.0-py2.py3-none-any.whl (11 kB)
Collecting cliff
  Downloading cliff-3.9.0-py3-none-any.whl (80 kB)
[K     |████████████████████████████████| 80 kB 9.7 MB/s 
Collecting cmaes>=0.8.2
  Downloading cmaes-0.8.2-py3-none-any.whl (15 kB)
Collecting alembic
  Downloading alembic-1.7.4-py3-none-any.whl (209 kB)
[K     |████████████████████████████████| 209 kB 61.8 MB/s 
Collecting Mako
  Downloading Mako-1.1.5-py2.py3-none-any.whl (75 kB)
[K     |████████████████████████████████| 75 kB 4.5 MB/s 
Collecting cmd2>=1.0.0
  Downloading cmd2-2.2.0-py3-none-any.whl (144 kB)
[K     |████████████████████████████████| 144 kB 63.3 MB/s 
Collecting pbr!=2.1.0,>=2.0.0
  Downloading pbr-5.7.0-py2.py3-none-any.whl (112 kB)
[K     |████████████████████████████████| 112 kB 75.6 MB/s 
[?25hCollecting autopage>=0.4.0
  Down

[32m[I 2021-11-10 20:01:31,626][0m A new study created in memory with name: no-name-e906798b-2d68-4cf8-bf82-71617befa02d[0m
[32m[I 2021-11-10 20:01:32,095][0m Trial 0 finished with value: 0.6256360598465861 and parameters: {'classifier': 'RF', 'rf_n_estimators': 104, 'rf_criterion': 'entropy', 'rf_max_depth': 3, 'rf_min_samples_split': 0.8967964785810173}. Best is trial 0 with value: 0.6256360598465861.[0m
[32m[I 2021-11-10 20:01:36,314][0m Trial 1 finished with value: 0.9171223513328776 and parameters: {'classifier': 'RF', 'rf_n_estimators': 878, 'rf_criterion': 'entropy', 'rf_max_depth': 3, 'rf_min_samples_split': 0.6001396602946567}. Best is trial 1 with value: 0.9171223513328776.[0m

The max_iter was reached which means the coef_ did not converge


The max_iter was reached which means the coef_ did not converge


The max_iter was reached which means the coef_ did not converge

[32m[I 2021-11-10 20:01:36,377][0m Trial 2 finished with value: 0.9120148856990963 and paramete

Best Param ---- {'classifier': 'RF', 'rf_n_estimators': 878, 'rf_criterion': 'entropy', 'rf_max_depth': 3, 'rf_min_samples_split': 0.6001396602946567}
Best accuracy----  0.9171223513328776
experiment table----    number     value  ... params_rf_n_estimators     state
0       0  0.625636  ...                  104.0  COMPLETE
1       1  0.917122  ...                  878.0  COMPLETE
2       2  0.912015  ...                    NaN  COMPLETE
3       3  0.912015  ...                    NaN  COMPLETE
4       4  0.625636  ...                  587.0  COMPLETE

[5 rows x 13 columns]


In [3]:
print('Best Param ----' ,study.best_params)


print('Best accuracy---- ',study.best_value)


print(study.trials_dataframe())

Best Param ---- {'classifier': 'RF', 'rf_n_estimators': 878, 'rf_criterion': 'entropy', 'rf_max_depth': 3, 'rf_min_samples_split': 0.6001396602946567}
Best accuracy----  0.9171223513328776
   number     value  ... params_rf_n_estimators     state
0       0  0.625636  ...                  104.0  COMPLETE
1       1  0.917122  ...                  878.0  COMPLETE
2       2  0.912015  ...                    NaN  COMPLETE
3       3  0.912015  ...                    NaN  COMPLETE
4       4  0.625636  ...                  587.0  COMPLETE

[5 rows x 13 columns]
