In [13]:
import time
import pandas as pd
import numpy as np
import warnings
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.metrics import accuracy_score
import optuna


pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 1000)
warnings.filterwarnings("ignore")

In [None]:
rf_grid = [{'n_estimators': [300, 500, 1000, 2000],
           'criterion': ['gini', 'entropy'],
              'max_depth': [10, 20, 30, None],
                'min_samples_split': [2, 5, 8, 13],
               'min_samples_leaf': [2, 5, 8, 13],
               'max_features': ['sqrt', 'log2', None],
               'bootstrap': [True, False]
           }]

In [27]:
def RF_objective(trial):
    max_depth = trial.suggest_int('max_depth', 1, 50)
    max_leaf_nodes = trial.suggest_int('max_leaf_nodes', 2, 1000)
    n_estimators =  trial.suggest_int('n_estimators', 100, 2000)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 21)
    min_samples_leaf = trial.suggest_int('min_samples_leaf', 2, 21)
    max_features = trial.suggest_categorical('max_features', ['sqrt', 'log2', None])
    bootstrap = trial.suggest_categorical('bootstrap', [True,False])
   
    model = RandomForestClassifier(max_depth = max_depth, max_leaf_nodes = max_leaf_nodes,n_estimators = n_estimators,
                                   min_samples_split= min_samples_split, min_samples_leaf=min_samples_leaf,
                                   max_features=max_features, bootstrap=bootstrap,random_state=24)

    
    model.fit(x, y)    
    scores = cross_val_score(model, x, y, cv=skf, n_jobs=-1, scoring="accuracy",verbose=24,error_score='raise')
    acc_mean = scores.mean()

    return acc_mean

In [28]:
RF_study = optuna.create_study(direction='maximize')
RF_study.optimize(RF_objective, n_trials=10)

[32m[I 2023-02-21 23:35:06,884][0m A new study created in memory with name: no-name-894f2a4e-a87c-429c-82ac-13b56abf1bcf[0m
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   34.4s
[Parallel(n_jobs=-1)]: Done   2 out of   5 | elapsed:   34.5s remaining:   51.7s
[Parallel(n_jobs=-1)]: Done   3 out of   5 | elapsed:   34.5s remaining:   22.9s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:   34.6s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:   34.6s finished
[32m[I 2023-02-21 23:36:09,125][0m Trial 0 finished with value: 0.7101842374616172 and parameters: {'max_depth': 26, 'max_leaf_nodes': 227, 'n_estimators': 1184, 'min_samples_split': 19, 'min_samples_leaf': 7, 'max_features': 'sqrt', 'bootstrap': True}. Best is trial 0 with value: 0.7101842374616172.[0m
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tas

In [29]:
trial = RF_study.best_trial

print('Accuracy: {}'.format(trial.value))
print("Best hyperparameters: {}".format(trial.params))

Accuracy: 0.7662231320368476
Best hyperparameters: {'max_depth': 50, 'max_leaf_nodes': 786, 'n_estimators': 1826, 'min_samples_split': 12, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}
