## Imports

In [1]:
import optuna
import optuna.logging

from sklearn import datasets
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from optuna.samplers import TPESampler
from time import time

optuna.logging.disable_default_handler()

## Data

In [2]:
digits = datasets.load_digits()

n_samples = len(digits.images)
X = digits.images.reshape((n_samples, -1))
y = digits.target

## Classifier

In [3]:
# random forest classifier object
rfc = RandomForestClassifier(random_state=42)

## Hyperparameter search space

In [4]:
hyperparam_grid = {
    'n_estimators': [50,100,150,200],
    'criterion': ['gini', 'entropy'],
    'min_samples_split': [2, 3, 4],
    'min_samples_leaf': [1, 2, 3, 4, 5],
    'max_features': ['sqrt', 'log2'],
    'max_depth': [5, 6, 7]
    }

## Results summary function

In [5]:
def get_best_results(param_object, total_time):

    iterations = len(param_object.cv_results_['params'])
    best_score = param_object.best_score_
    best_index = param_object.best_index_+1
    best_params = param_object.best_params_

    print(f"---{param_object.__class__.__name__}---")
    print(f"Total time: {total_time:.2f} seconds")
    print(f"Number of iterations: {iterations}")
    print(f"Best trial index: {best_index}")
    print(f"Best score: {best_score}")
    print(f"Best hyperparameters: {best_params}")

## Grid Search

In [6]:
grid_search = GridSearchCV(estimator=rfc,
                  param_grid=hyperparam_grid,
                  scoring='f1_micro',
                  n_jobs=-1,
                  verbose=0)

start = time()
grid_search.fit(X, y)
total_time = time() - start

get_best_results(grid_search, total_time)

---GridSearchCV---
Total time: 103.56 seconds
Number of iterations: 720
Best trial index: 667
Best score: 0.9360213556174559
Best hyperparameters: {'criterion': 'entropy', 'max_depth': 7, 'max_features': 'log2', 'min_samples_leaf': 1, 'min_samples_split': 3, 'n_estimators': 150}


## Random Search

In [7]:
random_search = RandomizedSearchCV(estimator=rfc,
                  param_distributions=hyperparam_grid,
                  scoring='f1_micro',
                  n_jobs=-1,
                  verbose=0,
                  n_iter=360)

# perform hyperparamter tuning
start = time()
random_search.fit(X, y)
total_time = time() - start

# store result in a data frame 
get_best_results(random_search, total_time)

---RandomizedSearchCV---
Total time: 50.89 seconds
Number of iterations: 360
Best trial index: 137
Best score: 0.9360213556174559
Best hyperparameters: {'n_estimators': 150, 'min_samples_split': 3, 'min_samples_leaf': 1, 'max_features': 'log2', 'max_depth': 7, 'criterion': 'entropy'}


## Optuna

In [8]:
def objective(trial):

    # search space
    n_estimators = trial.suggest_int('n_estimators', low=50, high=200, step=50)
    criterion = trial.suggest_categorical('criterion', ['gini', 'entropy'])
    min_samples_split = trial.suggest_int('min_samples_split', low=2, high=4, step=1)
    min_samples_leaf = trial.suggest_int('min_samples_leaf', low=1, high=5, step=1)
    max_depth = trial.suggest_int('max_depth', low=5, high=7, step=1)
    max_features = trial.suggest_categorical('max_features', [ 'sqrt','log2'])

    # random forest classifier
    rfc = RandomForestClassifier(n_estimators=n_estimators, 
                                 criterion=criterion,
                                 min_samples_split=min_samples_split,
                                 min_samples_leaf=min_samples_leaf,
                                 max_depth=max_depth,
                                 max_features=max_features)
    
    # return score
    return cross_val_score(estimator=rfc, 
                             X=X, 
                             y=y, 
                             scoring='f1_micro',
                             n_jobs=-1).mean()


# create a study (aim to maximize score)
study = optuna.create_study(sampler=TPESampler(), direction='maximize')

# perform hyperparamter tuning (while timing the process)
start = time()
study.optimize(objective, n_trials=100)
total_time = time() - start

print(f"---Bayesian Optimization---")
print(f"Total time: {total_time:.2f} seconds")
print(f"Number of iterations: {100}")
print(f"Best trial index: {study.best_trial.number}")
print(f"Best score: {study.best_trial.value}")
print(f"Best hyperparameters: {study.best_params}")

---Bayesian Optimization---
Total time: 30.93 seconds
Number of iterations: 100
Best trial index: 71
Best score: 0.9349071494893222
Best hyperparameters: {'n_estimators': 150, 'criterion': 'entropy', 'min_samples_split': 3, 'min_samples_leaf': 1, 'max_depth': 7, 'max_features': 'sqrt'}


In [9]:
import time
from skopt import BayesSearchCV
from skopt.space import Integer, Categorical
from sklearn.ensemble import RandomForestClassifier


# Definição do espaço de busca
search_space = {
    'n_estimators': Categorical([50, 100, 150, 200]),
    'criterion': Categorical(['gini', 'entropy']),
    'min_samples_split': Integer(2, 4),
    'min_samples_leaf': Integer(1, 5),
    'max_depth': Integer(5, 7),
    'max_features': Categorical(['sqrt', 'log2'])
}

# Inicialização do classificador
rfc = RandomForestClassifier(random_state=42)

# Inicialização do BayesSearchCV
bayes_search = BayesSearchCV(
    estimator=rfc,
    search_spaces=search_space,
    n_iter=100,
    scoring='f1_micro',
    cv=5,  # Pode ajustar conforme necessário
    n_jobs=-1,  # Utiliza todos os núcleos disponíveis
    random_state=42,
    verbose=0  # Ajuste para 1 ou 2 para mais detalhes durante a execução
)

# Medição do tempo e execução da otimização
start_time = time.time()
bayes_search.fit(X, y)
total_time = time.time() - start_time

# Extração dos resultados
best_index = bayes_search.best_index_
best_score = bayes_search.best_score_
best_params = bayes_search.best_params_
#n_iterations = bayes_search.n_iter_

# Impressão dos resultados
print(f"--- Otimização Bayesiana com skopt.BayesSearchCV ---")
print(f"Tempo total: {total_time:.2f} segundos")
#print(f"Número de iterações: {n_iterations}")
print(f"Índice da melhor tentativa: {best_index}")
print(f"Melhor pontuação (f1_micro): {best_score:.4f}")
print(f"Melhores hiperparâmetros: {best_params}")




--- Otimização Bayesiana com skopt.BayesSearchCV ---
Tempo total: 268.16 segundos
Índice da melhor tentativa: 38
Melhor pontuação (f1_micro): 0.9355
Melhores hiperparâmetros: OrderedDict([('criterion', 'entropy'), ('max_depth', 7), ('max_features', 'sqrt'), ('min_samples_leaf', 1), ('min_samples_split', 2), ('n_estimators', 200)])
