# Optimización de un modelo de Random Forest

Este notebook recoge los resultados de la búsqueda del mejor modelo de clasificación mediante Random Forest. El método entrena varios árboles de decisión (de clasificación en este caso) en submuestras de los datos y combina sus resultados para mejorar su precisión.

Para buscar el mejor modelo posible, se tratará de buscar los mejores hiperparámetros para:

* El número de árboles del bosque.
* La profundidad máxima que alcanzan estos.
* Función para evaluar una nueva división de una rama.

### Preparación de los datos

In [2]:
import pandas as pd
import numpy as np
# Data partition
from sklearn.model_selection import train_test_split
# Parameter tunning libraries
import optuna
from sklearn.model_selection import GridSearchCV
# Accuracy function
from sklearn.metrics import accuracy_score
# Model
from sklearn.ensemble import RandomForestClassifier

In [4]:
# Datos de entrenamiento
trainFNC = pd.read_csv("../data/train_FNC.csv")
trainSBM = pd.read_csv("../data/train_SBM.csv")
train_labels = pd.read_csv("../data/train_labels.csv")

# DataFrame con ambas fuentes de datos
train = pd.merge(left=trainFNC, right=trainSBM, left_on='Id', right_on='Id')
data = pd.merge(left=train_labels, right=train, left_on='Id', right_on='Id')
data.drop("Id", inplace=True, axis=1)

# Shuffle de los datos de train
data = data.sample(frac=1, random_state=0)
data.head(5)

Unnamed: 0,Class,FNC1,FNC2,FNC3,FNC4,FNC5,FNC6,FNC7,FNC8,FNC9,...,SBM_map55,SBM_map61,SBM_map64,SBM_map67,SBM_map69,SBM_map71,SBM_map72,SBM_map73,SBM_map74,SBM_map75
2,0,0.24585,0.21662,-0.12468,-0.3538,0.1615,-0.002032,-0.13302,-0.035222,0.25904,...,-0.257114,0.597229,1.220756,-0.059213,-0.435494,-0.092971,1.09091,-0.448562,-0.508497,0.350434
13,1,0.41073,-0.031925,0.2107,0.24226,0.3201,-0.41929,-0.18714,0.16845,0.59979,...,-0.050862,0.870602,0.609465,1.181878,-2.279469,-0.013484,-0.012693,-1.244346,-1.080442,-0.788502
53,1,0.070919,0.034179,-0.011755,0.019158,0.024645,-0.032022,0.00462,0.31817,0.21255,...,-1.539922,-1.495822,1.643866,1.68778,1.521086,-1.988432,-0.267471,0.510576,1.104566,-1.067206
41,0,0.087377,-0.052462,-0.007835,-0.11283,0.38938,0.21608,0.063572,-0.25123,-0.080568,...,-0.077353,-0.459463,-0.204328,-0.619508,-1.410523,-0.304622,-1.521928,0.593691,0.073638,-0.26092
74,0,0.20275,0.19142,-0.056662,-0.15778,0.24404,0.03978,-0.001503,0.001056,-0.048222,...,0.044457,0.593326,1.063052,0.434726,1.604964,-0.359736,0.210107,0.355922,0.730287,-0.323557


Vamos a usar la siguiente partición de los datos:

* 60% train $\sim$ 50 datos
* 20% validation $\sim$ 18 datos (se define al aplicar cross-validación en el ajuste)
* 20% test $\sim$ 18 datos

In [5]:
X = data.iloc[:, 1:]
y = data.iloc[:, 0]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

print("Tamaño del dataset de train:", X_train.shape)
print("Tamaño del dataset de test:", X_test.shape)

Tamaño del dataset de train: (68, 410)
Tamaño del dataset de test: (18, 410)


In [7]:
# Datos de test
testFNC = pd.read_csv("../data/test_FNC.csv")
testSBM = pd.read_csv("../data/test_SBM.csv")

# DataFrame con ambas fuentes de datos
test = pd.merge(left=testFNC, right=testSBM, left_on='Id', right_on='Id')
test.drop("Id", inplace=True, axis=1)
test.head(5)

Unnamed: 0,FNC1,FNC2,FNC3,FNC4,FNC5,FNC6,FNC7,FNC8,FNC9,FNC10,...,SBM_map55,SBM_map61,SBM_map64,SBM_map67,SBM_map69,SBM_map71,SBM_map72,SBM_map73,SBM_map74,SBM_map75
0,0.476127,0.064466,0.053238,-0.608133,0.073988,-0.637038,0.113556,-0.192434,-0.004025,-0.060474,...,-0.451994,1.12377,2.083006,1.14544,-0.067608,1.202529,0.851587,0.451583,-0.159739,0.192076
1,0.013833,0.267183,0.232178,-0.167151,-0.261327,0.191869,0.406493,0.088761,0.177048,0.036718,...,0.696987,1.397832,1.046136,-0.191733,-2.192023,-0.369276,0.822225,-0.109342,-0.580476,0.17416
2,-0.435452,0.04678,0.243742,0.39703,-0.147821,0.17362,-0.461963,-0.610736,0.419753,0.400985,...,0.160145,1.906989,-2.661633,-0.193911,0.440873,0.641739,0.918397,-0.758046,0.154701,-0.476647
3,-0.20451,-0.036735,-0.760705,-0.740495,0.064668,0.349926,-0.273826,-0.174384,-0.120248,0.175618,...,0.974828,-1.997087,-2.083782,1.154107,-0.643947,2.332424,0.659124,-0.809445,0.55896,2.790871
4,0.599435,-0.166441,0.122431,0.011539,0.346906,-0.01743,-0.274734,0.21151,0.151012,-0.033434,...,-0.789153,1.578984,1.402592,-1.23044,0.296686,2.806314,0.427184,-0.240682,-0.196948,-1.544345


### Modelo

In [8]:
def train_model(model, param_grid):
    '''Función para realizar el entrenamiento y la búsqueda de hiperparámetros'''
    grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=4)
    # cv = 4 porque así: el conjunto de validation tiene un 0.25 del tamaño de train y: 0.25 * 0.8 = 0.2 ~ 20% datos
    #                    el conjunto de train tiene un 0.75 del tamaño de train y: 0.75 * 0.8 = 0.6 ~ 60% datos
    grid_search.fit(X_train, y_train)
    
    print("Parámetros óptimos:", grid_search.best_params_)
    print("Modelo óptimo:", grid_search.best_estimator_)
    
    return grid_search.best_estimator_

Primera prueba de resultados, probando con valores concretos para el número de árboles y la profundidad de los mismos (parámetros ``n_estimators`` y ``max_depth`` respectivamente) y utilizando el método ``GridSearchCV`` de ``sklearn`` para realizar la búsqueda:

In [9]:
# Definir y entrenar el modelo
model_RF = RandomForestClassifier(random_state=0)
param_grid_RF = {
    "n_estimators": [100, 250, 500, 750, 1000],
    "criterion": ["gini", "entropy"],
    "max_depth": [5, 10, 15, 20, None]
}
model_RF_opt = train_model(model_RF, param_grid_RF)

# Predicción en partición de test
y_pred_RF = model_RF_opt.predict(X_test)

# Precisión en partición de test
accuracy = accuracy_score(y_test, y_pred_RF)
print("Accuracy: {:0.2f}%".format(accuracy * 100))

# Predicción en test para kaggle
y_pred_kaggle_RF = model_RF_opt.predict(test)

Parámetros óptimos: {'criterion': 'entropy', 'max_depth': 5, 'n_estimators': 750}
Modelo óptimo: RandomForestClassifier(criterion='entropy', max_depth=5, n_estimators=750,
                       random_state=0)
Accuracy: 83.33%


Segunda prueba, con un rango más amplio de valores:

In [10]:
# Definir y entrenar el modelo
model_RF = RandomForestClassifier(random_state=0)
param_grid_RF = {
    "n_estimators": range(50, 1050, 50),
    "criterion": ["gini", "entropy"],
    "max_depth": range(1, 21)
}
model_RF_opt = train_model(model_RF, param_grid_RF)

# Predicción en partición de test
y_pred_RF = model_RF_opt.predict(X_test)

# Precisión en partición de test
accuracy = accuracy_score(y_test, y_pred_RF)
print("Accuracy: {:0.2f}%".format(accuracy * 100))

# Predicción en test para kaggle
y_pred_kaggle_RF = model_RF_opt.predict(test)

Parámetros óptimos: {'criterion': 'entropy', 'max_depth': 4, 'n_estimators': 600}
Modelo óptimo: RandomForestClassifier(criterion='entropy', max_depth=4, n_estimators=600,
                       random_state=0)
Accuracy: 72.22%


$\color{red}{\text{¿POR QUÉ EMPEORA?}}$

La librería ``optuna`` es un framework específico para la optimización de hiperparámetros, repetiremos el proceso de búsqueda anterior utilizando esta librería para ver el rendimiento, se espera que los resultados sean iguales si no mejores.

In [11]:
def objectiveRF(trial):
    
    n_estimators =  trial.suggest_int("n_estimators", 50, 1000, 50) # optuna incluye en el rango el máximo y el mínimo
    criterion = trial.suggest_categorical("criterion", ["gini", "entropy"])
    max_depth = trial.suggest_int("max_depth", 1, 20)
    
    modelRF_optuna = RandomForestClassifier(criterion = criterion, max_depth = max_depth, n_estimators = n_estimators, 
                                            random_state=0)
    
    modelRF_optuna.fit(X_train, y_train)

    y_pred_RF_optuna = modelRF_optuna.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred_RF_optuna)
    return accuracy

In [12]:
sampler = optuna.samplers.TPESampler(seed=0)  # Asegurar los reproducibilidad de los resultados
study = optuna.create_study(direction="maximize", sampler=sampler)
study.optimize(objectiveRF, n_trials=800)
# n_trials = 20 x 2 x 20 = 800

[32m[I 2022-05-23 18:38:24,558][0m A new study created in memory with name: no-name-01857f30-d71f-4367-a0dc-613e3f255b85[0m
[32m[I 2022-05-23 18:38:25,698][0m Trial 0 finished with value: 0.6666666666666666 and parameters: {'n_estimators': 550, 'criterion': 'gini', 'max_depth': 11}. Best is trial 0 with value: 0.6666666666666666.[0m
[32m[I 2022-05-23 18:38:26,282][0m Trial 1 finished with value: 0.6666666666666666 and parameters: {'n_estimators': 450, 'criterion': 'gini', 'max_depth': 18}. Best is trial 0 with value: 0.6666666666666666.[0m
[32m[I 2022-05-23 18:38:27,747][0m Trial 2 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 1000, 'criterion': 'entropy', 'max_depth': 11}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:38:28,432][0m Trial 3 finished with value: 0.6666666666666666 and parameters: {'n_estimators': 600, 'criterion': 'gini', 'max_depth': 2}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-0

[32m[I 2022-05-23 18:39:08,911][0m Trial 38 finished with value: 0.7222222222222222 and parameters: {'n_estimators': 800, 'criterion': 'gini', 'max_depth': 16}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:39:10,301][0m Trial 39 finished with value: 0.7222222222222222 and parameters: {'n_estimators': 950, 'criterion': 'entropy', 'max_depth': 2}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:39:11,300][0m Trial 40 finished with value: 0.7222222222222222 and parameters: {'n_estimators': 650, 'criterion': 'gini', 'max_depth': 5}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:39:12,802][0m Trial 41 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 950, 'criterion': 'entropy', 'max_depth': 12}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:39:14,253][0m Trial 42 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 950, 'criterion':

[32m[I 2022-05-23 18:39:55,325][0m Trial 76 finished with value: 0.8333333333333334 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 4}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:39:56,830][0m Trial 77 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 1000, 'criterion': 'entropy', 'max_depth': 7}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:39:58,240][0m Trial 78 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 900, 'criterion': 'entropy', 'max_depth': 14}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:39:59,589][0m Trial 79 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 900, 'criterion': 'entropy', 'max_depth': 15}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:40:00,872][0m Trial 80 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 850, 'crit

[32m[I 2022-05-23 18:40:44,688][0m Trial 114 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 850, 'criterion': 'entropy', 'max_depth': 16}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:40:46,196][0m Trial 115 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 950, 'criterion': 'entropy', 'max_depth': 14}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:40:47,598][0m Trial 116 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 950, 'criterion': 'entropy', 'max_depth': 14}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:40:48,963][0m Trial 117 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 950, 'criterion': 'entropy', 'max_depth': 18}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:40:50,424][0m Trial 118 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 1000

[32m[I 2022-05-23 18:41:37,938][0m Trial 152 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 900, 'criterion': 'entropy', 'max_depth': 14}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:41:39,247][0m Trial 153 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 900, 'criterion': 'entropy', 'max_depth': 14}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:41:40,555][0m Trial 154 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 900, 'criterion': 'entropy', 'max_depth': 16}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:41:42,035][0m Trial 155 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 1000, 'criterion': 'entropy', 'max_depth': 7}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:41:43,613][0m Trial 156 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 1000

[32m[I 2022-05-23 18:42:25,418][0m Trial 190 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 950, 'criterion': 'entropy', 'max_depth': 14}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:42:26,846][0m Trial 191 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 950, 'criterion': 'entropy', 'max_depth': 17}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:42:28,403][0m Trial 192 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 950, 'criterion': 'entropy', 'max_depth': 18}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:42:29,794][0m Trial 193 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 950, 'criterion': 'entropy', 'max_depth': 9}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:42:31,160][0m Trial 194 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 950, 

[32m[I 2022-05-23 18:43:21,045][0m Trial 228 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 850, 'criterion': 'entropy', 'max_depth': 9}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:43:22,508][0m Trial 229 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 850, 'criterion': 'entropy', 'max_depth': 9}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:43:23,717][0m Trial 230 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 850, 'criterion': 'entropy', 'max_depth': 10}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:43:24,485][0m Trial 231 finished with value: 0.7777777777777778 and parameters: {'n_estimators': 550, 'criterion': 'entropy', 'max_depth': 8}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:43:25,000][0m Trial 232 finished with value: 0.8333333333333334 and parameters: {'n_estimators': 350, 'c

[32m[I 2022-05-23 18:44:09,954][0m Trial 266 finished with value: 0.7777777777777778 and parameters: {'n_estimators': 600, 'criterion': 'entropy', 'max_depth': 7}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:44:11,239][0m Trial 267 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 900, 'criterion': 'entropy', 'max_depth': 15}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:44:11,741][0m Trial 268 finished with value: 0.8333333333333334 and parameters: {'n_estimators': 350, 'criterion': 'entropy', 'max_depth': 17}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:44:13,399][0m Trial 269 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 1000, 'criterion': 'entropy', 'max_depth': 13}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:44:14,768][0m Trial 270 finished with value: 0.6666666666666666 and parameters: {'n_estimators': 900,

[32m[I 2022-05-23 18:45:02,287][0m Trial 304 finished with value: 0.8333333333333334 and parameters: {'n_estimators': 750, 'criterion': 'entropy', 'max_depth': 15}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:45:03,552][0m Trial 305 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 850, 'criterion': 'entropy', 'max_depth': 8}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:45:04,931][0m Trial 306 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 950, 'criterion': 'entropy', 'max_depth': 11}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:45:06,252][0m Trial 307 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 900, 'criterion': 'entropy', 'max_depth': 11}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:45:07,637][0m Trial 308 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 950, 

[32m[I 2022-05-23 18:46:25,110][0m Trial 342 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 1000, 'criterion': 'entropy', 'max_depth': 13}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:46:28,121][0m Trial 343 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 1000, 'criterion': 'entropy', 'max_depth': 20}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:46:31,494][0m Trial 344 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 1000, 'criterion': 'entropy', 'max_depth': 20}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:46:34,816][0m Trial 345 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 1000, 'criterion': 'entropy', 'max_depth': 14}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:46:37,783][0m Trial 346 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 

[32m[I 2022-05-23 18:48:09,667][0m Trial 380 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 850, 'criterion': 'entropy', 'max_depth': 10}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:48:12,318][0m Trial 381 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 5}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:48:14,693][0m Trial 382 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 8}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:48:15,684][0m Trial 383 finished with value: 0.7777777777777778 and parameters: {'n_estimators': 300, 'criterion': 'entropy', 'max_depth': 10}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:48:18,682][0m Trial 384 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 950, '

[32m[I 2022-05-23 18:49:39,611][0m Trial 418 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 850, 'criterion': 'entropy', 'max_depth': 9}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:49:40,604][0m Trial 419 finished with value: 0.8333333333333334 and parameters: {'n_estimators': 350, 'criterion': 'entropy', 'max_depth': 5}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:49:43,654][0m Trial 420 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 950, 'criterion': 'entropy', 'max_depth': 19}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:49:45,939][0m Trial 421 finished with value: 0.8333333333333334 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 4}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:49:49,458][0m Trial 422 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 1000, '

[32m[I 2022-05-23 18:51:25,403][0m Trial 456 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 950, 'criterion': 'entropy', 'max_depth': 18}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:51:28,309][0m Trial 457 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 950, 'criterion': 'entropy', 'max_depth': 17}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:51:31,515][0m Trial 458 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 900, 'criterion': 'entropy', 'max_depth': 19}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:51:34,649][0m Trial 459 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 950, 'criterion': 'entropy', 'max_depth': 19}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:51:38,083][0m Trial 460 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 950,

[32m[I 2022-05-23 18:53:10,184][0m Trial 494 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 400, 'criterion': 'entropy', 'max_depth': 12}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:53:11,923][0m Trial 495 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 1000, 'criterion': 'entropy', 'max_depth': 19}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:53:13,766][0m Trial 496 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 1000, 'criterion': 'entropy', 'max_depth': 19}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:53:15,292][0m Trial 497 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 1000, 'criterion': 'entropy', 'max_depth': 20}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:53:16,825][0m Trial 498 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 1

[32m[I 2022-05-23 18:54:07,730][0m Trial 532 finished with value: 0.8333333333333334 and parameters: {'n_estimators': 350, 'criterion': 'entropy', 'max_depth': 13}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:54:09,206][0m Trial 533 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 1000, 'criterion': 'entropy', 'max_depth': 10}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:54:10,683][0m Trial 534 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 1000, 'criterion': 'entropy', 'max_depth': 10}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:54:12,016][0m Trial 535 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 900, 'criterion': 'entropy', 'max_depth': 13}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:54:13,368][0m Trial 536 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 90

[32m[I 2022-05-23 18:55:02,031][0m Trial 570 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 1000, 'criterion': 'entropy', 'max_depth': 6}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:55:03,808][0m Trial 571 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 1000, 'criterion': 'entropy', 'max_depth': 6}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:55:05,307][0m Trial 572 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 850, 'criterion': 'entropy', 'max_depth': 6}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:55:06,597][0m Trial 573 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 850, 'criterion': 'entropy', 'max_depth': 6}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:55:08,274][0m Trial 574 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 850, '

[32m[I 2022-05-23 18:55:54,309][0m Trial 608 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 1000, 'criterion': 'entropy', 'max_depth': 19}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:55:55,980][0m Trial 609 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 950, 'criterion': 'entropy', 'max_depth': 14}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:55:57,681][0m Trial 610 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 950, 'criterion': 'entropy', 'max_depth': 14}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:55:58,995][0m Trial 611 finished with value: 0.8333333333333334 and parameters: {'n_estimators': 750, 'criterion': 'entropy', 'max_depth': 8}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:56:00,914][0m Trial 612 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 950,

[32m[I 2022-05-23 18:56:43,871][0m Trial 646 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 850, 'criterion': 'entropy', 'max_depth': 10}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:56:45,136][0m Trial 647 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 850, 'criterion': 'entropy', 'max_depth': 20}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:56:46,034][0m Trial 648 finished with value: 0.7777777777777778 and parameters: {'n_estimators': 600, 'criterion': 'entropy', 'max_depth': 20}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:56:47,302][0m Trial 649 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 850, 'criterion': 'entropy', 'max_depth': 9}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:56:48,322][0m Trial 650 finished with value: 0.7777777777777778 and parameters: {'n_estimators': 650, 

[32m[I 2022-05-23 18:57:40,502][0m Trial 684 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 900, 'criterion': 'entropy', 'max_depth': 17}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:57:43,515][0m Trial 685 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 900, 'criterion': 'entropy', 'max_depth': 17}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:57:46,363][0m Trial 686 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 900, 'criterion': 'entropy', 'max_depth': 17}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:57:49,084][0m Trial 687 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 900, 'criterion': 'entropy', 'max_depth': 17}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:57:52,240][0m Trial 688 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 900,

[32m[I 2022-05-23 18:58:56,074][0m Trial 722 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 900, 'criterion': 'entropy', 'max_depth': 17}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:58:57,578][0m Trial 723 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 900, 'criterion': 'entropy', 'max_depth': 17}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:58:59,423][0m Trial 724 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 950, 'criterion': 'entropy', 'max_depth': 18}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:59:01,181][0m Trial 725 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 900, 'criterion': 'entropy', 'max_depth': 17}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:59:02,581][0m Trial 726 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 900,

[32m[I 2022-05-23 18:59:53,897][0m Trial 760 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 950, 'criterion': 'entropy', 'max_depth': 18}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:59:55,387][0m Trial 761 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 1000, 'criterion': 'entropy', 'max_depth': 19}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:59:57,175][0m Trial 762 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 1000, 'criterion': 'entropy', 'max_depth': 11}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 18:59:59,921][0m Trial 763 finished with value: 0.5555555555555556 and parameters: {'n_estimators': 1000, 'criterion': 'entropy', 'max_depth': 1}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 19:00:02,431][0m Trial 764 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 90

[32m[I 2022-05-23 19:01:17,828][0m Trial 798 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 1000, 'criterion': 'entropy', 'max_depth': 19}. Best is trial 2 with value: 0.8888888888888888.[0m
[32m[I 2022-05-23 19:01:19,193][0m Trial 799 finished with value: 0.8888888888888888 and parameters: {'n_estimators': 900, 'criterion': 'entropy', 'max_depth': 14}. Best is trial 2 with value: 0.8888888888888888.[0m


In [13]:
study.best_trial

FrozenTrial(number=2, values=[0.8888888888888888], datetime_start=datetime.datetime(2022, 5, 23, 18, 38, 26, 282051), datetime_complete=datetime.datetime(2022, 5, 23, 18, 38, 27, 731799), params={'n_estimators': 1000, 'criterion': 'entropy', 'max_depth': 11}, distributions={'n_estimators': IntUniformDistribution(high=1000, low=50, step=50), 'criterion': CategoricalDistribution(choices=('gini', 'entropy')), 'max_depth': IntUniformDistribution(high=20, low=1, step=1)}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=2, state=TrialState.COMPLETE, value=None)

In [14]:
# Definir y entrenar el modelo
modelRF_optuna = RandomForestClassifier(criterion = "entropy", max_depth = 11, n_estimators = 1000, random_state=0)  
modelRF_optuna.fit(X_train, y_train)

# Predicción en partición de test
y_pred_RF_optuna = modelRF_optuna.predict(X_test)

# Precisión en partición de test
accuracy = accuracy_score(y_test, y_pred_RF_optuna)
print("Accuracy: {:0.2f}%".format(accuracy * 100))

Accuracy: 88.89%


La librería ``optuna`` obtiene mejores resultados.

El código anterior, aunque realiza una búsqueda sobre el mismo rango de parámetros usados para el segundo intento con ``sklearn``, no está aplicando cross-validation an el entrenamiento. La siguiente celda sí lo implementa mediante el método ``OptunaSearchCV`` de ``optuna``:

In [16]:
# Definir y entrenar el modelo
model_RF = RandomForestClassifier(random_state=0)
param_grid_RF = {
    "n_estimators": optuna.distributions.IntUniformDistribution(50, 1000, 50),
    "criterion": optuna.distributions.CategoricalDistribution(["gini", "entropy"]),
    "max_depth": optuna.distributions.IntUniformDistribution(1, 20)
}

optuna_search = optuna.integration.OptunaSearchCV(model_RF, param_grid_RF, cv=4, n_trials=800, random_state=0)
# n_trials = 20 x 2 x 20 = 800
optuna_search.fit(X_train, y_train)

  optuna_search = optuna.integration.OptunaSearchCV(model_RF, param_grid_RF, cv=4, n_trials=800, random_state=0)
[32m[I 2022-05-23 19:05:42,765][0m A new study created in memory with name: no-name-c59f6910-0389-4284-997d-cb1731c80c85[0m
[32m[I 2022-05-23 19:05:45,917][0m Trial 0 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 500, 'criterion': 'entropy', 'max_depth': 4}. Best is trial 0 with value: 0.6911764705882353.[0m
[32m[I 2022-05-23 19:05:48,713][0m Trial 1 finished with value: 0.6029411764705883 and parameters: {'n_estimators': 600, 'criterion': 'gini', 'max_depth': 15}. Best is trial 0 with value: 0.6911764705882353.[0m
[32m[I 2022-05-23 19:05:53,844][0m Trial 2 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 850, 'criterion': 'entropy', 'max_depth': 11}. Best is trial 0 with value: 0.6911764705882353.[0m
[32m[I 2022-05-23 19:05:54,101][0m Trial 3 finished with value: 0.5588235294117647 and parameters: {'n_estimato

[32m[I 2022-05-23 19:08:04,067][0m Trial 36 finished with value: 0.5882352941176471 and parameters: {'n_estimators': 600, 'criterion': 'gini', 'max_depth': 2}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:08:09,282][0m Trial 37 finished with value: 0.6617647058823529 and parameters: {'n_estimators': 900, 'criterion': 'entropy', 'max_depth': 5}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:08:12,466][0m Trial 38 finished with value: 0.6617647058823529 and parameters: {'n_estimators': 550, 'criterion': 'entropy', 'max_depth': 8}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:08:12,779][0m Trial 39 finished with value: 0.6323529411764707 and parameters: {'n_estimators': 50, 'criterion': 'gini', 'max_depth': 11}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:08:18,048][0m Trial 40 finished with value: 0.6617647058823529 and parameters: {'n_estimators': 900, 'criterion

[32m[I 2022-05-23 19:10:37,269][0m Trial 74 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 18}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:10:40,977][0m Trial 75 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 700, 'criterion': 'entropy', 'max_depth': 17}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:10:45,191][0m Trial 76 finished with value: 0.6617647058823529 and parameters: {'n_estimators': 850, 'criterion': 'gini', 'max_depth': 18}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:10:49,873][0m Trial 77 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 16}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:10:53,813][0m Trial 78 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 650, 'cr

[32m[I 2022-05-23 19:14:02,651][0m Trial 112 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 750, 'criterion': 'entropy', 'max_depth': 19}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:14:07,114][0m Trial 113 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 750, 'criterion': 'entropy', 'max_depth': 13}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:14:10,754][0m Trial 114 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 700, 'criterion': 'entropy', 'max_depth': 13}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:14:14,915][0m Trial 115 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 700, 'criterion': 'entropy', 'max_depth': 14}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:14:19,611][0m Trial 116 finished with value: 0.676470588235294 and parameters: {'n_estimators': 8

[32m[I 2022-05-23 19:16:42,932][0m Trial 150 finished with value: 0.6470588235294118 and parameters: {'n_estimators': 800, 'criterion': 'gini', 'max_depth': 18}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:16:47,009][0m Trial 151 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 750, 'criterion': 'entropy', 'max_depth': 13}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:16:50,846][0m Trial 152 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 700, 'criterion': 'entropy', 'max_depth': 14}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:16:55,194][0m Trial 153 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 750, 'criterion': 'entropy', 'max_depth': 11}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:16:59,950][0m Trial 154 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 850

[32m[I 2022-05-23 19:20:08,992][0m Trial 188 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 750, 'criterion': 'entropy', 'max_depth': 18}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:20:19,554][0m Trial 189 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 750, 'criterion': 'entropy', 'max_depth': 18}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:20:32,028][0m Trial 190 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 750, 'criterion': 'entropy', 'max_depth': 17}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:20:44,540][0m Trial 191 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 700, 'criterion': 'entropy', 'max_depth': 17}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:20:58,179][0m Trial 192 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 

[32m[I 2022-05-23 19:29:20,843][0m Trial 226 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 750, 'criterion': 'entropy', 'max_depth': 17}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:29:34,247][0m Trial 227 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 700, 'criterion': 'entropy', 'max_depth': 15}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:29:54,566][0m Trial 228 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 850, 'criterion': 'entropy', 'max_depth': 9}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:30:12,250][0m Trial 229 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 9}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:30:28,798][0m Trial 230 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 80

[32m[I 2022-05-23 19:38:27,128][0m Trial 264 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 18}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:38:41,138][0m Trial 265 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 15}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:38:55,537][0m Trial 266 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 850, 'criterion': 'entropy', 'max_depth': 18}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:39:08,958][0m Trial 267 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 750, 'criterion': 'entropy', 'max_depth': 17}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:39:16,685][0m Trial 268 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 

[32m[I 2022-05-23 19:46:30,210][0m Trial 302 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 9}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:46:42,481][0m Trial 303 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 750, 'criterion': 'entropy', 'max_depth': 17}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:46:54,599][0m Trial 304 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 750, 'criterion': 'entropy', 'max_depth': 17}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:47:05,960][0m Trial 305 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 700, 'criterion': 'entropy', 'max_depth': 18}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:47:14,327][0m Trial 306 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 8

[32m[I 2022-05-23 19:53:49,317][0m Trial 340 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 850, 'criterion': 'entropy', 'max_depth': 19}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:54:01,875][0m Trial 341 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 11}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:54:12,406][0m Trial 342 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 9}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:54:23,374][0m Trial 343 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 9}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 19:54:29,832][0m Trial 344 finished with value: 0.6323529411764706 and parameters: {'n_estimators': 40

[32m[I 2022-05-23 20:01:00,479][0m Trial 378 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 750, 'criterion': 'entropy', 'max_depth': 8}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 20:01:13,863][0m Trial 379 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 850, 'criterion': 'entropy', 'max_depth': 15}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 20:01:25,898][0m Trial 380 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 750, 'criterion': 'entropy', 'max_depth': 8}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 20:01:36,156][0m Trial 381 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 17}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 20:01:46,565][0m Trial 382 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 85

[32m[I 2022-05-23 20:08:32,738][0m Trial 416 finished with value: 0.6617647058823529 and parameters: {'n_estimators': 600, 'criterion': 'entropy', 'max_depth': 15}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 20:08:35,010][0m Trial 417 finished with value: 0.6323529411764706 and parameters: {'n_estimators': 150, 'criterion': 'entropy', 'max_depth': 15}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 20:08:47,696][0m Trial 418 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 750, 'criterion': 'entropy', 'max_depth': 12}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 20:08:52,281][0m Trial 419 finished with value: 0.6764705882352942 and parameters: {'n_estimators': 300, 'criterion': 'entropy', 'max_depth': 10}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 20:09:04,756][0m Trial 420 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 

[32m[I 2022-05-23 20:15:31,851][0m Trial 454 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 750, 'criterion': 'entropy', 'max_depth': 17}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 20:15:44,254][0m Trial 455 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 850, 'criterion': 'entropy', 'max_depth': 18}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 20:15:57,030][0m Trial 456 finished with value: 0.6617647058823529 and parameters: {'n_estimators': 900, 'criterion': 'entropy', 'max_depth': 11}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 20:16:10,740][0m Trial 457 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 17}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 20:16:24,565][0m Trial 458 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 

[32m[I 2022-05-23 22:08:49,610][0m Trial 493 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 17}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:09:02,242][0m Trial 494 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 850, 'criterion': 'entropy', 'max_depth': 8}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:09:12,858][0m Trial 495 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 850, 'criterion': 'entropy', 'max_depth': 19}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:09:23,762][0m Trial 496 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 19}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:09:36,307][0m Trial 497 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 7

[32m[I 2022-05-23 22:15:43,086][0m Trial 531 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 15}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:15:56,543][0m Trial 532 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 850, 'criterion': 'entropy', 'max_depth': 15}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:16:05,173][0m Trial 533 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 20}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:16:18,308][0m Trial 534 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 20}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:16:29,559][0m Trial 535 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 

[32m[I 2022-05-23 22:21:46,452][0m Trial 569 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 750, 'criterion': 'entropy', 'max_depth': 19}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:21:55,646][0m Trial 570 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 750, 'criterion': 'entropy', 'max_depth': 19}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:22:03,754][0m Trial 571 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 700, 'criterion': 'entropy', 'max_depth': 19}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:22:11,410][0m Trial 572 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 750, 'criterion': 'entropy', 'max_depth': 18}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:22:23,169][0m Trial 573 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 

[32m[I 2022-05-23 22:27:33,885][0m Trial 607 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 750, 'criterion': 'entropy', 'max_depth': 19}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:27:44,184][0m Trial 608 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 20}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:27:53,938][0m Trial 609 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 11}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:28:04,705][0m Trial 610 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 19}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:28:13,756][0m Trial 611 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 

[32m[I 2022-05-23 22:33:51,156][0m Trial 645 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 16}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:34:01,514][0m Trial 646 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 8}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:34:12,114][0m Trial 647 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 18}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:34:22,127][0m Trial 648 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 6}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:34:32,246][0m Trial 649 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 80

[32m[I 2022-05-23 22:40:01,012][0m Trial 683 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 850, 'criterion': 'entropy', 'max_depth': 14}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:40:08,582][0m Trial 684 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 750, 'criterion': 'entropy', 'max_depth': 16}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:40:16,757][0m Trial 685 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 20}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:40:26,141][0m Trial 686 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 14}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:40:36,426][0m Trial 687 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 

[32m[I 2022-05-23 22:45:40,904][0m Trial 721 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 700, 'criterion': 'entropy', 'max_depth': 18}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:45:49,088][0m Trial 722 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 700, 'criterion': 'entropy', 'max_depth': 18}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:45:59,475][0m Trial 723 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 850, 'criterion': 'entropy', 'max_depth': 15}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:46:06,117][0m Trial 724 finished with value: 0.5735294117647058 and parameters: {'n_estimators': 600, 'criterion': 'entropy', 'max_depth': 1}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:46:16,493][0m Trial 725 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 8

[32m[I 2022-05-23 22:51:18,764][0m Trial 759 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 20}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:51:27,395][0m Trial 760 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 20}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:51:35,684][0m Trial 761 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 750, 'criterion': 'entropy', 'max_depth': 10}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:51:43,157][0m Trial 762 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 700, 'criterion': 'entropy', 'max_depth': 10}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:51:51,626][0m Trial 763 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 

[32m[I 2022-05-23 22:57:17,088][0m Trial 797 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 6}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:57:26,962][0m Trial 798 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 7}. Best is trial 12 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 22:57:36,480][0m Trial 799 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 5}. Best is trial 12 with value: 0.7058823529411764.[0m


OptunaSearchCV(cv=4, estimator=RandomForestClassifier(random_state=0),
               n_trials=800,
               param_distributions={'criterion': CategoricalDistribution(choices=('gini', 'entropy')),
                                    'max_depth': IntUniformDistribution(high=20, low=1, step=1),
                                    'n_estimators': IntUniformDistribution(high=1000, low=50, step=50)},
               random_state=0)

In [19]:
# Predicción en partición de test
y_pred_RF = optuna_search.predict(X_test)

# Precisión en partición de test
accuracy = accuracy_score(y_test, y_pred_RF)
print("Accuracy: {:0.2f}%".format(accuracy * 100))

Accuracy: 83.33%


En este caso, aplicar cross-validation no mejora los resultados.

$\color{red}{\text{CÓMO PUEDE SALIR MEJOR PROBANDO MENOS PARÁMETROS??}}$

In [20]:
# Definir y entrenar el modelo
model_RF = RandomForestClassifier(random_state=0)
param_grid_RF = {
    "n_estimators": optuna.distributions.IntUniformDistribution(100, 1000, 100),
    "criterion": optuna.distributions.CategoricalDistribution(["gini", "entropy"]),
    "max_depth": optuna.distributions.IntUniformDistribution(1, 20)
}

optuna_search = optuna.integration.OptunaSearchCV(model_RF, param_grid_RF, cv=4, n_trials=400, random_state=0)
# n_trials = 10 x 2 x 20 = 400
optuna_search.fit(X_train, y_train)

  optuna_search = optuna.integration.OptunaSearchCV(model_RF, param_grid_RF, cv=4, n_trials=400, random_state=0)
[32m[I 2022-05-23 23:45:04,719][0m A new study created in memory with name: no-name-61b1a9ed-119c-4357-8782-1e81401a437b[0m
[32m[I 2022-05-23 23:45:07,973][0m Trial 0 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 500, 'criterion': 'entropy', 'max_depth': 4}. Best is trial 0 with value: 0.6911764705882353.[0m
[32m[I 2022-05-23 23:45:12,264][0m Trial 1 finished with value: 0.6029411764705883 and parameters: {'n_estimators': 600, 'criterion': 'gini', 'max_depth': 15}. Best is trial 0 with value: 0.6911764705882353.[0m
[32m[I 2022-05-23 23:45:17,797][0m Trial 2 finished with value: 0.6617647058823529 and parameters: {'n_estimators': 900, 'criterion': 'entropy', 'max_depth': 11}. Best is trial 0 with value: 0.6911764705882353.[0m
[32m[I 2022-05-23 23:45:18,329][0m Trial 3 finished with value: 0.6176470588235294 and parameters: {'n_estimato

[32m[I 2022-05-23 23:47:27,489][0m Trial 36 finished with value: 0.676470588235294 and parameters: {'n_estimators': 900, 'criterion': 'gini', 'max_depth': 16}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 23:47:33,912][0m Trial 37 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 18}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 23:47:41,827][0m Trial 38 finished with value: 0.6764705882352942 and parameters: {'n_estimators': 1000, 'criterion': 'entropy', 'max_depth': 19}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 23:47:46,713][0m Trial 39 finished with value: 0.6470588235294118 and parameters: {'n_estimators': 800, 'criterion': 'gini', 'max_depth': 15}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 23:47:52,994][0m Trial 40 finished with value: 0.6617647058823529 and parameters: {'n_estimators': 900, 'crite

[32m[I 2022-05-23 23:51:00,925][0m Trial 74 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 18}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 23:51:05,159][0m Trial 75 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 700, 'criterion': 'entropy', 'max_depth': 14}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 23:51:10,908][0m Trial 76 finished with value: 0.676470588235294 and parameters: {'n_estimators': 900, 'criterion': 'gini', 'max_depth': 18}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 23:51:16,602][0m Trial 77 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 13}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 23:51:21,532][0m Trial 78 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 700, 'cri

[32m[I 2022-05-23 23:54:18,060][0m Trial 112 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 19}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 23:54:23,370][0m Trial 113 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 10}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 23:54:28,704][0m Trial 114 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 18}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 23:54:34,265][0m Trial 115 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 700, 'criterion': 'entropy', 'max_depth': 18}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 23:54:40,400][0m Trial 116 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 

[32m[I 2022-05-23 23:57:40,071][0m Trial 150 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 17}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 23:57:45,289][0m Trial 151 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 11}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 23:57:50,502][0m Trial 152 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 11}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 23:57:55,329][0m Trial 153 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 11}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-23 23:58:00,335][0m Trial 154 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 

[32m[I 2022-05-24 00:00:50,180][0m Trial 188 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 5}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-24 00:00:55,999][0m Trial 189 finished with value: 0.6617647058823529 and parameters: {'n_estimators': 900, 'criterion': 'entropy', 'max_depth': 6}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-24 00:01:00,929][0m Trial 190 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 10}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-24 00:01:05,782][0m Trial 191 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 12}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-24 00:01:10,706][0m Trial 192 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 80

[32m[I 2022-05-24 00:04:29,348][0m Trial 226 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 18}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-24 00:04:38,293][0m Trial 227 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 10}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-24 00:04:46,751][0m Trial 228 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 18}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-24 00:04:54,715][0m Trial 229 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 18}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-24 00:05:04,333][0m Trial 230 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 

[32m[I 2022-05-24 00:10:13,114][0m Trial 264 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 8}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-24 00:10:18,766][0m Trial 265 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 5}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-24 00:10:24,444][0m Trial 266 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 8}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-24 00:10:30,824][0m Trial 267 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 6}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-24 00:10:36,488][0m Trial 268 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800,

[32m[I 2022-05-24 00:13:25,179][0m Trial 302 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 8}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-24 00:13:31,058][0m Trial 303 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 11}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-24 00:13:39,236][0m Trial 304 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 11}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-24 00:13:45,171][0m Trial 305 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 11}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-24 00:13:50,798][0m Trial 306 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 8

[32m[I 2022-05-24 00:18:47,625][0m Trial 340 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 10}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-24 00:18:56,697][0m Trial 341 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 20}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-24 00:19:06,218][0m Trial 342 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 11}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-24 00:19:14,021][0m Trial 343 finished with value: 0.6911764705882353 and parameters: {'n_estimators': 700, 'criterion': 'entropy', 'max_depth': 10}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-24 00:19:24,771][0m Trial 344 finished with value: 0.6617647058823529 and parameters: {'n_estimators': 

[32m[I 2022-05-24 00:24:08,190][0m Trial 378 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 17}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-24 00:24:18,707][0m Trial 379 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 6}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-24 00:24:27,410][0m Trial 380 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 6}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-24 00:24:36,243][0m Trial 381 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 800, 'criterion': 'entropy', 'max_depth': 18}. Best is trial 28 with value: 0.7058823529411764.[0m
[32m[I 2022-05-24 00:24:45,048][0m Trial 382 finished with value: 0.7058823529411764 and parameters: {'n_estimators': 80

OptunaSearchCV(cv=4, estimator=RandomForestClassifier(random_state=0),
               n_trials=400,
               param_distributions={'criterion': CategoricalDistribution(choices=('gini', 'entropy')),
                                    'max_depth': IntUniformDistribution(high=20, low=1, step=1),
                                    'n_estimators': IntUniformDistribution(high=1000, low=100, step=100)},
               random_state=0)

In [21]:
y_pred_RF = optuna_search.predict(X_test)

# Precisión en partición de test
accuracy = accuracy_score(y_test, y_pred_RF)
print("Accuracy: {:0.2f}%".format(accuracy * 100))

Accuracy: 88.89%


# Create submissions

In [None]:
import pathlib
from datetime import datetime

def create_submission(pred, method, test_id=testFNC["Id"]):
    submissionDF = pd.DataFrame(list(zip(test_id, pred)), columns=["Id", "Probability"])
    print(submissionDF.shape) # Comprobación del tamaño, debe ser: (119748, 2)
    current_time = datetime.now().strftime("%d-%m-%Y_%Hh%Mmin")
    current_path = pathlib.Path().resolve()
    parent_path = current_path.parent
    submissionDF.to_csv(f"{parent_path}\submissions\MLSP_submission_{method}_{current_time}.csv", header=True, index=False)