In [2]:
# Importar las librerías necesarias
import os
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import cross_val_score
import optuna
from sklearn.impute import SimpleImputer
import joblib



In [10]:
# Definir la ruta al proyecto usando raw string literal
project_folder = "H:\\Users\\Martin\\Desktop\\Especialización  IA\\1 - Bimestres y Cursos\\3B\\3. Aprendizaje de Máquina II\\TP - MartinCaceres_AMq2\\MartinCaceres_AMq2_TPFinal"

# Verificar si la ruta existe
if os.path.exists(project_folder):
    print("La ruta especificada existe.")
else:
    print("La ruta especificada no existe.")

# Cargar los datos procesados desde la Notebook del científico de datos
train_data_path = "train_final.csv"
test_data_path = "test_final.csv"

df_train = pd.read_csv(train_data_path)
df_test = pd.read_csv(test_data_path)

# Separar características y etiquetas
X_train_imputed = df_train.drop("Item_Outlet_Sales", axis=1)
y_train = df_train["Item_Outlet_Sales"]
X_test_imputed = df_test


La ruta especificada existe.


# Random Forest


In [11]:
# Definir la función objetivo para la optimización de Random Forest
def random_forest_objective(trial):
    n_estimators = trial.suggest_int("n_estimators", 100, 1000)
    max_depth = trial.suggest_int("max_depth", 5, 30)
    min_samples_split = trial.suggest_float("min_samples_split", 0.1, 1)
    min_samples_leaf = trial.suggest_float("min_samples_leaf", 0.1, 0.5)
    
    model = RandomForestRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        random_state=42
    )
    
    # Utilizar validación cruzada para calcular la métrica de evaluación
    score = cross_val_score(model, X_train_imputed, y_train, cv=3)
    
    return score.mean()

In [12]:
# Crear el estudio de Optuna y ejecutar la optimización para Random Forest
study_rf = optuna.create_study(direction='maximize')
study_rf.optimize(random_forest_objective, n_trials=10)

# Imprimir los resultados de la optimización para Random Forest
print('Best trial (Random Forest): score {}, params {}'.format(study_rf.best_trial.value, study_rf.best_trial.params))

# Obtener los mejores hiperparámetros para Random Forest
best_params_rf = study_rf.best_params

# Crear y entrenar el modelo final de Random Forest con los mejores hiperparámetros
final_model_rf = RandomForestRegressor(**best_params_rf)
final_model_rf.fit(X_train_imputed, y_train)

# Guardar el modelo entrenado
model_path_rf = os.path.join(project_folder, "models", "random_forest_model.joblib")
joblib.dump(final_model_rf, model_path_rf)

[I 2023-08-27 17:52:06,843] A new study created in memory with name: no-name-a706a425-6be3-4179-8ca9-2e8284461aa4
[I 2023-08-27 17:52:09,799] Trial 0 finished with value: 0.31017675053023924 and parameters: {'n_estimators': 326, 'max_depth': 28, 'min_samples_split': 0.11310586726659337, 'min_samples_leaf': 0.10678109903034488}. Best is trial 0 with value: 0.31017675053023924.
[I 2023-08-27 17:52:12,891] Trial 1 finished with value: -0.00018101500312354504 and parameters: {'n_estimators': 957, 'max_depth': 5, 'min_samples_split': 0.6686863593341036, 'min_samples_leaf': 0.4383761748711812}. Best is trial 0 with value: 0.31017675053023924.
[I 2023-08-27 17:52:13,600] Trial 2 finished with value: -0.00017740890045240634 and parameters: {'n_estimators': 215, 'max_depth': 17, 'min_samples_split': 0.9682915791935155, 'min_samples_leaf': 0.3481945708531672}. Best is trial 0 with value: 0.31017675053023924.
[I 2023-08-27 17:52:16,643] Trial 3 finished with value: -0.00018145218490254558 and par

Best trial (Random Forest): score 0.31017675053023924, params {'n_estimators': 326, 'max_depth': 28, 'min_samples_split': 0.11310586726659337, 'min_samples_leaf': 0.10678109903034488}


['H:\\Users\\Martin\\Desktop\\Especialización  IA\\1 - Bimestres y Cursos\\3B\\3. Aprendizaje de Máquina II\\TP - MartinCaceres_AMq2\\MartinCaceres_AMq2_TPFinal\\models\\random_forest_model.joblib']

# SVM

In [13]:
# Definir la función objetivo para la optimización de SVM
def svm_objective(trial):
    C = trial.suggest_float("C", 0.1, 10)
    epsilon = trial.suggest_float("epsilon", 0.01, 0.2)
    
    model = SVR(
        C=C,
        epsilon=epsilon
    )
    
    # Utilizar validación cruzada para calcular la métrica de evaluación
    score = cross_val_score(model, X_train_imputed, y_train, cv=3, scoring='neg_mean_squared_error')
    
    return score.mean()

In [14]:
# Crear el estudio de Optuna y ejecutar la optimización para SVM
study_svm = optuna.create_study(direction='minimize')
study_svm.optimize(svm_objective, n_trials=10)

# Imprimir los resultados de la optimización para SVM
print('Best trial (SVM): score {}, params {}'.format(-study_svm.best_trial.value, study_svm.best_trial.params))

# Obtener los mejores hiperparámetros para SVM
best_params_svm = study_svm.best_params

# Crear y entrenar el modelo final de SVM con los mejores hiperparámetros
final_model_svm = SVR(**best_params_svm)
final_model_svm.fit(X_train_imputed, y_train)

# Guardar el modelo entrenado
model_path_svm = os.path.join(project_folder, "models", "svm_model.joblib")
joblib.dump(final_model_svm, model_path_svm)

[I 2023-08-27 17:52:38,187] A new study created in memory with name: no-name-0f2cc23e-adcf-40cd-85ab-1d947fe033c4
[I 2023-08-27 17:52:48,914] Trial 0 finished with value: -3058968.0222425167 and parameters: {'C': 3.7212351751517616, 'epsilon': 0.17429054596367338}. Best is trial 0 with value: -3058968.0222425167.
[I 2023-08-27 17:52:59,238] Trial 1 finished with value: -3058688.5935104266 and parameters: {'C': 4.052471415620174, 'epsilon': 0.04422106244628556}. Best is trial 0 with value: -3058968.0222425167.
[I 2023-08-27 17:53:09,592] Trial 2 finished with value: -3058984.300312621 and parameters: {'C': 8.00824329057538, 'epsilon': 0.12310005142119393}. Best is trial 2 with value: -3058984.300312621.
[I 2023-08-27 17:53:19,937] Trial 3 finished with value: -3057685.9118294255 and parameters: {'C': 2.6247181730658187, 'epsilon': 0.11656567833585112}. Best is trial 2 with value: -3058984.300312621.
[I 2023-08-27 17:53:30,407] Trial 4 finished with value: -3060475.191078247 and paramete

Best trial (SVM): score 3061497.6034882334, params {'C': 0.5430297908860753, 'epsilon': 0.1968806339154638}


['H:\\Users\\Martin\\Desktop\\Especialización  IA\\1 - Bimestres y Cursos\\3B\\3. Aprendizaje de Máquina II\\TP - MartinCaceres_AMq2\\MartinCaceres_AMq2_TPFinal\\models\\svm_model.joblib']

# Redes Neuronales

In [15]:
# Definir la función objetivo para la optimización de Redes Neuronales
def neural_network_objective(trial):
    hidden_layer_sizes = tuple([trial.suggest_int("n_units_l{}".format(i), 1, 100) for i in range(trial.suggest_int("n_layers", 1, 5))])
    activation = trial.suggest_categorical("activation", ["identity", "logistic", "tanh", "relu"])
    alpha = trial.suggest_float("alpha", 1e-5, 1e-1, log=True)
    
    model = MLPRegressor(
        hidden_layer_sizes=hidden_layer_sizes,
        activation=activation,
        alpha=alpha,
        random_state=42
    )
    
    # Utilizar validación cruzada para calcular la métrica de evaluación
    score = cross_val_score(model, X_train_imputed, y_train, cv=3, scoring='neg_mean_squared_error')
    
    return score.mean()

In [16]:
# Crear el estudio de Optuna y ejecutar la optimización para Redes Neuronales
study_nn = optuna.create_study(direction='minimize')
study_nn.optimize(neural_network_objective, n_trials=10)

# Imprimir los resultados de la optimización para Redes Neuronales
print('Best trial (Neural Network): score {}, params {}'.format(-study_nn.best_trial.value, study_nn.best_trial.params))

# Obtener los mejores hiperparámetros para Redes Neuronales
best_params_nn = study_nn.best_params

# Crear y entrenar el modelo final de Redes Neuronales con los mejores hiperparámetros
final_model_nn = MLPRegressor(
    hidden_layer_sizes=tuple([best_params_nn[f"n_units_l{i}"] for i in range(best_params_nn["n_layers"])]),
    activation=best_params_nn["activation"],
    alpha=best_params_nn["alpha"],
    random_state=42
)
final_model_nn.fit(X_train_imputed, y_train)

# Guardar el modelo entrenado
model_path_nn = os.path.join(project_folder, "models", "neural_network_model.joblib")
joblib.dump(final_model_nn, model_path_nn)

[I 2023-08-27 17:54:25,542] A new study created in memory with name: no-name-b3c78fab-323b-4731-a34f-d12fa9b9c26d
[I 2023-08-27 17:54:32,694] Trial 0 finished with value: -1492070.4699969615 and parameters: {'n_layers': 3, 'n_units_l0': 25, 'n_units_l1': 15, 'n_units_l2': 47, 'activation': 'identity', 'alpha': 0.000179310698956104}. Best is trial 0 with value: -1492070.4699969615.
[I 2023-08-27 17:54:56,181] Trial 1 finished with value: -1504030.0913739158 and parameters: {'n_layers': 4, 'n_units_l0': 6, 'n_units_l1': 98, 'n_units_l2': 57, 'n_units_l3': 75, 'activation': 'identity', 'alpha': 0.0001456477114782891}. Best is trial 1 with value: -1504030.0913739158.
[I 2023-08-27 17:55:15,937] Trial 2 finished with value: -1804260.7515367258 and parameters: {'n_layers': 5, 'n_units_l0': 51, 'n_units_l1': 100, 'n_units_l2': 93, 'n_units_l3': 63, 'n_units_l4': 2, 'activation': 'identity', 'alpha': 0.06334294779642975}. Best is trial 2 with value: -1804260.7515367258.
[I 2023-08-27 17:56:01,

Best trial (Neural Network): score 7642043.074482799, params {'n_layers': 2, 'n_units_l0': 6, 'n_units_l1': 5, 'activation': 'relu', 'alpha': 9.870814707455301e-05}




['H:\\Users\\Martin\\Desktop\\Especialización  IA\\1 - Bimestres y Cursos\\3B\\3. Aprendizaje de Máquina II\\TP - MartinCaceres_AMq2\\MartinCaceres_AMq2_TPFinal\\models\\neural_network_model.joblib']