## Hyperparameter tuning

Voy a usar Optuna para tratar de optimizar los hiperparámetros del modelo. \
Para tener hiperparámetros que optimizar, en lugar de usar Linear Regression voy a usar ElasticNet
\
\
La data será el output de FeatureEngineering, ya creado anteriormente

In [1]:
# Imports
import numpy as np
import optuna
import pandas as pd

# Importando librerías para el modelo
from sklearn.model_selection import train_test_split, cross_validate, cross_val_score
from sklearn import metrics
from sklearn.linear_model import ElasticNet

In [2]:
# Load the CSV into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='../data/BigMart_Final.csv')
df.head(n=5)

Unnamed: 0,Item_Identifier,Item_Weight,Item_Visibility,Item_MRP,Outlet_Identifier,Outlet_Establishment_Year,Outlet_Size,Outlet_Location_Type,Item_Outlet_Sales,Set,Outlet_Type_Grocery Store,Outlet_Type_Supermarket Type1,Outlet_Type_Supermarket Type2,Outlet_Type_Supermarket Type3
0,FDA15,9.3,0.016047,4,OUT049,21,1,2,3735.138,train,0,1,0,0
1,DRC01,5.92,0.019278,1,OUT018,11,1,0,443.4228,train,0,0,1,0
2,FDN15,17.5,0.01676,2,OUT049,21,1,2,2097.27,train,0,1,0,0
3,FDX07,19.2,0.0,3,OUT010,22,0,0,732.38,train,1,0,0,0
4,NCD19,8.93,0.0,1,OUT013,33,2,0,994.7052,train,0,1,0,0


In [3]:

# Identifier columns are removed up front: they are considered too specific
# to contribute to the prediction.
dataset = df.drop(['Item_Identifier', 'Outlet_Identifier'], axis=1).copy()

# Split the rows by the 'Set' flag and drop the columns that carry no data
# for each subset ('Set' itself, plus the target column on the test rows).
df_train = dataset[dataset['Set'] == 'train'].drop(columns='Set')
df_test = dataset[dataset['Set'] == 'test'].drop(columns=['Item_Outlet_Sales', 'Set'])

# Seed passed to the train/validation split below.
seed = 28

# Feature matrix: every remaining training column except the target.
X = df_train.drop('Item_Outlet_Sales', axis=1)

# Hold out 30% of the training rows for validation.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    df_train['Item_Outlet_Sales'],
    test_size=0.3,
    random_state=seed,
)

def objective(trial):
    """
    Optuna objective for tuning the hyperparameters of an ElasticNet regressor.

    Samples `alpha` and `l1_ratio` from the trial, builds an ElasticNet with
    them and evaluates it with 3-fold cross-validation on the module-level
    `x_train` / `y_train` split.

    Args:
        trial: An Optuna `Trial` object used to sample the hyperparameters.

    Returns:
        The mean of the cross-validation scores. No `scoring` is passed, so
        the estimator's own `score` method is used (R² for scikit-learn
        regressors); higher is better.
    """
    # Hyperparameters to optimise, both sampled uniformly from [0.01, 1].
    alpha = trial.suggest_float("alpha", 0.01, 1)
    l1_ratio = trial.suggest_float("l1_ratio", 0.01, 1)

    # Regressor built with the values suggested by Optuna.
    model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio)

    # No explicit `model.fit` here: cross_val_score clones the estimator and
    # fits a fresh copy on each fold, so a prior fit would be wasted work.
    fold_scores = cross_val_score(model, x_train, y_train, cv=3)

    return fold_scores.mean()

In [4]:
# Build the Optuna study and run the search.
# A seeded TPE sampler is used; optuna.samplers.RandomSampler(seed=28) was the
# alternative tried previously.
sampler = optuna.samplers.TPESampler(seed=28)
study = optuna.create_study(sampler=sampler, direction='maximize')
study.optimize(objective, n_trials=50)

# Report the best score found and the hyperparameters that produced it.
print(f'Best trial: score {study.best_trial.value}, params {study.best_trial.params}')

[32m[I 2023-10-23 14:55:50,111][0m A new study created in memory with name: no-name-f6125f4f-00a4-4e5c-9dfb-3d26be50a560[0m
[32m[I 2023-10-23 14:55:50,146][0m Trial 0 finished with value: 0.3979361862598716 and parameters: {'alpha': 0.7317236048662279, 'l1_ratio': 0.5656272063043647}. Best is trial 0 with value: 0.3979361862598716.[0m
[32m[I 2023-10-23 14:55:50,184][0m Trial 1 finished with value: 0.4868904956863309 and parameters: {'alpha': 0.13371742108914852, 'l1_ratio': 0.403616445013116}. Best is trial 1 with value: 0.4868904956863309.[0m
[32m[I 2023-10-23 14:55:50,218][0m Trial 2 finished with value: 0.3833039549715604 and parameters: {'alpha': 0.7834951242107413, 'l1_ratio': 0.515883051836582}. Best is trial 1 with value: 0.4868904956863309.[0m
[32m[I 2023-10-23 14:55:50,258][0m Trial 3 finished with value: 0.5161564267370464 and parameters: {'alpha': 0.19086642483166677, 'l1_ratio': 0.8549777464963643}. Best is trial 3 with value: 0.5161564267370464.[0m
[32m[I 2

Best trial: score 0.5244807194650187, params {'alpha': 0.2142497308223184, 'l1_ratio': 0.9894639758933537}


In [5]:
# Final model: ElasticNet built from the best hyperparameters of the study,
# fitted on the training split (`fit` returns the estimator itself).
en_model = ElasticNet(**study.best_params).fit(x_train, y_train)

# Predictions on the held-out validation split.
predictions = en_model.predict(x_test)

# Score of the final model on the held-out split (shown as the cell output).
en_model.score(x_test, y_test)

0.5527013940738024