In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_squared_error
import pickle

Luego de importar las librerías, abrimos nuestro archivo anterior para comenzar a trabajar:

In [2]:
# Read the previously prepared dataset from the working directory.
df = pd.read_csv(filepath_or_buffer='data_ml.csv')

Separamos nuestra variable a predecir.

In [3]:
# Target: the column we want to predict.
y = df['price']
# Predictors: every remaining column.
X = df.drop(columns='price')

Instanciamos el modelo

In [4]:
# Base estimator with library defaults; its hyperparameters are tuned by the grid search below.
model = ElasticNet()

Creamos la grilla de parámetros.

In [6]:
# Candidate hyperparameter values; the grid search evaluates every combination.
param_grid = dict(
    alpha=[0.9, 1.0],
    l1_ratio=[0.4, 0.5],
    max_iter=[800, 1000],
)

Instanciamos el método GridSearchCV.

In [7]:
# Exhaustive 5-fold cross-validated search, scored by negated MSE (higher is better).
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
)

Hacemos el fit de nuestro modelo.

In [8]:
# Run the cross-validated search over every combination in param_grid.
grid_search.fit(X, y)

In [9]:
# Hyperparameter combination selected by the search; displayed as the cell output.
best_params = grid_search.best_params_
best_params

{'alpha': 0.9, 'l1_ratio': 0.4, 'max_iter': 800}

In [10]:
# The search was scored with 'neg_mean_squared_error', so best_score_ holds the
# NEGATED MSE despite the variable name; flip the sign before taking the root.
best_mse = grid_search.best_score_
rmse = np.sqrt(-best_mse)

In [11]:
# Display the RMSE derived from the best cross-validation score.
rmse

9.901151901060944

Entrenamos nuestro modelo con los mejores hiperparámetros.

In [12]:
# Build the final estimator from the hyperparameters the grid search selected,
# rather than hand-copying the printed values, so this cell stays correct if the
# data or the parameter grid changes.
# NOTE(review): with GridSearchCV's default refit=True, grid_search.best_estimator_
# should already be this configuration refitted on all of X, y; a fresh instance
# is kept here so the saved model is independent of the search object.
modelo_elastic = ElasticNet(**grid_search.best_params_)

In [13]:
# Train the final model on all rows of X and y.
modelo_elastic.fit(X, y)

Guardamos el modelo como archivo pickle.

In [14]:
# Serialize the fitted estimator to disk.
# The file handle is deliberately NOT named `model`: that name is bound to the base
# ElasticNet estimator handed to GridSearchCV, and rebinding it to a (closed) file
# object would break any re-run of the grid-search cells.
with open('modelo_elastic.pkl', 'wb') as model_file:
    pickle.dump(modelo_elastic, model_file)

Prepararemos un DataFrame con algunos valores seteados en 0 para poder pasarle luego los datos para la predicción.

In [14]:
# Take the first row of X as a template for prediction inputs.
# .copy() makes it an independent frame: head() returns a slice of X, and the
# in-place edits applied to x_prediccion further down would otherwise raise
# SettingWithCopyWarning.
x_prediccion = X.head(1).copy()
# Column names in their original order, used below to pick which columns to reset.
lista_columnas = X.columns.to_list()
x_prediccion

Unnamed: 0,early_access,year,Adventure,Animation &amp; Modeling,Audio Production,Captions available,Casual,Co-op,Commentary available,Cross-Platform Multiplayer,...,Steam Leaderboards,Steam Trading Cards,Steam Turn Notifications,Steam Workshop,SteamVR Collectibles,Strategy,Utilities,Valve Anti-Cheat enabled,Video Production,Web Publishing
0,False,2018,0,0,0,0,1,0,0,0,...,0,0,0,0,0,1,0,0,0,0


In [15]:
# Peek at the two leading columns, which are excluded from lista_features below.
lista_columnas[:2]

['early_access', 'year']

In [16]:
# Everything after the first two columns ('early_access', 'year') is kept as the
# list of columns that will be reset to 0 in the template row.
lista_features = lista_columnas[2:]
# Sanity check: show the first column of that list.
lista_features[0]

'Adventure'

In [17]:
# Set every column in lista_features to 0 in place; 'early_access' and 'year'
# keep the values from the template row.
x_prediccion.loc[:,lista_features] = 0
x_prediccion

Unnamed: 0,early_access,year,Adventure,Animation &amp; Modeling,Audio Production,Captions available,Casual,Co-op,Commentary available,Cross-Platform Multiplayer,...,Steam Leaderboards,Steam Trading Cards,Steam Turn Notifications,Steam Workshop,SteamVR Collectibles,Strategy,Utilities,Valve Anti-Cheat enabled,Video Production,Web Publishing
0,False,2018,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [18]:
# Normalise the template's column names to lower case.
# NOTE(review): modelo_elastic was fitted on X with the original-case names;
# confirm the consumer of this template accounts for that before predicting.
x_prediccion.columns = list(map(str.lower, x_prediccion.columns))
x_prediccion

Unnamed: 0,early_access,year,adventure,animation &amp; modeling,audio production,captions available,casual,co-op,commentary available,cross-platform multiplayer,...,steam leaderboards,steam trading cards,steam turn notifications,steam workshop,steamvr collectibles,strategy,utilities,valve anti-cheat enabled,video production,web publishing
0,False,2018,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [25]:
# Persist the zeroed, lower-cased single-row template so it can be loaded later
# and filled in with the values to predict on.
with open('x_prediccion.pkl', 'wb') as x_pred:
    pickle.dump(x_prediccion, x_pred)