In [1]:
!pip install wooldridge

import wooldridge as wd

import pandas as pd
import numpy as np

from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, GridSearchCV

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting wooldridge
  Downloading wooldridge-0.4.4-py3-none-any.whl (5.1 MB)
[K     |████████████████████████████████| 5.1 MB 4.1 MB/s 
Installing collected packages: wooldridge
Successfully installed wooldridge-0.4.4


In [2]:
wage = wd.data('wage1')[['wage','educ','exper']]
y = wage['wage']
X = wage[['educ','exper']]

In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=12345)

In [4]:
def calculo_metricas(modelo, X_train, y_train, X_test, y_test):
  model = modelo
  model.fit(X_train, y_train)
  y_pred = model.predict(X_test)
  y_pred_tr = model.predict(X_train)
  return mean_squared_error(y_train, y_pred_tr), mean_squared_error(y_test, y_pred)

In [5]:
calculo_metricas(Ridge(), X_train, y_train, X_test, y_test)

(11.98590149724332, 6.410415069131133)

In [6]:
calculo_metricas(Lasso(), X_train, y_train, X_test, y_test)

(12.139860898292211, 6.367633533474833)

In [7]:
calculo_metricas(ElasticNet(), X_train, y_train, X_test, y_test)

(12.074066289867494, 6.354467092956616)

In [15]:
gs = GridSearchCV(Ridge(),
                  param_grid={'alpha':np.random.uniform(0.5,10, size=50)},
                  cv=3, scoring='neg_mean_squared_error')

In [16]:
gs.fit(X_train, y_train)

GridSearchCV(cv=3, estimator=Ridge(),
             param_grid={'alpha': array([0.54272581, 1.76540262, 0.90470475, 3.21347216, 7.96937665,
       4.57540034, 3.0684755 , 5.31041556, 8.65883936, 7.545327  ,
       3.82096723, 2.14673874, 5.25474373, 8.19500698, 4.82723539,
       8.45518044, 7.68882349, 2.15016942, 8.32164564, 2.27522066,
       3.54339321, 7.45455919, 4.76974612, 5.11712318, 2.79199575,
       2.3778085 , 1.47578222, 2.12592616, 6.5632835 , 7.69975342,
       1.36100426, 5.03780837, 9.3038897 , 6.27292785, 7.21213315,
       4.77110016, 0.76628302, 5.92852387, 7.92078762, 2.65409785,
       4.01489977, 8.24457646, 6.55462263, 3.31325366, 6.72161929,
       9.57080194, 8.81252607, 2.40270531, 9.17742113, 2.57121622])},
             scoring='neg_mean_squared_error')

In [17]:
gs.best_params_

{'alpha': 3.3132536567482878}

In [18]:
gs = GridSearchCV(Lasso(),
                  param_grid={'alpha':np.random.uniform(0.5,10, size=50)},
                  cv=3, scoring='neg_mean_squared_error')

In [19]:
gs.fit(X_train, y_train)

GridSearchCV(cv=3, estimator=Lasso(),
             param_grid={'alpha': array([8.86793609, 8.38117589, 7.09598926, 0.57702474, 6.85326928,
       4.57429272, 1.76998598, 3.51561445, 8.02084653, 3.92004876,
       4.0704939 , 9.02853746, 2.56300635, 4.75655859, 0.62413314,
       1.79349745, 7.3294928 , 0.83838098, 5.85190743, 9.50952014,
       8.9474955 , 3.33001249, 2.56548281, 0.95967841, 4.66125774,
       1.04276215, 3.58951562, 1.7013962 , 5.67945256, 7.03800319,
       5.94511465, 4.18130203, 4.51495421, 3.55958793, 5.29444906,
       4.55473144, 4.16658858, 9.59742318, 4.28415829, 0.98279054,
       3.01304485, 6.33418077, 1.21791605, 0.51145636, 5.81545919,
       2.12536127, 7.46186644, 3.87016583, 1.05797197, 7.55134718])},
             scoring='neg_mean_squared_error')

In [20]:
gs.best_params_

{'alpha': 0.5114563558197721}

In [21]:
gs = GridSearchCV(ElasticNet(),
                  param_grid={'alpha':np.random.uniform(0.5,10, size=50),
                              'l1_ratio':np.arange(0.1,1,0.1)},
                  cv=3, scoring='neg_mean_squared_error')

In [22]:
gs.fit(X_train, y_train)

GridSearchCV(cv=3, estimator=ElasticNet(),
             param_grid={'alpha': array([8.84897204, 8.00184129, 2.96007222, 7.54567356, 2.93242505,
       1.32890743, 7.11441841, 9.78580592, 9.85155968, 3.71361923,
       3.48666713, 9.31784965, 3.71595902, 0.9522177 , 2.44561458,
       9.36064362, 4.69600982, 1.92255743, 6.89302462, 5.10823708,
       1.40856384, 1.05082439, 9.67026032, 8.85186472, 7.55520421,
       6.299891...45589, 1.60268705, 3.64653104, 4.57636776,
       0.55968266, 2.43424711, 4.37497022, 4.01410662, 2.20237998,
       9.15261769, 8.49450714, 4.53271115, 3.73343164, 5.77096618,
       5.11897447, 7.8850622 , 0.58214274, 2.61281265, 7.66177776,
       6.15571973, 5.84175946, 8.83197307, 3.69466326, 0.97070936]),
                         'l1_ratio': array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])},
             scoring='neg_mean_squared_error')

In [23]:
gs.best_params_

{'alpha': 0.5596826617872519, 'l1_ratio': 0.1}

In [24]:
calculo_metricas(ElasticNet(**{'alpha': 0.5596826617872519, 'l1_ratio': 0.1}), X_train, y_train, X_test, y_test)

(12.004062156363096, 6.368150719612897)

In [26]:
modelo = ElasticNet(**{'alpha': 0.5596826617872519, 'l1_ratio': 0.1})
modelo.fit(X, y)

ElasticNet(alpha=0.5596826617872519, l1_ratio=0.1)

In [25]:
import pickle

In [27]:
pickle.dump(modelo, open('model.pickle', 'wb'))

https://labeconometria-pred-elastic-net-app-7m9n97.streamlitapp.com/