In [22]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_boston
pd.set_option("max_columns", 500)

In [23]:
from sklearn.linear_model import Ridge, Lasso, ElasticNet, BayesianRidge, Lars

In [24]:
boston = load_boston() 

In [25]:
boston

{'data': array([[6.3200e-03, 1.8000e+01, 2.3100e+00, ..., 1.5300e+01, 3.9690e+02,
         4.9800e+00],
        [2.7310e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9690e+02,
         9.1400e+00],
        [2.7290e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9283e+02,
         4.0300e+00],
        ...,
        [6.0760e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
         5.6400e+00],
        [1.0959e-01, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9345e+02,
         6.4800e+00],
        [4.7410e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
         7.8800e+00]]),
 'target': array([24. , 21.6, 34.7, 33.4, 36.2, 28.7, 22.9, 27.1, 16.5, 18.9, 15. ,
        18.9, 21.7, 20.4, 18.2, 19.9, 23.1, 17.5, 20.2, 18.2, 13.6, 19.6,
        15.2, 14.5, 15.6, 13.9, 16.6, 14.8, 18.4, 21. , 12.7, 14.5, 13.2,
        13.1, 13.5, 18.9, 20. , 21. , 24.7, 30.8, 34.9, 26.6, 25.3, 24.7,
        21.2, 19.3, 20. , 16.6, 14.4, 19.4, 19.7, 20.5, 25. , 23.4, 18.9,
        35.4, 24.7, 3

In [26]:
boston.keys()

dict_keys(['data', 'target', 'feature_names', 'DESCR', 'filename'])

In [27]:
print(boston['DESCR'])

.. _boston_dataset:

Boston house prices dataset
---------------------------

**Data Set Characteristics:**  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pu

In [28]:
df = pd.DataFrame(data = boston["data"], columns=boston["feature_names"])

In [29]:
df["target"] = boston["target"]
tgt = "target"


In [30]:
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [31]:
ls_pred = [x for x in df.columns if x not in [tgt]]

In [32]:
X = df[ls_pred]
y = df[tgt]

In [33]:
from sklearn.model_selection import train_test_split

In [34]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Modelado

## LASSO

In [36]:
from sklearn.model_selection import cross_val_score

In [37]:
model_lasso = Lasso()

In [38]:
model_lasso.get_params()

{'alpha': 1.0,
 'copy_X': True,
 'fit_intercept': True,
 'max_iter': 1000,
 'normalize': False,
 'positive': False,
 'precompute': False,
 'random_state': None,
 'selection': 'cyclic',
 'tol': 0.0001,
 'warm_start': False}

In [39]:
model_lasso.fit(X_train, y_train)

Lasso()

In [40]:
ls_medias = cross_val_score(estimator=model_lasso, X=X_train, y = y_train, cv = 4, n_jobs=-1, scoring="r2")

In [41]:
ls_medias

array([0.63805061, 0.68234712, 0.69431002, 0.59734902])

In [42]:

np.mean(ls_medias), np.std(ls_medias)

(0.6530141909888982, 0.038367585031444724)

In [43]:
param_grid = {
    "alpha": [x for x in range(1, 100)] + [y/10 for y in range(10)],
    "tol": [0.00001, 0.0000001, 0.01],
}

In [44]:
np.prod(list(map(len, param_grid.values())))

327

In [46]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

In [47]:
clf = GridSearchCV(model_lasso, param_grid, cv=4, error_score=-1000, n_jobs=-1, scoring="r2", verbose=5)
clf.fit(X_train, y_train)
print("Best score: " + str(clf.best_score_))

Fitting 4 folds for each of 327 candidates, totalling 1308 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    1.8s
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    2.4s
[Parallel(n_jobs=-1)]: Done 832 tasks      | elapsed:    4.3s
[Parallel(n_jobs=-1)]: Done 1288 tasks      | elapsed:    5.4s
[Parallel(n_jobs=-1)]: Done 1308 out of 1308 | elapsed:    5.6s finished


Best score: 0.7162544957452255


  self.best_estimator_.fit(X, y, **fit_params)
  positive)
  positive)


In [48]:
summary = pd.DataFrame(clf.cv_results_)

In [49]:
summary

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,param_tol,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,mean_test_score,std_test_score,rank_test_score
0,0.009826,0.003359,0.007702,0.001812,1,1e-05,"{'alpha': 1, 'tol': 1e-05}",0.638055,0.682349,0.694319,0.597350,0.653018,0.038370,32
1,0.007781,0.003319,0.009069,0.005749,1,1e-07,"{'alpha': 1, 'tol': 1e-07}",0.638056,0.682349,0.694318,0.597350,0.653018,0.038370,31
2,0.009153,0.003035,0.008366,0.001371,1,0.01,"{'alpha': 1, 'tol': 0.01}",0.637678,0.682118,0.694385,0.597257,0.652860,0.038414,33
3,0.015219,0.011472,0.007855,0.004035,2,1e-05,"{'alpha': 2, 'tol': 1e-05}",0.585342,0.616167,0.605753,0.562067,0.592332,0.020695,35
4,0.011433,0.002483,0.009827,0.005895,2,1e-07,"{'alpha': 2, 'tol': 1e-07}",0.585345,0.616168,0.605756,0.562068,0.592335,0.020695,34
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
322,0.007568,0.001161,0.007158,0.003156,0.8,1e-07,"{'alpha': 0.8, 'tol': 1e-07}",0.663291,0.701771,0.720087,0.600119,0.671317,0.045933,25
323,0.008588,0.001855,0.006714,0.002373,0.8,0.01,"{'alpha': 0.8, 'tol': 0.01}",0.663208,0.701679,0.720145,0.600073,0.671276,0.045955,27
324,0.009950,0.001955,0.012215,0.006181,0.9,1e-05,"{'alpha': 0.9, 'tol': 1e-05}",0.651357,0.692520,0.707732,0.599135,0.662686,0.042090,29
325,0.009210,0.002890,0.007857,0.002801,0.9,1e-07,"{'alpha': 0.9, 'tol': 1e-07}",0.651358,0.692520,0.707732,0.599135,0.662686,0.042090,28


In [50]:
summary.sort_values(by = "rank_test_score")

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,param_tol,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,mean_test_score,std_test_score,rank_test_score
298,0.015950,0.003600,0.004277,0.000616,0,1e-07,"{'alpha': 0.0, 'tol': 1e-07}",0.722374,0.748016,0.796943,0.597686,0.716254,0.073511,1
299,0.014438,0.005808,0.003353,0.000340,0,0.01,"{'alpha': 0.0, 'tol': 0.01}",0.722374,0.748016,0.796943,0.597686,0.716254,0.073511,1
297,0.030489,0.011830,0.008318,0.003448,0,1e-05,"{'alpha': 0.0, 'tol': 1e-05}",0.722374,0.748016,0.796943,0.597686,0.716254,0.073511,1
302,0.004148,0.000465,0.002856,0.000932,0.1,0.01,"{'alpha': 0.1, 'tol': 0.01}",0.710248,0.740037,0.777060,0.585224,0.703142,0.072077,4
300,0.004293,0.000946,0.002671,0.000541,0.1,1e-05,"{'alpha': 0.1, 'tol': 1e-05}",0.710252,0.739962,0.776999,0.585270,0.703121,0.072033,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292,0.004192,0.000419,0.002902,0.000210,98,1e-07,"{'alpha': 98, 'tol': 1e-07}",0.159753,0.266739,0.256590,0.118553,0.200409,0.063066,323
293,0.005178,0.000971,0.003461,0.000528,98,0.01,"{'alpha': 98, 'tol': 0.01}",0.159766,0.266772,0.256613,0.118463,0.200404,0.063107,324
294,0.005085,0.000949,0.003590,0.000979,99,1e-05,"{'alpha': 99, 'tol': 1e-05}",0.159503,0.266389,0.255958,0.118440,0.200072,0.062911,325
295,0.011457,0.005408,0.007234,0.003897,99,1e-07,"{'alpha': 99, 'tol': 1e-07}",0.159502,0.266389,0.255958,0.118440,0.200072,0.062911,326


In [51]:
clf.best_estimator_

Lasso(alpha=0.0, tol=1e-05)

In [52]:
dc_scores={}

In [53]:
dc_scores[str(model_lasso).split("(")[0]] = {"model": clf.best_estimator_, "score": clf.best_score_}

In [54]:
dc_scores

{'Lasso': {'model': Lasso(alpha=0.0, tol=1e-05), 'score': 0.7162544957452255}}

## RIDGE

In [55]:
model_ridge = Ridge()

In [56]:
model_ridge.fit(X_train, y_train)
ls_medias_ridge = cross_val_score(estimator=model_ridge, X=X_test, y = y_test, cv = 4, n_jobs=-1, scoring="r2")
np.mean(ls_medias_ridge), np.std(ls_medias_ridge)

(0.7101004100928999, 0.056724134354270345)

In [57]:
model_ridge.get_params()

{'alpha': 1.0,
 'copy_X': True,
 'fit_intercept': True,
 'max_iter': None,
 'normalize': False,
 'random_state': None,
 'solver': 'auto',
 'tol': 0.001}

In [58]:
param_grid = {
    "alpha": [x for x in range(1, 100)] + [y/10 for y in range(10)],
    "tol": [0.00001, 0.0000001, 0.01],
    "solver": ['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']
}

In [59]:
np.prod(list(map(len, param_grid.values())))

2289

In [60]:
clf_ridge = GridSearchCV(model_ridge, param_grid, cv=4, error_score=-1000, n_jobs=-1, scoring="r2")
clf_ridge.fit(X_train, y_train)
print("Best score: " + str(clf_ridge.best_score_))

Best score: 0.7162544957452257


In [61]:
summary_ridge = pd.DataFrame(clf_ridge.cv_results_)

In [62]:
summary_ridge

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,param_solver,param_tol,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,mean_test_score,std_test_score,rank_test_score
0,0.008736,0.005580,0.007121,0.004539,1,auto,1e-05,"{'alpha': 1, 'solver': 'auto', 'tol': 1e-05}",0.721728,0.745962,0.793213,0.589662,0.712641,0.075513,132
1,0.009355,0.003797,0.005395,0.000941,1,auto,1e-07,"{'alpha': 1, 'solver': 'auto', 'tol': 1e-07}",0.721728,0.745962,0.793213,0.589662,0.712641,0.075513,132
2,0.009103,0.004251,0.005023,0.000997,1,auto,0.01,"{'alpha': 1, 'solver': 'auto', 'tol': 0.01}",0.721728,0.745962,0.793213,0.589662,0.712641,0.075513,132
3,0.007832,0.001753,0.004279,0.000394,1,svd,1e-05,"{'alpha': 1, 'solver': 'svd', 'tol': 1e-05}",0.721728,0.745962,0.793213,0.589662,0.712641,0.075513,138
4,0.006307,0.000342,0.005071,0.001777,1,svd,1e-07,"{'alpha': 1, 'solver': 'svd', 'tol': 1e-07}",0.721728,0.745962,0.793213,0.589662,0.712641,0.075513,138
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2284,0.068827,0.005132,0.003345,0.000237,0.9,sag,1e-07,"{'alpha': 0.9, 'solver': 'sag', 'tol': 1e-07}",0.678892,0.715088,0.738276,0.600889,0.683286,0.052067,1438
2285,0.006583,0.000261,0.004616,0.002409,0.9,sag,0.01,"{'alpha': 0.9, 'solver': 'sag', 'tol': 0.01}",0.567192,0.602826,0.601439,0.550712,0.580542,0.022368,2136
2286,0.087357,0.007000,0.003361,0.000237,0.9,saga,1e-05,"{'alpha': 0.9, 'solver': 'saga', 'tol': 1e-05}",0.663364,0.697489,0.709287,0.606696,0.669209,0.039837,1652
2287,0.077342,0.007712,0.003091,0.000822,0.9,saga,1e-07,"{'alpha': 0.9, 'solver': 'saga', 'tol': 1e-07}",0.663361,0.697473,0.709262,0.606676,0.669193,0.039836,1657


In [63]:
summary_ridge.sort_values(by = "rank_test_score")

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,param_solver,param_tol,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,mean_test_score,std_test_score,rank_test_score
2079,0.006728,0.001784,0.003615,0.000221,0,auto,1e-05,"{'alpha': 0.0, 'solver': 'auto', 'tol': 1e-05}",0.722374,0.748016,0.796943,0.597686,0.716254,0.073511,1
2080,0.004487,0.000286,0.003619,0.000286,0,auto,1e-07,"{'alpha': 0.0, 'solver': 'auto', 'tol': 1e-07}",0.722374,0.748016,0.796943,0.597686,0.716254,0.073511,1
2081,0.004027,0.000085,0.002980,0.000058,0,auto,0.01,"{'alpha': 0.0, 'solver': 'auto', 'tol': 0.01}",0.722374,0.748016,0.796943,0.597686,0.716254,0.073511,1
2085,0.005268,0.000653,0.006061,0.004522,0,cholesky,1e-05,"{'alpha': 0.0, 'solver': 'cholesky', 'tol': 1e...",0.722374,0.748016,0.796943,0.597686,0.716254,0.073511,1
2086,0.005448,0.001228,0.003387,0.000098,0,cholesky,1e-07,"{'alpha': 0.0, 'solver': 'cholesky', 'tol': 1e...",0.722374,0.748016,0.796943,0.597686,0.716254,0.073511,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1973,0.021745,0.010992,0.007179,0.003214,94,saga,0.01,"{'alpha': 94, 'solver': 'saga', 'tol': 0.01}",0.526921,0.570489,0.595270,0.510625,0.550826,0.033725,2285
1763,0.013133,0.004130,0.004285,0.000708,84,saga,0.01,"{'alpha': 84, 'solver': 'saga', 'tol': 0.01}",0.526511,0.571174,0.594571,0.510815,0.550768,0.033615,2286
1805,0.012026,0.006052,0.003752,0.000609,86,saga,0.01,"{'alpha': 86, 'solver': 'saga', 'tol': 0.01}",0.526712,0.572168,0.594523,0.509421,0.550706,0.034134,2287
2057,0.008018,0.000266,0.003099,0.000053,98,saga,0.01,"{'alpha': 98, 'solver': 'saga', 'tol': 0.01}",0.526160,0.570280,0.594949,0.511121,0.550628,0.033578,2288


In [64]:
dc_scores[str(model_ridge).split("(")[0]] = {"model": clf_ridge.best_estimator_, "score": clf_ridge.best_score_}

In [65]:
dc_scores


{'Lasso': {'model': Lasso(alpha=0.0, tol=1e-05), 'score': 0.7162544957452255},
 'Ridge': {'model': Ridge(alpha=0.0, tol=1e-05), 'score': 0.7162544957452257}}

## Elastic Net

In [66]:
Ridge?

In [67]:
ElasticNet?

In [68]:
model_elastic = ElasticNet()

In [69]:
model_elastic.fit(X_train, y_train)
ls_medias_elastic = cross_val_score(estimator=model_elastic, X=X_test, y = y_test, cv = 4, n_jobs=-1, scoring="r2")
np.mean(ls_medias_elastic), np.std(ls_medias_elastic)

(0.6650999296743175, 0.07533556883650339)

In [70]:
model_elastic.get_params()

{'alpha': 1.0,
 'copy_X': True,
 'fit_intercept': True,
 'l1_ratio': 0.5,
 'max_iter': 1000,
 'normalize': False,
 'positive': False,
 'precompute': False,
 'random_state': None,
 'selection': 'cyclic',
 'tol': 0.0001,
 'warm_start': False}

In [71]:
param_grid = {
    "alpha": [x for x in range(1, 100)] + [y/10 for y in range(10)],
    "l1_ratio": [x/1000 for x in range(1, 100)] + [y/10 for y in range(10)],
}

In [72]:
np.prod(list(map(len, param_grid.values())))

11881

In [51]:
clf_elastic = GridSearchCV(model_elastic, param_grid, cv=4, error_score=-1000, n_jobs=-1, scoring="r2", verbose=5,)
clf_elastic.fit(X_train, y_train)
print("Best score: " + str(clf_elastic.best_score_))

Fitting 4 folds for each of 11881 candidates, totalling 47524 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Done 204 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done 1552 tasks      | elapsed:    4.2s
[Parallel(n_jobs=-1)]: Done 3568 tasks      | elapsed:    9.7s
[Parallel(n_jobs=-1)]: Done 6160 tasks      | elapsed:   16.5s
[Parallel(n_jobs=-1)]: Done 9328 tasks      | elapsed:   24.6s
[Parallel(n_jobs=-1)]: Done 13072 tasks      | elapsed:   33.4s
[Parallel(n_jobs=-1)]: Done 17392 tasks      | elapsed:   43.2s
[Parallel(n_jobs=-1)]: Done 22288 tasks      | elapsed:   54.9s
[Parallel(n_jobs=-1)]: Done 27760 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 33808 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 40432 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 47437 tasks      | elapsed:  1.9min


Best score: 0.7039226119356685


[Parallel(n_jobs=-1)]: Done 47524 out of 47524 | elapsed:  1.9min finished
  self.best_estimator_.fit(X, y, **fit_params)
  positive)
  positive)


In [52]:
summary_elastic = pd.DataFrame(clf_elastic.cv_results_)

In [53]:
summary_elastic.sort_values(by = "rank_test_score")

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,param_l1_ratio,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,mean_test_score,std_test_score,rank_test_score
10898,0.015740,0.002050,0.008296,0.003323,0,0.8,"{'alpha': 0.0, 'l1_ratio': 0.8}",0.697849,0.694097,0.662440,0.761304,0.703923,0.035871,1
10869,0.015426,0.006580,0.005710,0.002009,0,0.079,"{'alpha': 0.0, 'l1_ratio': 0.079}",0.697849,0.694097,0.662440,0.761304,0.703923,0.035871,1
10868,0.018191,0.006144,0.010477,0.005732,0,0.078,"{'alpha': 0.0, 'l1_ratio': 0.078}",0.697849,0.694097,0.662440,0.761304,0.703923,0.035871,1
10867,0.015622,0.005479,0.005426,0.000778,0,0.077,"{'alpha': 0.0, 'l1_ratio': 0.077}",0.697849,0.694097,0.662440,0.761304,0.703923,0.035871,1
10866,0.014800,0.003767,0.004209,0.000455,0,0.076,"{'alpha': 0.0, 'l1_ratio': 0.076}",0.697849,0.694097,0.662440,0.761304,0.703923,0.035871,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10354,0.008141,0.001185,0.006945,0.003804,95,0.9,"{'alpha': 95, 'l1_ratio': 0.9}",0.246056,0.175134,0.215186,0.249875,0.221563,0.029991,11877
10463,0.014416,0.014210,0.010371,0.009295,96,0.9,"{'alpha': 96, 'l1_ratio': 0.9}",0.245886,0.174828,0.214981,0.249809,0.221376,0.030070,11878
10572,0.007075,0.000801,0.007934,0.005393,97,0.9,"{'alpha': 97, 'l1_ratio': 0.9}",0.245714,0.174520,0.214775,0.249738,0.221187,0.030148,11879
10681,0.013741,0.004829,0.016866,0.004820,98,0.9,"{'alpha': 98, 'l1_ratio': 0.9}",0.245539,0.174210,0.214567,0.249664,0.220995,0.030226,11880


In [54]:
dc_scores[str(model_elastic).split("(")[0]] = {"model": clf_elastic.best_estimator_, "score": clf_elastic.best_score_}

In [55]:
dc_scores

{'Lasso': {'model': Lasso(alpha=0.0, tol=1e-05), 'score': 0.7039226119356685},
 'Ridge': {'model': Ridge(alpha=0.0, solver='lsqr', tol=1e-05),
  'score': 0.7039248166523288},
 'ElasticNet': {'model': ElasticNet(alpha=0.0, l1_ratio=0.001),
  'score': 0.7039226119356685}}

In [56]:
clf = RandomizedSearchCV(n_iter=2000, estimator=model_elastic, param_distributions=param_grid, cv=4, error_score=-1000, n_jobs=-1, scoring="r2", verbose=5)
clf.fit(X_train, y_train)
print("Best score: " + str(clf.best_score_))

Fitting 4 folds for each of 2000 candidates, totalling 8000 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Done 176 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 1552 tasks      | elapsed:    3.3s
[Parallel(n_jobs=-1)]: Done 3568 tasks      | elapsed:    6.8s
[Parallel(n_jobs=-1)]: Done 6160 tasks      | elapsed:   10.8s


Best score: 0.7039226119356685


[Parallel(n_jobs=-1)]: Done 8000 out of 8000 | elapsed:   13.8s finished
  self.best_estimator_.fit(X, y, **fit_params)
  positive)
  positive)


## Guardar el Modelo

In [57]:
dc_scores

{'Lasso': {'model': Lasso(alpha=0.0, tol=1e-05), 'score': 0.7039226119356685},
 'Ridge': {'model': Ridge(alpha=0.0, solver='lsqr', tol=1e-05),
  'score': 0.7039248166523288},
 'ElasticNet': {'model': ElasticNet(alpha=0.0, l1_ratio=0.001),
  'score': 0.7039226119356685}}

In [73]:
dc_scores['Ridge']['model']

Ridge(alpha=0.0, tol=1e-05)

In [74]:
pd.to_pickle(dc_scores['Ridge']['model'],'model_ridge_housing.pkl')

In [75]:
modelo_ganador=pd.read_pickle('model_ridge_housing.pkl')

In [76]:
modelo_ganador.predict(X_test)

array([14.57365781, 29.72147382, 24.70774663, 24.43056519, 12.27614664,
       21.24569033, 16.61048106,  2.99248138, 22.73651538, 38.3704696 ,
       24.9134093 , 35.51888005, 26.24465748, 22.79709479, 34.80869692,
       11.98133787, 27.19428072, 17.64009066, 17.42247711, 25.9554644 ,
       16.54921459, 30.89509118, 25.77074177,  6.34721333, 37.63246636,
       25.13635952, 26.48667895, 24.32248731, 16.70199369, 23.55462898,
       24.67454083, 17.35743361, 32.23542768, 23.83075902, 18.99996142,
       22.17090867, 23.79316329, 20.23489601, 17.66845129, 20.67169728,
       -6.73332691,  8.04277852, 32.01746851, 27.20257336, 30.97264655,
       36.5431944 , 13.08154311, 27.66295951, 19.58066711, 27.86796527,
       21.16561181, 30.94165467,  3.93185828, 26.50188277, 25.45674385,
       32.91254868, 23.00318887,  8.31004745, 14.41136953, 33.60388896,
       27.46194056, 29.14125113, 21.91050651, 18.98143451, 28.54805711,
        6.44578785, 22.99333427, 20.62352659, 23.6194863 , 10.67