# Regularization Part 3 - Elastic Net Gridsearch

## Import libraries and Read Dataset


In [148]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Notebook-wide display defaults: 10x6 figures, and show up to 100 columns when a DataFrame is rendered.
plt.rcParams["figure.figsize"] = (10,6)
pd.set_option('display.max_columns', 100)

In [149]:
# Load the Advertising dataset (CSV) directly from its GitHub URL; this cell needs network access.
df = pd.read_csv("https://raw.githubusercontent.com/msddk/DS_Bootcamp/main/Machine%20Learning/Linear%20Regression/Advertising.csv")

In [150]:
df.head()

Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


In [151]:
df.shape

(200, 4)

In [152]:
# Target is the "sales" column; every remaining column is kept as a predictor.
y = df["sales"]
X = df.drop(columns="sales")

In [153]:
X.head()

Unnamed: 0,TV,radio,newspaper
0,230.1,37.8,69.2
1,44.5,39.3,45.1
2,17.2,45.9,69.3
3,151.5,41.3,58.5
4,180.8,10.8,58.4


### Polynomial Features

In [154]:
from sklearn.preprocessing import PolynomialFeatures

We will deliberately create an overfitting scenario by generating polynomial features up to degree 5.

In [155]:
# Expand the predictors into all polynomial and interaction terms up to
# degree 5; the constant (bias) column is left out.
polynomial_converter = PolynomialFeatures(
    degree=5,
    include_bias=False,
)

# Learn the term layout from X and build the expanded feature matrix in one step.
poly_features = polynomial_converter.fit_transform(X)

In [156]:
poly_features.shape

(200, 55)

### Train | Test Split

In [157]:
from sklearn.model_selection import train_test_split

In [158]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    poly_features,
    y,
    test_size=0.3,
    random_state=42,
)

### Scaling the Data

In [159]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [160]:
# Fit the scaler on the training split only, so no test-set statistics leak into the scaling.
scaler.fit(X_train)

StandardScaler()

In [161]:
# Apply the training-split scaling to both splits; the test split is transformed, never fitted on.
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Elastic Net

In [162]:
from sklearn.linear_model import ElasticNet, ElasticNetCV

In [163]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

def train_val(y_train, y_train_pred, y_test, y_pred, name):
    """Summarise train and test regression metrics side by side.

    Parameters
    ----------
    y_train, y_train_pred
        True and predicted targets for the training split.
    y_test, y_pred
        True and predicted targets for the test split.
    name : str
        Prefix for the two column labels (``<name>_train`` and ``<name>_test``).

    Returns
    -------
    pandas.DataFrame
        Rows ``R2``, ``mae``, ``mse`` and ``rmse``; one column per split.
    """

    def _metrics(actual, predicted):
        # The MSE is computed once and reused to derive the RMSE.
        mse = mean_squared_error(actual, predicted)
        return {
            "R2": r2_score(actual, predicted),
            "mae": mean_absolute_error(actual, predicted),
            "mse": mse,
            "rmse": np.sqrt(mse),
        }

    summary = {
        name + "_train": _metrics(y_train, y_train_pred),
        name + "_test": _metrics(y_test, y_pred),
    }
    return pd.DataFrame(summary)

In [164]:
# Baseline Elastic Net with an even mix of the L1 and L2 penalties, fitted on the scaled training split.
elastic_model = ElasticNet(alpha=1, l1_ratio=0.5, random_state=42) # l1_ratio: 1 -> Lasso, 0 -> Ridge
elastic_model.fit(X_train_scaled, y_train)

ElasticNet(alpha=1, random_state=42)

In [165]:
# Predict on both splits so train and test errors can be compared for signs of over- or under-fitting.
y_pred = elastic_model.predict(X_test_scaled)
y_train_pred = elastic_model.predict(X_train_scaled)

In [166]:
es = train_val(y_train, y_train_pred, y_test, y_pred, "elastic")
es

Unnamed: 0,elastic_train,elastic_test
R2,0.895108,0.897352
mae,1.214719,1.235418
mse,2.75246,2.802755
rmse,1.659054,1.674143


### ElasticNet Cross Validation (Default parameters: `alpha = 1` and `l1_ratio=0.5`)

In [167]:
from sklearn.model_selection import cross_validate

In [168]:
# 5-fold cross-validation of the baseline Elastic Net, using the training split only.
# NOTE: X_train_scaled was standardised with statistics from the whole training
# split, so each validation fold has already influenced the scaling; wrapping the
# scaler and the model in a Pipeline would keep the folds fully independent.
model = ElasticNet(alpha=1, l1_ratio=0.5, random_state=42)
scores = cross_validate(
    model,
    X_train_scaled,
    y_train,
    scoring=[
        'r2',
        'neg_mean_absolute_error',
        'neg_mean_squared_error',
        'neg_root_mean_squared_error',
    ],
    cv=5,
)

In [169]:
# One row per fold, labelled 1-5 to match cv=5.
# The first two columns are skipped so that only the test_* metric columns are displayed.
scores = pd.DataFrame(scores, index = range(1, 6))
scores.iloc[:,2:]

Unnamed: 0,test_r2,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_root_mean_squared_error
1,0.917248,-1.063815,-1.693685,-1.301417
2,0.914136,-1.180277,-2.175158,-1.474842
3,0.856208,-1.631517,-4.130984,-2.032482
4,0.944925,-1.001975,-1.76335,-1.327912
5,0.766037,-1.385597,-4.885967,-2.210422


In [170]:
# `scores` already holds one row per fold (cv=5, index 1-5). Re-wrapping it with
# index=range(1, 11) would reindex the frame to ten rows and pad folds 6-10 with
# NaN, so the per-metric averages are taken from the existing frame directly.
scores.iloc[:, 2:].mean()

test_r2                             0.879711
test_neg_mean_absolute_error       -1.252636
test_neg_mean_squared_error        -2.929829
test_neg_root_mean_squared_error   -1.669415
dtype: float64

In [171]:
train_val(y_train, y_train_pred, y_test, y_pred, "elastic")

Unnamed: 0,elastic_train,elastic_test
R2,0.895108,0.897352
mae,1.214719,1.235418
mse,2.75246,2.802755
rmse,1.659054,1.674143


In [172]:
elastic_model.coef_

array([0.86989993, 0.31154405, 0.        , 0.29973261, 0.9703704 ,
       0.04306067, 0.082779  , 0.        , 0.        , 0.        ,
       0.52735708, 0.        , 0.57776417, 0.09286208, 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.17439297, 0.        , 0.25339469, 0.        , 0.        ,
       0.30318853, 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.0361855 , 0.        , 0.        , 0.        , 0.10636793,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ])

In [173]:
# Keep the baseline coefficients in a labelled one-column frame so they can be compared with other fits later.
em_df = pd.DataFrame(elastic_model.coef_, columns=["elastic_coef_(alpha:1, l1:0.5)"])
em_df

Unnamed: 0,"elastic_coef_(alpha:1, l1:0.5)"
0,0.8699
1,0.311544
2,0.0
3,0.299733
4,0.97037
5,0.043061
6,0.082779
7,0.0
8,0.0
9,0.0


### Choosing the best `alpha` and `l1_ratio` values with ElasticNetCV

In [174]:
# Candidate alphas: 100 evenly spaced values from 0.01 to 1 inclusive (a step of 0.01).
alpha_space = np.linspace(0.01, 1, 100)
alpha_space

array([0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1 , 0.11,
       0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 , 0.21, 0.22,
       0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3 , 0.31, 0.32, 0.33,
       0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 , 0.41, 0.42, 0.43, 0.44,
       0.45, 0.46, 0.47, 0.48, 0.49, 0.5 , 0.51, 0.52, 0.53, 0.54, 0.55,
       0.56, 0.57, 0.58, 0.59, 0.6 , 0.61, 0.62, 0.63, 0.64, 0.65, 0.66,
       0.67, 0.68, 0.69, 0.7 , 0.71, 0.72, 0.73, 0.74, 0.75, 0.76, 0.77,
       0.78, 0.79, 0.8 , 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87, 0.88,
       0.89, 0.9 , 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99,
       1.  ])

In [175]:
# Cross-validated search over every (alpha, l1_ratio) pair; the iteration cap is
# raised so the solver has room to converge on the weakly regularised candidates.
elastic_cv_model = ElasticNetCV(
    alphas=alpha_space,
    l1_ratio=[0.1, 0.5, 0.7, 0.9, 0.95, 1],
    cv=5,
    max_iter=100000,
    random_state=42,
)

In [176]:
elastic_cv_model.fit(X_train_scaled, y_train)

ElasticNetCV(alphas=array([0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1 , 0.11,
       0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 , 0.21, 0.22,
       0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3 , 0.31, 0.32, 0.33,
       0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 , 0.41, 0.42, 0.43, 0.44,
       0.45, 0.46, 0.47, 0.48, 0.49, 0.5 , 0.51, 0.52, 0.53, 0.54, 0.55,
       0.56, 0.57, 0.58, 0.59, 0.6 , 0.61, 0.62, 0.63, 0.64, 0.65, 0.66,
       0.67, 0.68, 0.69, 0.7 , 0.71, 0.72, 0.73, 0.74, 0.75, 0.76, 0.77,
       0.78, 0.79, 0.8 , 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87, 0.88,
       0.89, 0.9 , 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99,
       1.  ]),
             cv=5, l1_ratio=[0.1, 0.5, 0.7, 0.9, 0.95, 1], max_iter=100000,
             random_state=42)

In [177]:
elastic_cv_model.alpha_

0.01

In [178]:
elastic_cv_model.l1_ratio_

1.0

In [179]:
# Overwrite the baseline predictions with those of the cross-validated model (the baseline metrics are already saved in `es`).
y_pred = elastic_cv_model.predict(X_test_scaled)
y_train_pred = elastic_cv_model.predict(X_train_scaled)

In [180]:
ecs = train_val(y_train, y_train_pred, y_test, y_pred, "elastic_cv")
ecs

Unnamed: 0,elastic_cv_train,elastic_cv_test
R2,0.98748,0.985852
mae,0.347294,0.50324
mse,0.328546,0.386314
rmse,0.573189,0.621542


In [181]:
pd.concat([es, ecs], axis = 1)

Unnamed: 0,elastic_train,elastic_test,elastic_cv_train,elastic_cv_test
R2,0.895108,0.897352,0.98748,0.985852
mae,1.214719,1.235418,0.347294,0.50324
mse,2.75246,2.802755,0.328546,0.386314
rmse,1.659054,1.674143,0.573189,0.621542


In [182]:
elastic_cv_model.coef_

array([ 4.61288745,  0.45927272,  0.2663032 , -3.18923426,  3.43866735,
       -0.31073057,  0.        ,  0.        ,  0.        , -0.        ,
       -0.        , -0.02050749,  0.08764271, -0.        , -0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
       -0.        , -0.        , -0.        , -0.        ,  0.        ,
        0.0088969 , -0.        , -0.        ,  0.        ,  0.        ,
        0.01033616,  0.        ,  0.        ,  0.        ,  0.32445235,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
       -0.        , -0.        ,  0.        ,  0.        ,  0.        ,
       -0.        , -0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.10144876,  0.        ,  0.        ,  0.        ])

In [183]:
ecm_df = pd.DataFrame(elastic_cv_model.coef_, columns=["elastic_coef_(alpha:0.01, l1:1)"])

In [184]:
pd.concat([em_df, ecm_df], axis = 1)

Unnamed: 0,"elastic_coef_(alpha:1, l1:0.5)","elastic_coef_(alpha:0.01, l1:1)"
0,0.8699,4.612887
1,0.311544,0.459273
2,0.0,0.266303
3,0.299733,-3.189234
4,0.97037,3.438667
5,0.043061,-0.310731
6,0.082779,0.0
7,0.0,0.0
8,0.0,0.0
9,0.0,-0.0


### Grid Search 

In [185]:
from sklearn.model_selection import GridSearchCV

In [186]:
# Fresh, unfitted estimator for the grid search; alpha and l1_ratio are supplied by the grid.
# NOTE: this rebinds `elastic_model`, so the fitted baseline is no longer reachable under that
# name; its metrics and coefficients remain available in `es` and `em_df`.
elastic_model = ElasticNet(max_iter=10000, random_state=42)

In [187]:
# Hyper-parameter grid: 7 alphas x 7 l1_ratios = 49 candidate combinations.
# NOTE(review): 0.012 sits right next to 0.01 - possibly meant to be 0.1 or 0.12; confirm.
param_grid = {"alpha":[0.01, 0.012, 0.2, 0.5, 0.6, 0.7, 1],
            "l1_ratio":[0.1, 0.5, 0.7, 0.9, 0.95, 0.99, 1]}

In [188]:
# Exhaustive 5-fold search over `param_grid`, ranking candidates by (negated) RMSE
# so that a higher score means a lower error. verbose=2 logs every individual fit.
grid_model = GridSearchCV(
    estimator=elastic_model,
    param_grid=param_grid,
    scoring='neg_root_mean_squared_error',
    cv=5,
    verbose=2,
)

In [189]:
grid_model.fit(X_train_scaled, y_train)

Fitting 5 folds for each of 49 candidates, totalling 245 fits
[CV] END ...........................alpha=0.01, l1_ratio=0.1; total time=   0.0s
[CV] END ...........................alpha=0.01, l1_ratio=0.1; total time=   0.0s
[CV] END ...........................alpha=0.01, l1_ratio=0.1; total time=   0.0s
[CV] END ...........................alpha=0.01, l1_ratio=0.1; total time=   0.0s
[CV] END ...........................alpha=0.01, l1_ratio=0.1; total time=   0.0s
[CV] END ...........................alpha=0.01, l1_ratio=0.5; total time=   0.0s
[CV] END ...........................alpha=0.01, l1_ratio=0.5; total time=   0.0s
[CV] END ...........................alpha=0.01, l1_ratio=0.5; total time=   0.0s
[CV] END ...........................alpha=0.01, l1_ratio=0.5; total time=   0.0s
[CV] END ...........................alpha=0.01, l1_ratio=0.5; total time=   0.0s
[CV] END ...........................alpha=0.01, l1_ratio=0.7; total time=   0.0s
[CV] END ...........................alpha=0.01,

GridSearchCV(cv=5, estimator=ElasticNet(max_iter=10000, random_state=42),
             param_grid={'alpha': [0.01, 0.012, 0.2, 0.5, 0.6, 0.7, 1],
                         'l1_ratio': [0.1, 0.5, 0.7, 0.9, 0.95, 0.99, 1]},
             scoring='neg_root_mean_squared_error', verbose=2)

In [190]:
grid_model.best_params_

{'alpha': 0.01, 'l1_ratio': 1}

In [191]:
pd.DataFrame(grid_model.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,param_l1_ratio,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.016907,0.007634,0.000386,6e-05,0.01,0.1,"{'alpha': 0.01, 'l1_ratio': 0.1}",-0.518456,-0.714007,-0.684389,-0.524966,-1.272285,-0.742821,0.276542,14
1,0.003153,0.000974,0.000306,4e-05,0.01,0.5,"{'alpha': 0.01, 'l1_ratio': 0.5}",-0.490858,-0.543527,-0.655183,-0.500694,-1.230979,-0.684248,0.279519,11
2,0.002303,0.000163,0.000269,8e-06,0.01,0.7,"{'alpha': 0.01, 'l1_ratio': 0.7}",-0.478381,-0.521565,-0.637412,-0.489429,-1.21807,-0.668971,0.280283,9
3,0.003281,0.000175,0.000284,4.1e-05,0.01,0.9,"{'alpha': 0.01, 'l1_ratio': 0.9}",-0.463831,-0.521283,-0.613812,-0.472519,-1.193579,-0.653005,0.275488,5
4,0.003493,0.000274,0.00028,7e-06,0.01,0.95,"{'alpha': 0.01, 'l1_ratio': 0.95}",-0.462007,-0.518117,-0.604001,-0.466589,-1.17639,-0.645421,0.27036,3
5,0.004115,0.000884,0.000285,2.1e-05,0.01,0.99,"{'alpha': 0.01, 'l1_ratio': 0.99}",-0.456287,-0.512238,-0.595029,-0.455268,-1.166363,-0.637037,0.269531,2
6,0.004115,0.000753,0.00029,1.8e-05,0.01,1.0,"{'alpha': 0.01, 'l1_ratio': 1}",-0.455932,-0.511051,-0.592362,-0.451495,-1.165487,-0.635265,0.26994,1
7,0.011298,0.005329,0.000292,2.3e-05,0.012,0.1,"{'alpha': 0.012, 'l1_ratio': 0.1}",-0.514829,-0.658855,-0.705831,-0.54101,-1.292208,-0.742547,0.283885,13
8,0.002779,0.000266,0.000328,6.6e-05,0.012,0.5,"{'alpha': 0.012, 'l1_ratio': 0.5}",-0.484481,-0.53817,-0.6812,-0.512026,-1.246364,-0.692448,0.28515,12
9,0.002785,0.000375,0.000291,2.4e-05,0.012,0.7,"{'alpha': 0.012, 'l1_ratio': 0.7}",-0.472428,-0.530224,-0.666661,-0.509201,-1.23008,-0.681719,0.281937,10


In [192]:
grid_model.best_index_

6

In [193]:
grid_model.best_score_

-0.6352653285194695

## Using Best Hyperparameters from GridSearch

In [194]:
# Predict through the fitted search object, which delegates to the estimator built from the best parameter combination.
y_pred = grid_model.predict(X_test_scaled)
y_train_pred = grid_model.predict(X_train_scaled)

In [195]:
train_val(y_train, y_train_pred, y_test, y_pred, "GridSearch")

Unnamed: 0,GridSearch_train,GridSearch_test
R2,0.98748,0.985852
mae,0.347294,0.50324
mse,0.328546,0.386314
rmse,0.573189,0.621542


## Final Model

In [196]:
# For the final model nothing is held out, so a new scaler is fitted on the full polynomial feature matrix.
final_scaler = StandardScaler()
X_scaled = final_scaler.fit_transform(poly_features)

In [197]:
from sklearn.linear_model import Lasso
# The search selected l1_ratio=1 (a pure L1 penalty), so the final model is a Lasso with the winning alpha.
# NOTE(review): alpha is hard-coded rather than read from grid_model.best_params_, and max_iter is
# left at Lasso's default instead of the 10000 used during the search - confirm this is intended.
final_model = Lasso(alpha=0.01)

In [198]:
final_model.fit(X_scaled, y)

Lasso(alpha=0.01)