# Linear , Ridge and Lasso Regression with GridSearchCV on Boston Housing Dataset
Byju N Govindan 

In [1]:

#Ridge:     Ridge regression addresses some of the problems of Ordinary Least Squares by imposing a penalty on the size of the coefficients with l2 regularization.
#Lasso:     The Lasso is a linear model that estimates sparse coefficients with l1 regularization.
#ElasticNet: Elastic-Net is a linear regression model trained with both l1 and l2 -norm regularization of the coefficients.


In [2]:
# housing price dataset
from sklearn.datasets import load_boston

In [3]:
#import requird libraries

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
df = load_boston()
dataset = pd.DataFrame(df.data)
dataset.columns = df.feature_names
dataset.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


In [5]:
dataset['Price'] = df.target
dataset.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,Price
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [6]:
# Assume you have dataframe named "dataset" loaded with multiple records and columns.
# How will you access the independent variables (assume all but last column) and dependent variable (assume, the last column)

In [7]:
#Divide the dataset into independent and dependent features
X = dataset.iloc[:, :-1] ## access the independent variables ; all rows all but the last columnin the dataframe
y = dataset.iloc[:, -1] ## Dependent variable ; all rows of just the last column in the dataframe

In [8]:
# Linear Regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
lin_reg = LinearRegression()
mse = cross_val_score(lin_reg, X, y, scoring = 'neg_mean_squared_error', cv = 10) 
# first parameter is model, then independent variable and dependent variable
#Ideally you split the dataset to train and test dataset
# Then only give the traindatset only for crossvalidation.
# Here I am being lazy and just applying crossvalidation on entire dataset
#for scoring, you may give mean squared error or negative mean squared error etc...

mean_mse =np.mean(mse) # Average of all 5 cross validations
print(mean_mse)

-34.705255944524815


In [9]:
# if you have  split data to train and test data, you can do prediction on test data as
#lin_reg.predict(X_test)

In [10]:
#Ridge Regression
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV # Hyperparamter tuning
ridge = Ridge()

params = {'alpha': [1e-15, 1e-10, 1e-8, 1e-3, 1e-2, 1, 5, 10, 20, 30, 35, 40, 45, 50, 55, 100]}

ridge_regressor = GridSearchCV(ridge, params, scoring = 'neg_mean_squared_error', cv = 10) 
ridge_regressor.fit(X, y)

GridSearchCV(cv=10, estimator=Ridge(),
             param_grid={'alpha': [1e-15, 1e-10, 1e-08, 0.001, 0.01, 1, 5, 10,
                                   20, 30, 35, 40, 45, 50, 55, 100]},
             scoring='neg_mean_squared_error')

In [11]:
print(ridge_regressor.best_params_)
print(ridge_regressor.best_score_)

{'alpha': 100}
-29.615220097335175


In [12]:
# We did ridge regression to avoid overfitting. Yet, With GridSearchCV, we got a higher mean_mse.
#  Means Ridge regression is not doing better job than Linear Regression

In [13]:
#Lets try Lasso now.  #Lasso Regression
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV # Hyperparamter tuning
lasso = Lasso()

params = {'alpha': [1e-15, 1e-10, 1e-8, 1e-3, 1e-2, 1, 5, 10, 20, 30, 35, 40, 45, 50, 55, 100]}

lasso_regressor = GridSearchCV(lasso, params, scoring = 'neg_mean_squared_error', cv = 10) 
lasso_regressor.fit(X, y)

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


GridSearchCV(cv=10, estimator=Lasso(),
             param_grid={'alpha': [1e-15, 1e-10, 1e-08, 0.001, 0.01, 1, 5, 10,
                                   20, 30, 35, 40, 45, 50, 55, 100]},
             scoring='neg_mean_squared_error')

In [14]:
print(lasso_regressor.best_params_)
print(lasso_regressor.best_score_)

{'alpha': 0.01}
-34.45554381307912


In [15]:
# Lasso regression also did not yield us a better fit than Linear regression. 


In [16]:
# Understand the mean_mse value must go towards zero to have the best model



In [17]:
#Now lets split dataset into train and test datasets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.33, random_state=42)

In [18]:
# Linear Regression
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
mse = cross_val_score(lin_reg, X_train, y_train, scoring = 'neg_mean_squared_error', cv = 10) 
mean_mse =np.mean(mse) # Average of all 5 cross validations
print(mean_mse)


-25.473094575615903


In [19]:
 #Lasso Regression with train_test_split dataset
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV # Hyperparamter tuning
lin_regressor = LinearRegression()

params = {'alpha': [1e-15, 1e-10, 1e-8, 1e-3, 1e-2, 1, 5, 10, 20, 30, 35, 40, 45, 50, 55, 100]}

lin_regressor = GridSearchCV(lasso, params, scoring = 'neg_mean_squared_error', cv = 10) 
lin_regressor.fit(X_train, y_train)

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


GridSearchCV(cv=10, estimator=Lasso(),
             param_grid={'alpha': [1e-15, 1e-10, 1e-08, 0.001, 0.01, 1, 5, 10,
                                   20, 30, 35, 40, 45, 50, 55, 100]},
             scoring='neg_mean_squared_error')

In [20]:
lin_regressor.score(X_train, y_train)

-22.985015840300843

In [21]:
print(lin_regressor.best_params_)
print(lin_regressor.best_score_)

{'alpha': 1e-08}
-25.473094572833237


In [22]:
# if you have  split data to train and test data, you can do prediction on test data as
lin_regressor.predict(X_test)

array([28.53469457, 36.61870038, 15.63751051, 25.50144953, 18.70967356,
       23.16471553, 17.31011033, 14.0773636 , 23.01064349, 20.5422349 ,
       24.91632311, 18.41098048, -6.52079694, 21.83372577, 19.14903066,
       26.05873213, 20.30232607,  5.74943563, 40.33137805, 17.4579146 ,
       27.47486675, 30.21707564, 10.80555628, 23.8772175 , 17.99492226,
       16.02608761, 23.26828778, 14.36825218, 22.38116931, 19.30920694,
       22.17284558, 25.05925451, 25.13780726, 18.46730239, 16.60405678,
       17.46564111, 30.71367735, 20.05106816, 23.98977653, 24.94322399,
       13.97945361, 31.64706961, 42.48057194, 17.70042803, 26.92507866,
       17.15897728, 13.68918092, 26.14924236, 20.27823036, 29.99003508,
       21.21260346, 34.03649177, 15.41837559, 25.95781066, 39.13897287,
       22.9611842 , 18.8031058 , 33.07865363, 24.74384153, 12.83640948,
       22.41963416, 30.64804998, 31.5956712 , 16.34088222, 20.95043064,
       16.70145827, 20.23215651, 26.1437865 , 31.12160899, 11.89

In [23]:
#Ridge Regression
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV # Hyperparamter tuning
ridge = Ridge()

params = {'alpha': [1e-15, 1e-10, 1e-8, 1e-3, 1e-2, 1, 5, 10, 20, 30, 35, 40, 45, 50, 55, 100]}

ridge_regressor = GridSearchCV(ridge, params, scoring = 'neg_mean_squared_error', cv = 10) 
ridge_regressor.fit(X_train, y_train)

GridSearchCV(cv=10, estimator=Ridge(),
             param_grid={'alpha': [1e-15, 1e-10, 1e-08, 0.001, 0.01, 1, 5, 10,
                                   20, 30, 35, 40, 45, 50, 55, 100]},
             scoring='neg_mean_squared_error')

In [24]:
print(ridge_regressor.best_params_)
print(ridge_regressor.best_score_)

{'alpha': 0.01}
-25.472067363367756


In [25]:
# if you have  split data to train and test data, you can do prediction on test data as
ridge_regressor.predict(X_test)

array([28.52859307, 36.61391924, 15.62446448, 25.49794339, 18.71651761,
       23.14663912, 17.30883131, 14.07475692, 22.99166445, 20.54553336,
       24.90263157, 18.40809393, -6.52400321, 21.82072889, 19.14851303,
       26.0555678 , 20.29340873,  5.74691746, 40.3292144 , 17.46378034,
       27.47957205, 30.21416421, 10.80664516, 23.88716488, 18.00126984,
       16.01130598, 23.25744349, 14.37364861, 22.3671398 , 19.31468367,
       22.16363415, 25.06329806, 25.13823978, 18.48393229, 16.58826798,
       17.49443878, 30.7138197 , 20.06344919, 23.98208654, 24.9392659 ,
       13.98171769, 31.64424572, 42.47600973, 17.69471466, 26.92334773,
       17.1620035 , 13.6905632 , 26.145616  , 20.26656429, 29.99670048,
       21.21064505, 34.03300557, 15.42093099, 25.95970143, 39.14516313,
       22.95869281, 18.81261673, 33.07917693, 24.74240933, 12.83096178,
       22.42715454, 30.65664806, 31.59958777, 16.35222889, 20.96015475,
       16.68403699, 20.23389533, 26.14354346, 31.12585147, 11.90

In [26]:
 #Lasso Regression with train_test_split dataset
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV # Hyperparamter tuning
lasso = Lasso()

params = {'alpha': [1e-15, 1e-10, 1e-8, 1e-3, 1e-2, 1, 5, 10, 20, 30, 35, 40, 45, 50, 55, 100]}

lasso_regressor = GridSearchCV(lasso, params, scoring = 'neg_mean_squared_error', cv = 10) 
lasso_regressor.fit(X_train, y_train)

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


GridSearchCV(cv=10, estimator=Lasso(),
             param_grid={'alpha': [1e-15, 1e-10, 1e-08, 0.001, 0.01, 1, 5, 10,
                                   20, 30, 35, 40, 45, 50, 55, 100]},
             scoring='neg_mean_squared_error')

In [27]:
print(lasso_regressor.best_params_)
print(lasso_regressor.best_score_)

{'alpha': 1e-08}
-25.473094572833237


In [28]:
# if you have  split data to train and test data, you can do prediction on test data as
lasso_regressor.predict(X_test)

array([28.53469457, 36.61870038, 15.63751051, 25.50144953, 18.70967356,
       23.16471553, 17.31011033, 14.0773636 , 23.01064349, 20.5422349 ,
       24.91632311, 18.41098048, -6.52079694, 21.83372577, 19.14903066,
       26.05873213, 20.30232607,  5.74943563, 40.33137805, 17.4579146 ,
       27.47486675, 30.21707564, 10.80555628, 23.8772175 , 17.99492226,
       16.02608761, 23.26828778, 14.36825218, 22.38116931, 19.30920694,
       22.17284558, 25.05925451, 25.13780726, 18.46730239, 16.60405678,
       17.46564111, 30.71367735, 20.05106816, 23.98977653, 24.94322399,
       13.97945361, 31.64706961, 42.48057194, 17.70042803, 26.92507866,
       17.15897728, 13.68918092, 26.14924236, 20.27823036, 29.99003508,
       21.21260346, 34.03649177, 15.41837559, 25.95781066, 39.13897287,
       22.9611842 , 18.8031058 , 33.07865363, 24.74384153, 12.83640948,
       22.41963416, 30.64804998, 31.5956712 , 16.34088222, 20.95043064,
       16.70145827, 20.23215651, 26.1437865 , 31.12160899, 11.89

In [29]:
y_pred = lasso_regressor.predict(X_test)
from sklearn.metrics import r2_score

r2_score_lasso = r2_score(y_pred, y_test)
print(r2_score_lasso)


0.6709558959121944


In [30]:
y_pred = ridge_regressor.predict(X_test)
r2_score_ridge = r2_score(y_pred, y_test)
print(r2_score_ridge)

0.670874325753307


In [31]:
y_pred = lin_regressor.predict(X_test)
r2_score_linear = r2_score(y_pred, y_test)
print(r2_score_linear)

0.6709558959121944
