# Lasso And Ridge Regularization

In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.preprocessing import PolynomialFeatures

# Read the housing table from boston.csv (resolved relative to the working
# directory) and preview its first five rows.
csv_path = 'boston.csv'
data = pd.read_csv(csv_path)
data.head(5)

Unnamed: 0.1,Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,Price
0,0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [2]:
# Remove the "Unnamed: 0" column; in the preview it just repeats the row
# number (0, 1, 2, ...), so it looks like a saved index rather than a feature.
#
# The drop is done by rebinding `data` (no inplace mutation) and with
# errors="ignore", so this cell is idempotent: running it a second time is a
# no-op instead of raising KeyError because the column is already gone.
data = data.drop(columns="Unnamed: 0", errors="ignore")
data.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,Price
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [3]:
# (rows, columns) of the frame after the index-like column was dropped.
data.shape

(506, 14)

In [4]:
# Count missing values per column; all zeros means no imputation is needed.
data.isna().sum()

CRIM       0
ZN         0
INDUS      0
CHAS       0
NOX        0
RM         0
AGE        0
DIS        0
RAD        0
TAX        0
PTRATIO    0
B          0
LSTAT      0
Price      0
dtype: int64

In [5]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# partition reproducible across runs.
train_set, test_set = train_test_split(data, test_size=0.2, random_state=42)

# Show the size of each partition (train first, then test).
for subset in (train_set, test_set):
    print(subset.shape)

(404, 14)
(102, 14)


In [6]:
# Separate the predictors from the target column ("Price") in each partition.
X_train, y_train = train_set.drop(columns="Price"), train_set["Price"]
X_test, y_test = test_set.drop(columns="Price"), test_set["Price"]

# Sanity check: feature matrices are 2-D, targets are 1-D, and the row counts
# of each X / y pair agree.
for part in (X_train, y_train, X_test, y_test):
    print(part.shape)

(404, 13)
(404,)
(102, 13)
(102,)


In [7]:
# Standardize the features. The scaler's statistics are learned from the
# training rows only and then applied unchanged to the test rows, so nothing
# about the test set leaks into preprocessing.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [8]:
# Three linear estimators on the standardized features: an unpenalized
# baseline, an L1-penalized model (Lasso) and an L2-penalized model (Ridge).
reg_model = LinearRegression()
lasso = Lasso(alpha=0.1)
ridge = Ridge(alpha=0.1)

# Fit each estimator on the training split and print its coefficient vector so
# the effect of each penalty on the weights can be compared side by side.
for label, estimator in (
    ("Linear model:", reg_model),
    ("Lasso model:", lasso),
    ("Ridge model:", ridge),
):
    estimator.fit(X_train_scaled, y_train)
    print(label, estimator.coef_)

Linear model: [-1.00213533  0.69626862  0.27806485  0.7187384  -2.0223194   3.14523956
 -0.17604788 -3.0819076   2.25140666 -1.76701378 -2.03775151  1.12956831
 -3.61165842]
Lasso model: [-0.71836455  0.25962714 -0.          0.69822096 -1.56814243  3.27150693
 -0.         -2.28444944  0.67193802 -0.3566537  -1.89333519  1.03136581
 -3.60941047]
Ridge model: [-1.00111591  0.69436316  0.27539404  0.71912548 -2.01912122  3.14590087
 -0.17617627 -3.07816919  2.24333232 -1.75959591 -2.03674427  1.12933027
 -3.61037565]


In [9]:
# (estimator, training label, testing label) in the order they are reported.
score_report = (
    (reg_model, "Linear Regression Model Training Score: ", "Linear Regression Model Testing Score: "),
    (ridge, "Ridge Regression Model Training Score: ", "Ridge Regression Model Testing Score: "),
    (lasso, "Lasso Regression Model Training Score: ", "Lasso Regression Model Testing Score: "),
)

# Print the train and test score (.score is R^2 for scikit-learn regressors)
# of each estimator, with a blank line between estimators.
for position, (estimator, train_label, test_label) in enumerate(score_report):
    if position:
        print()
    print(train_label, estimator.score(X_train_scaled, y_train))
    print(test_label, estimator.score(X_test_scaled, y_test))

Linear Regression Model Training Score:  0.7508856358979673
Linear Regression Model Testing Score:  0.668759493535632

Ridge Regression Model Training Score:  0.7508854741163441
Ridge Regression Model Testing Score:  0.6687298368808312

Lasso Regression Model Training Score:  0.7450370791852488
Lasso Regression Model Testing Score:  0.6501375183238987


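The three models score almost identically, but the regularized ones are simpler: Lasso, for example, shrinks two of the coefficients exactly to zero. Next we build a polynomial regression model by generating degree-2 polynomial features from the original features and then standardizing the expanded data.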

In [10]:
# Expand the original predictors into all polynomial terms up to degree 2.
# The expansion is fitted on the training rows and re-applied to the test rows.
# NOTE(review): with the default include_bias=True the expansion also emits a
# constant column; it carries no information once standardized. Consider
# include_bias=False (this would change the feature count shown below).
poly = PolynomialFeatures(degree=2)
poly.fit(X_train)
X_train_poly, X_test_poly = poly.transform(X_train), poly.transform(X_test)

# Standardize the expanded features, again using training statistics only.
# These assignments rebind `scaler`, `X_train_scaled` and `X_test_scaled`,
# which an earlier cell bound to the unexpanded 13-feature data; cells that
# score the earlier models must not be re-run after this point.
scaler = StandardScaler()
scaler.fit(X_train_poly)
X_train_scaled = scaler.transform(X_train_poly)
X_test_scaled = scaler.transform(X_test_poly)

# Confirm the expanded feature count on both partitions.
for matrix in (X_train_scaled, X_test_scaled):
    print(matrix.shape)

(404, 105)
(102, 105)


In [11]:
# Refit the same three estimator types on the standardized polynomial
# features. Ridge uses a much stronger penalty here (alpha=30) than Lasso
# (alpha=0.1).
reg_model = LinearRegression()
lasso = Lasso(alpha=0.1)
ridge = Ridge(alpha=30)

# Fit each estimator on the training split and print its coefficient vector.
for label, estimator in (
    ("Linear model:", reg_model),
    ("Lasso model:", lasso),
    ("Ridge model:", ridge),
):
    estimator.fit(X_train_scaled, y_train)
    print(label, estimator.coef_)

Linear model: [ 2.09595016e-11 -6.42920530e+01  1.52112064e+01 -3.25121792e+01
  9.49135961e+00  3.36010216e+01  1.26669624e+01  1.83829850e+01
 -8.64724047e+00  2.26049364e+01  3.78272080e-01  1.70377286e+01
  1.27082025e+01 -1.37765253e+00  2.25998895e-01  1.90418618e-01
  9.02348090e+01  2.29774549e+00 -1.21412085e+01  1.01723957e+01
 -1.82084846e+00 -1.06508771e+00  6.70989855e+01 -2.11420775e+02
  1.17000453e+02 -1.17607874e+00  6.19035801e+00 -7.82511615e-01
 -3.97300370e-01 -2.28808076e-01 -1.31942510e+01 -5.51619520e-01
  9.95876878e-01 -3.67644887e+00 -1.53781596e+00  6.27397511e+00
 -1.20885912e+00  7.31599298e-01 -1.58773362e+00  9.18827002e+00
 -1.01191055e-01 -1.09750638e+00  1.38474816e+01  3.18030523e+00
  2.14955634e+00 -5.73961017e+00  9.79861050e-01 -2.24573690e+00
  8.05707681e+00 -3.04303113e+00  9.49135961e+00 -5.98888720e+00
 -9.06158985e+00 -7.29966041e-01 -7.42346339e-01  4.83005235e-01
 -8.87256356e-01 -3.79100204e+00  1.09762617e+00 -8.85151006e-01
 -1.4053106

In [12]:
# (estimator, training label, testing label) in the order they are reported.
score_report = (
    (reg_model, "Linear Regression Model Training Score: ", "Linear Regression Model Testing Score: "),
    (ridge, "Ridge Regression Model Training Score: ", "Ridge Regression Model Testing Score: "),
    (lasso, "Lasso Regression Model Training Score: ", "Lasso Regression Model Testing Score: "),
)

# Print the train and test score (.score is R^2 for scikit-learn regressors)
# of each estimator on the polynomial features, with a blank line between
# estimators. A large train/test gap indicates overfitting.
for position, (estimator, train_label, test_label) in enumerate(score_report):
    if position:
        print()
    print(train_label, estimator.score(X_train_scaled, y_train))
    print(test_label, estimator.score(X_test_scaled, y_test))

Linear Regression Model Training Score:  0.9409317027113498
Linear Regression Model Testing Score:  0.8055829447972147

Ridge Regression Model Training Score:  0.8502074967229956
Ridge Regression Model Testing Score:  0.7945568182612386

Lasso Regression Model Training Score:  0.8407156586155267
Lasso Regression Model Testing Score:  0.787881150824475


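Regularization constrains the complexity of a model by penalizing large coefficients, which helps keep the model from overfitting. On the polynomial features it narrows the gap between the training and testing scores (about 0.94 vs. 0.81 for plain linear regression, compared with 0.85 vs. 0.79 for Ridge), although the testing score itself does not improve here.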