### Packages Required

In [49]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn import linear_model
from sklearn import metrics
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

### Dataset

In [6]:
# Location of BostonData.csv. Set the BOSTON_DATA_CSV environment variable to point at
# a copy of the file on another machine; when it is unset, the original local path is
# used unchanged.
BOSTON_DATA_CSV = os.environ.get(
    'BOSTON_DATA_CSV',
    'C:/Users/Nithin/Downloads/Regularizationn/Datasets/BostonData.csv',
)
df = pd.read_csv(BOSTON_DATA_CSV)

In [10]:
# Print the frame's summary: column names, non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 506 entries, 0 to 505
Data columns (total 15 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   CRIM      506 non-null    float64
 1   ZN        506 non-null    float64
 2   INDUS     506 non-null    float64
 3   CHAS      506 non-null    int64  
 4   NOX       506 non-null    float64
 5   RM        506 non-null    float64
 6   AGE       506 non-null    float64
 7   DIS       506 non-null    float64
 8   RAD       506 non-null    int64  
 9   TAX       506 non-null    float64
 10  PTRATIO   506 non-null    float64
 11  B         506 non-null    float64
 12  LSTAT     506 non-null    float64
 13  Price     506 non-null    float64
 14  ln_Price  506 non-null    float64
dtypes: float64(13), int64(2)
memory usage: 59.4 KB


### Train & Test Data

In [43]:
# Pick predictors and target by column name instead of by position, so the selection
# does not silently change if the CSV's column order changes.
FEATURE_COLS = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
                'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']
TARGET_COL = 'ln_Price'

# 'Price' stays out of the predictors (as it did with the positional slice 0:13);
# the model target is the ln_Price column.
X = df.loc[:, FEATURE_COLS]
Y = df[TARGET_COL]

In [46]:
# Hold out 30% of the rows for testing; the fixed random_state makes the split repeatable.
train_x, test_x, train_y, test_y = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=123,
)

## Coefficient Comparison: OLS vs. Regularized Models

##### Linear Regression (OLS)

In [47]:
# Ordinary least squares model fitted on the training split.
LNR = LinearRegression()
# Left as the cell's final expression so the fitted estimator is displayed.
LNR.fit(X=train_x, y=train_y)

LinearRegression()

In [52]:
# One-column table of the OLS coefficients, indexed by feature name.
co_eff_nonreg = pd.DataFrame(
    {'co_eff_nonreg': LNR.coef_},
    index=train_x.columns,
)
co_eff_nonreg

Unnamed: 0,co_eff_nonreg
CRIM,-0.04037853
ZN,0.0002212206
INDUS,0.003214572
CHAS,1.328104e-14
NOX,-0.7840635
RM,0.1563129
AGE,-0.001036088
DIS,-0.05492814
RAD,0.01921713
TAX,-0.0004553392


##### Linear Regression (Ridge)

In [53]:
# Ridge regression with penalty strength alpha=0.05, fitted on the training split.
RID = linear_model.Ridge(alpha=0.05)
# Left as the cell's final expression so the fitted estimator is displayed.
RID.fit(X=train_x, y=train_y)

Ridge(alpha=0.05)

In [54]:
# One-column table of the Ridge coefficients, indexed by feature name.
co_eff_ridge = pd.DataFrame(
    {'co_eff_ridge': RID.coef_},
    index=train_x.columns,
)
co_eff_ridge

Unnamed: 0,co_eff_ridge
CRIM,-0.040803
ZN,0.000197
INDUS,0.003012
CHAS,0.0
NOX,-0.70191
RM,0.157166
AGE,-0.001089
DIS,-0.052959
RAD,0.019131
TAX,-0.00046


##### Linear Regression (Lasso)

In [55]:
# Lasso regression with penalty strength alpha=0.05, fitted on the training split.
# NOTE(review): the features are passed in without any scaling step in the visible
# cells; penalized fits depend on feature scale, so consider standardizing first.
LAS = linear_model.Lasso(alpha=0.05)
# Left as the cell's final expression so the fitted estimator is displayed.
LAS.fit(X=train_x, y=train_y)

Lasso(alpha=0.05)

In [56]:
# One-column table of the Lasso coefficients, indexed by feature name.
co_eff_lasso = pd.DataFrame(
    {'co_eff_lasso': LAS.coef_},
    index=train_x.columns,
)
co_eff_lasso

Unnamed: 0,co_eff_lasso
CRIM,-0.0
ZN,0.000463
INDUS,-0.0
CHAS,0.0
NOX,-0.0
RM,0.0
AGE,0.000422
DIS,-0.0
RAD,0.0023
TAX,-0.000375


##### Linear Regression (Elastic Net)

In [57]:
# Elastic Net with alpha=0.05; the L1/L2 mixing ratio is not set here, so the
# library default applies.
ELN = linear_model.ElasticNet(alpha=0.05)
# Left as the cell's final expression so the fitted estimator is displayed.
ELN.fit(X=train_x, y=train_y)

ElasticNet(alpha=0.05)

In [58]:
# One-column table of the Elastic Net coefficients, indexed by feature name.
co_eff_elastic = pd.DataFrame(
    {'co_eff_elastic': ELN.coef_},
    index=train_x.columns,
)
co_eff_elastic

Unnamed: 0,co_eff_elastic
CRIM,-0.0
ZN,0.000186
INDUS,-0.0
CHAS,0.0
NOX,-0.0
RM,0.0
AGE,0.00058
DIS,-0.000675
RAD,0.005235
TAX,-0.000434


In [62]:
# Put the four coefficient tables side by side (one column per model) for comparison.
coefficient_tables = [co_eff_nonreg, co_eff_ridge, co_eff_lasso, co_eff_elastic]
co_eff = pd.concat(coefficient_tables, axis='columns')
co_eff

Unnamed: 0,co_eff_nonreg,co_eff_ridge,co_eff_lasso,co_eff_elastic
CRIM,-0.04037853,-0.040803,-0.0,-0.0
ZN,0.0002212206,0.000197,0.000463,0.000186
INDUS,0.003214572,0.003012,-0.0,-0.0
CHAS,1.328104e-14,0.0,0.0,0.0
NOX,-0.7840635,-0.70191,-0.0,-0.0
RM,0.1563129,0.157166,0.0,0.0
AGE,-0.001036088,-0.001089,0.000422,0.00058
DIS,-0.05492814,-0.052959,-0.0,-0.000675
RAD,0.01921713,0.019131,0.0023,0.005235
TAX,-0.0004553392,-0.00046,-0.000375,-0.000434


## Test Performance and Selected Features

##### Linear Regression (OLS)

In [79]:
# Fit OLS on the training split, predict the held-out rows, and score the
# predictions with R^2 against the true test targets.
LNR_reg = linear_model.LinearRegression().fit(train_x, train_y)
LNR_reg_predict = LNR_reg.predict(X=test_x)
LNR_reg_acc = metrics.r2_score(y_true=test_y, y_pred=LNR_reg_predict)

In [80]:
# Two-column table (feature name, OLS coefficient) with a default integer index.
co_eff_reg = pd.DataFrame({
    'feat': train_x.columns,
    'co_eff_reg': LNR_reg.coef_,
})

# A feature counts as "selected" when its coefficient is not exactly zero.
# NOTE(review): this is an exact float comparison, so a coefficient that is only
# numerically ~0 still counts as selected — confirm that this is intended.
nonzero_mask = co_eff_reg['co_eff_reg'] != 0
LNR_reg_selected = co_eff_reg.loc[nonzero_mask, 'feat'].unique()
LNR_reg_selected

array(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',
       'TAX', 'PTRATIO', 'B', 'LSTAT'], dtype=object)

In [81]:
# Summarize the OLS run: the test-set R^2 held in LNR_reg_acc (rounded to 2 decimals),
# how many features have a non-zero coefficient, and which ones.
# The label's parenthesis is closed so the sentence reads correctly.
print(
    'Accuracy achieved using Linear Regression (Non-Regularized) is:',
    round(LNR_reg_acc, 2),
    "while the number of features used are",
    len(LNR_reg_selected),
    "with the selected features being",
    LNR_reg_selected,
)

Accuracy achieved using Linear Regression (Non-Regularized is: 0.76 while the number of features used are 13 with the selected features being ['CRIM' 'ZN' 'INDUS' 'CHAS' 'NOX' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO'
 'B' 'LSTAT']


##### Linear Regression (Ridge)

In [82]:
# Fit Ridge (alpha=0.1) on the training split, predict the held-out rows, and
# score the predictions with R^2 against the true test targets.
LNR_reg_rid = linear_model.Ridge(alpha=0.1).fit(train_x, train_y)
LNR_reg_rid_predict = LNR_reg_rid.predict(X=test_x)
LNR_reg_rid_acc = metrics.r2_score(y_true=test_y, y_pred=LNR_reg_rid_predict)

In [83]:
# Two-column table (feature name, Ridge coefficient) with a default integer index.
co_eff_reg_rid = pd.DataFrame({
    'feat': train_x.columns,
    'co_eff_reg_rid': LNR_reg_rid.coef_,
})

# A feature counts as "selected" when its coefficient is not exactly zero.
nonzero_mask = co_eff_reg_rid['co_eff_reg_rid'] != 0
LNR_reg_rid_selected = co_eff_reg_rid.loc[nonzero_mask, 'feat'].unique()
LNR_reg_rid_selected

array(['CRIM', 'ZN', 'INDUS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
       'PTRATIO', 'B', 'LSTAT'], dtype=object)

In [84]:
# Summarize the Ridge run: the test-set R^2 held in LNR_reg_rid_acc (rounded to
# 2 decimals), how many features have a non-zero coefficient, and which ones.
# The label names the Ridge model so this line is not mistaken for the OLS result.
print(
    'Accuracy achieved using Linear Regression (Ridge) is:',
    round(LNR_reg_rid_acc, 2),
    "while the number of features used are",
    len(LNR_reg_rid_selected),
    "with the selected features being",
    LNR_reg_rid_selected,
)

Accuracy achieved using Linear Regression (Non-Regularized is: 0.76 while the number of features used are 12 with the selected features being ['CRIM' 'ZN' 'INDUS' 'NOX' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO' 'B'
 'LSTAT']


##### Linear Regression (Lasso)

In [91]:
# Fit Lasso (alpha=0.01) on the training split, predict the held-out rows, and
# score the predictions with R^2 against the true test targets.
LNR_reg_las = linear_model.Lasso(alpha=0.01).fit(train_x, train_y)
LNR_reg_las_predict = LNR_reg_las.predict(X=test_x)
LNR_reg_las_acc = metrics.r2_score(y_true=test_y, y_pred=LNR_reg_las_predict)

In [92]:
# Two-column table (feature name, Lasso coefficient) with a default integer index.
co_eff_reg_las = pd.DataFrame({
    'feat': train_x.columns,
    'co_eff_reg_las': LNR_reg_las.coef_,
})

# A feature counts as "selected" when its coefficient is not exactly zero.
nonzero_mask = co_eff_reg_las['co_eff_reg_las'] != 0
LNR_reg_las_selected = co_eff_reg_las.loc[nonzero_mask, 'feat'].unique()
LNR_reg_las_selected

array(['CRIM', 'ZN', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B',
       'LSTAT'], dtype=object)

In [93]:
# Summarize the Lasso run: the test-set R^2 held in LNR_reg_las_acc (rounded to
# 2 decimals), how many features have a non-zero coefficient, and which ones.
# The label names the Lasso model so this line is not mistaken for the OLS result.
print(
    'Accuracy achieved using Linear Regression (Lasso) is:',
    round(LNR_reg_las_acc, 2),
    "while the number of features used are",
    len(LNR_reg_las_selected),
    "with the selected features being",
    LNR_reg_las_selected,
)

Accuracy achieved using Linear Regression (Non-Regularized is: 0.76 while the number of features used are 10 with the selected features being ['CRIM' 'ZN' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO' 'B' 'LSTAT']


##### Linear Regression (Elastic Net)

In [94]:
# Fit Elastic Net (alpha=0.01, mixing ratio left at the library default) on the
# training split, predict the held-out rows, and score the predictions with R^2.
LNR_reg_elast = linear_model.ElasticNet(alpha=0.01).fit(train_x, train_y)
LNR_reg_elast_predict = LNR_reg_elast.predict(X=test_x)
LNR_reg_elast_acc = metrics.r2_score(y_true=test_y, y_pred=LNR_reg_elast_predict)

In [95]:
# Two-column table (feature name, Elastic Net coefficient) with a default integer index.
co_eff_reg_elast = pd.DataFrame({
    'feat': train_x.columns,
    'co_eff_reg_elast': LNR_reg_elast.coef_,
})

# A feature counts as "selected" when its coefficient is not exactly zero.
nonzero_mask = co_eff_reg_elast['co_eff_reg_elast'] != 0
LNR_reg_elast_selected = co_eff_reg_elast.loc[nonzero_mask, 'feat'].unique()
LNR_reg_elast_selected

array(['CRIM', 'ZN', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B',
       'LSTAT'], dtype=object)

In [96]:
# Summarize the Elastic Net run: the test-set R^2 held in LNR_reg_elast_acc (rounded
# to 2 decimals), how many features have a non-zero coefficient, and which ones.
# The label names the Elastic Net model so this line is not mistaken for the OLS result.
print(
    'Accuracy achieved using Linear Regression (Elastic Net) is:',
    round(LNR_reg_elast_acc, 2),
    "while the number of features used are",
    len(LNR_reg_elast_selected),
    "with the selected features being",
    LNR_reg_elast_selected,
)

Accuracy achieved using Linear Regression (Non-Regularized is: 0.76 while the number of features used are 10 with the selected features being ['CRIM' 'ZN' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO' 'B' 'LSTAT']
