In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso, Ridge
from sklearn.metrics import r2_score

In [2]:
# Read the dataset CSV into a DataFrame and preview its first rows.
DATA_PATH = '../dataset/boston.csv'
df = pd.read_csv(DATA_PATH)
df.head()

Unnamed: 0.1,Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,Price
0,0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [3]:
# (rows, columns) of the frame as loaded.
df.shape

(506, 15)

In [4]:
# 'Unnamed: 0' appears to be a row counter written out with the CSV (its
# values mirror the index in the preview), so it is removed before modelling.
# errors='ignore' makes a repeated execution of this cell a no-op instead of a
# KeyError on the already-removed column, and rebinding `df` avoids mutating
# the frame in place.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,Price
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [5]:
# List the column labels currently in the frame.
df.columns

Index(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
       'PTRATIO', 'B', 'LSTAT', 'Price'],
      dtype='object')

In [6]:
# Separate the regression target ('Price') from the predictor columns.
target_col = 'Price'
y = df[target_col]
x = df.drop(columns=[target_col])

In [7]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

### Linear Regression

In [8]:
# Baseline: ordinary least-squares regression fitted on the training split.
lr = LinearRegression()
lr.fit(X=x_train, y=y_train)

In [9]:
# Predict on the held-out split; the bare last expression displays the
# baseline R^2.
y_pred1 = lr.predict(x_test)
r2_score(y_true=y_test, y_pred=y_pred1)

0.5892223849182503

### Ridge & Lasso

In [10]:
# Fit ridge regression (alpha = 1.0) on the training split and report R^2 on
# the held-out split.
ridge_model = Ridge(alpha=1.0)
ridge_model.fit(x_train, y_train)
y_pred_ridge = ridge_model.predict(x_test)
r2_model_ridge = r2_score(y_test, y_pred_ridge)
print("R2 Score of the Ridge Model with alpha 1.0 is : ", r2_model_ridge)

R2 Score of the Ridge Model with alpha 1.0 is :  0.5796111714164923


In [11]:
# Repeat the ridge fit with a smaller penalty (alpha = 0.1) and score it on
# the same held-out split. fit() returns the estimator itself, so the
# construction and fit can be chained.
ridge_model = Ridge(alpha=0.1).fit(x_train, y_train)
y_pred_ridge = ridge_model.predict(x_test)
r2_model_ridge = r2_score(y_true=y_test, y_pred=y_pred_ridge)

print("R2 Score of the Ridge Model with alpha 0.1 is : ", r2_model_ridge)

R2 Score of the Ridge Model with alpha 0.1 is :  0.5880003503393505


### Lasso

In [16]:
# Fit Lasso regression (alpha = 0.1) on the training split and report R^2 on
# the held-out split. fit() returns the estimator itself, so the construction
# and fit can be chained.
lasso_model = Lasso(alpha=0.1).fit(x_train, y_train)
y_pred_lasso = lasso_model.predict(x_test)
r2_model_lasso = r2_score(y_true=y_test, y_pred=y_pred_lasso)

print("R2 Score of the Lasso Model with alpha 0.1 is : ", r2_model_lasso)

R2 Score of the Lasso Model with alpha 0.1 is :  0.5569747742783787


In [18]:
# Identify the features whose fitted Lasso coefficient is exactly zero,
# i.e. the ones that contribute nothing to the model's predictions.

bad_feature = np.flatnonzero(lasso_model.coef_ == 0)
x.columns[bad_feature].tolist()

['NOX']

In [19]:
# Drop the zero-coefficient features from both the training and test splits.

x_train_filterd = x_train.drop(columns=x_train.columns[bad_feature])
x_test_filterd = x_test.drop(columns=x_test.columns[bad_feature])

In [21]:
# Refit Lasso (alpha = 0.1) on the reduced feature set and score it on the
# matching reduced test split.
# NOTE: this rebinds y_pred_lasso and r2_model_lasso, which previously held
# the results of the unfiltered Lasso model.
model_filterd = Lasso(alpha=0.1).fit(x_train_filterd, y_train)
y_pred_lasso = model_filterd.predict(x_test_filterd)
r2_model_lasso = r2_score(y_true=y_test, y_pred=y_pred_lasso)

print("R2 Score of the Lasso Model with alpha 0.1 is : ", r2_model_lasso)

R2 Score of the Lasso Model with alpha 0.1 is :  0.5569747742783786
