In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_squared_error

In [3]:
df=pd.read_excel("housing.csv.xlsx")

In [4]:
df.head()

Unnamed: 0,RM,LSTAT,PTRATIO,MEDV
0,6.575,4.98,15.3,504000.0
1,6.421,9.14,17.8,453600.0
2,7.185,4.03,17.8,728700.0
3,6.998,2.94,18.7,701400.0
4,7.147,5.33,18.7,760200.0


In [5]:
df.describe()

Unnamed: 0,RM,LSTAT,PTRATIO,MEDV
count,489.0,489.0,489.0,489.0
mean,6.240288,12.939632,18.516564,454342.9
std,0.64365,7.08199,2.111268,165340.3
min,3.561,1.98,12.6,105000.0
25%,5.88,7.37,17.4,350700.0
50%,6.185,11.69,19.1,438900.0
75%,6.575,17.12,20.2,518700.0
max,8.398,37.97,22.0,1024800.0


### Splitting data into training and testing data

In [6]:
X = df.drop('MEDV', axis=1)

y = df['MEDV']

from sklearn import preprocessing

X = preprocessing.scale(X)

y = preprocessing.scale(y)

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=1)

print(X_train.shape)

print(y_train.shape)

print(X_test.shape)

print(y_test.shape)

(342, 3)
(342,)
(147, 3)
(147,)


preprocessing.scale(X)

The preprocessing.scale() algorithm puts your data on one scale. This is helpful with largely sparse datasets. In simple words, your data is vastly spread out. For example the values of X maybe like so:

X = [1, 4, 400, 10000, 100000]

The issue with sparsity is that it very biased or in statistical terms skewed. So, therefore, scaling the data brings all your values onto one scale eliminating the sparsity.

Scaling the data brings all your values onto one scale eliminating the sparsity and it follows the same concept of Normalization and Standardization. To see the effect, you can call describe on the dataframe before and after processing:

In [7]:
#with X is already pre-proccessed 
df2 = pd.DataFrame(X)
df2.describe()

Unnamed: 0,0,1,2
count,489.0,489.0,489.0
mean,-8.918111e-16,9.081579e-18,1.522073e-15
std,1.001024,1.001024,1.001024
min,-4.166912,-1.54912,-2.805245
25%,-0.5603316,-0.7872555,-0.5294013
50%,-0.08598615,-0.1766328,0.2766267
75%,0.5205539,0.5908861,0.7981743
max,3.35574,3.537989,1.651616


In [8]:
regression_model = LinearRegression()

regression_model.fit(X_train, y_train)

LinearRegression()

In [19]:
ridge = Ridge(alpha=0.2)

ridge.fit(X_train,y_train)

print ("Ridge model:", (ridge.coef_))

Ridge model: [ 0.31037614 -0.4601965  -0.2583435 ]


In [20]:
ridge = Ridge(alpha=0.3)

ridge.fit(X_train,y_train)

print ("Ridge model:", (ridge.coef_))

Ridge model: [ 0.31037007 -0.46008909 -0.25831074]


In [21]:
ridge = Ridge(alpha=0.4)

ridge.fit(X_train,y_train)

print ("Ridge model:", (ridge.coef_))

Ridge model: [ 0.31036395 -0.45998177 -0.25827798]


if we incresess alpha greater than 0.3 then we see 2nd term of ridge model is decresess

### The optimization objective for Ridge is:
||y - Xw||^2_2 + alpha * ||w||^2_2

In [15]:
lasso = Lasso(alpha=0.1)

lasso.fit(X_train,y_train)

print ("Lasso model:", (lasso.coef_))

Lasso model: [ 0.26550839 -0.42151352 -0.18794559]


In [22]:
lasso = Lasso(alpha=0.2)

lasso.fit(X_train,y_train)

print ("Lasso model:", (lasso.coef_))

Lasso model: [ 0.2206233  -0.38261712 -0.11748345]


In [23]:
lasso = Lasso(alpha=0.4)

lasso.fit(X_train,y_train)

print ("Lasso model:", (lasso.coef_))

Lasso model: [ 0.1265626  -0.29924965 -0.        ]


if alpha is increses upto 0.4 then 3rd term is getting zero 

### The optimization objective for Lasso is:
(1 / (2 n_samples)) ||y - Xw||^2_2 + alpha * ||w||_1

### Result on Train and Test Dataset

In [26]:
print("Linear Regression Model Training Score: ", regression_model.score(X_train, y_train))

print("Linear Regression Model Testing Score: ",regression_model.score(X_test, y_test))

print("Ridge Regression Model Training Score: ",ridge.score(X_train, y_train))

print("Ridge Regression Model Testing Score: ",ridge.score(X_test, y_test))

print("Lasso Regression Model Training Score: ",lasso.score(X_train, y_train))

print("Lasso Regression Model Testing Score: ",lasso.score(X_test, y_test))

Linear Regression Model Training Score:  0.7105567540949003
Linear Regression Model Testing Score:  0.7281579138457908
Ridge Regression Model Training Score:  0.7105564869331338
Ridge Regression Model Testing Score:  0.7281027886946532
Lasso Regression Model Training Score:  0.4846935618898719
Lasso Regression Model Testing Score:  0.4634547006315497


In [27]:
print("Linear Regression Model Coefficient :",regression_model.coef_)
print("Ridge Regression Model Coefficient :",ridge.coef_)
print("Lasso Regression Model Coefficient :",lasso.coef_)

Linear Regression Model Coefficient : [ 0.31038811 -0.46041158 -0.25840905]
Ridge Regression Model Coefficient : [ 0.31036395 -0.45998177 -0.25827798]
Lasso Regression Model Coefficient : [ 0.1265626  -0.29924965 -0.        ]


###  Fitting Polynomial Regression

In [28]:
from sklearn.preprocessing import PolynomialFeatures

poly = PolynomialFeatures(degree = 2, interaction_only=True)

X_poly = poly.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_poly, y, test_size=0.30, random_state=1)

regression_model.fit(X_train, y_train)

print(regression_model.coef_[0])

0.0


In [29]:
ridge = Ridge(alpha=.3)

ridge.fit(X_train,y_train)

print ("Ridge model:", (ridge.coef_))

Ridge model: [ 0.          0.27107574 -0.57159397 -0.17077965 -0.20051563 -0.18439328
 -0.14349348]


In [30]:
ridge = Ridge(alpha=.4)

ridge.fit(X_train,y_train)

print ("Ridge model:", (ridge.coef_))

Ridge model: [ 0.          0.27111034 -0.57137283 -0.17081753 -0.20042061 -0.18436782
 -0.14341041]


In [31]:
ridge = Ridge(alpha=.6)

ridge.fit(X_train,y_train)

print ("Ridge model:", (ridge.coef_))

Ridge model: [ 0.          0.2711791  -0.57093142 -0.170893   -0.2002309  -0.18431688
 -0.1432444 ]


all term are remains same if we increses alpha upto 0.6

In [33]:
lasso = Lasso(alpha=0.003)

lasso.fit(X_train,y_train)

print ("Lasso model:", (lasso.coef_))

Lasso model: [ 0.          0.26858583 -0.571742   -0.16961397 -0.19866367 -0.17956566
 -0.13553868]


In [34]:
lasso = Lasso(alpha=0.002)

lasso.fit(X_train,y_train)

print ("Lasso model:", (lasso.coef_))

Lasso model: [ 0.          0.2693995  -0.57189417 -0.1699642  -0.19937095 -0.18120994
 -0.13828278]


In [35]:
lasso = Lasso(alpha=0.2)

lasso.fit(X_train,y_train)

print ("Lasso model:", (lasso.coef_))

Lasso model: [ 0.          0.18971592 -0.44165633 -0.08292586 -0.07699728 -0.06161188
  0.        ]


if we change alpha drasticly then parameter of lasso model is change more

### Result on Polynomial Transformed Train and Test Dataset

In [38]:
print("Linear Regression Model Training Score: ", regression_model.score(X_train, y_train))

print("Linear Regression Model Testing Score: ",regression_model.score(X_test, y_test))

print("Ridge Regression Model Training Score: ",ridge.score(X_train, y_train))

print("Ridge Regression Model Testing Score: ",ridge.score(X_test, y_test))

print("Lasso Regression Model Training Score: ",lasso.score(X_train, y_train))

print("Lasso Regression Model Testing Score: ",lasso.score(X_test, y_test))

Linear Regression Model Training Score:  0.8244073780079281
Linear Regression Model Testing Score:  0.8149670026064411
Ridge Regression Model Training Score:  0.8244059086530131
Ridge Regression Model Testing Score:  0.8150111643173795
Lasso Regression Model Training Score:  0.7221981474611417
Lasso Regression Model Testing Score:  0.707512273692332


print("Linear Regression Model Coefficient :",regression_model.coef_)
print("Ridge Regression Model Coefficient :",ridge.coef_)
print("Lasso Regression Model Coefficient :",lasso.coef_)

### Elastic Net

In [39]:
from sklearn.linear_model import ElasticNet

ENreg = ElasticNet(alpha=0.5, l1_ratio=0.2, normalize=False)

ENreg.fit(X_train,y_train)

pred_cv = ENreg.predict(X_train)


ENreg.score(X_train,y_train)

0.7306970161020186

Cross-validation with ElasticNetCV

ElasticNetCV is a cross-validation class that can search multiple alpha values and applies the best one. We'll define the model with alphas value and fit it with xtrain and ytrain data.

In [40]:
from sklearn.linear_model import ElasticNet,ElasticNetCV

In [41]:
#To figure out what alpha value is good for our model, we'll define multiple values and check the performance accuracy.

alphas = [0.0001, 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 1]

In [42]:
elastic_cv=ElasticNetCV(alphas=alphas, cv=5)
model = elastic_cv.fit(X_train, y_train)
print(model.alpha_)
print(model.intercept_)

0.01
-0.10336736215860041


In [43]:
#we'll check the R-squared, MSE, and RMSE metrics for each alpha.

for a in alphas:
    model = ElasticNet(alpha=a).fit(X,y)   
    score = model.score(X, y)
    pred_y = model.predict(X)
    mse = mean_squared_error(y, pred_y)   
    print("Alpha:{0:.4f}, R2:{1:.2f}, MSE:{2:.2f}, RMSE:{3:.2f}"
       .format(a, score, mse, np.sqrt(mse)))

Alpha:0.0001, R2:0.72, MSE:0.28, RMSE:0.53
Alpha:0.0010, R2:0.72, MSE:0.28, RMSE:0.53
Alpha:0.0100, R2:0.72, MSE:0.28, RMSE:0.53
Alpha:0.1000, R2:0.71, MSE:0.29, RMSE:0.54
Alpha:0.3000, R2:0.66, MSE:0.34, RMSE:0.58
Alpha:0.5000, R2:0.57, MSE:0.43, RMSE:0.65
Alpha:0.7000, R2:0.46, MSE:0.54, RMSE:0.73
Alpha:1.0000, R2:0.28, MSE:0.72, RMSE:0.85
