In [198]:
#Boston Housing DataSet 

# Import Scikit-Learn module
import sklearn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score

# import warnings filter
from warnings import simplefilter
# ignore all future warnings
simplefilter(action='ignore', category=FutureWarning)

# Load the Boston house pricing dataset
from sklearn.datasets import load_boston
sklearn.set_config(print_changed_only=True)

# Arrange the dataset into features and target 
boston = load_boston()
X, y = boston.data, boston.target

# Display the dataset description
#print(boston.DESCR)

In [199]:
# Obtain a DataFrame for the Boston house pricing data
df = pd.DataFrame(data=boston.data, columns=boston.feature_names)
df['price'] = y
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,price
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


# Linear Regression

In [200]:
# Import the corresponding library
from sklearn.linear_model import LinearRegression

# Instantiating a model object named 'model'
model = LinearRegression(normalize=True)

# Obtain model information to verify
print(model)

LinearRegression(normalize=True)


In [201]:
# Perform the train test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Fit the Regression model
model.fit(X_train, y_train)

LinearRegression(normalize=True)

In [219]:
# Obtain the model predictions for the test dataset
y_pred = model.predict(X_test)


# Obtain the R-Squared performace metric
train_score_lr = model.score(X_train, y_train)
test_score_lr = model.score(X_test, y_test)

print("The train score for linear regression model is: ", np.round(train_score_lr, 2))
print("The test score for linear regression model is: ", np.round(test_score_lr, 2))
print("r^2 on test data-elastic net : %f" % r2_score(y_test, y_pred))

The train score for linear regression model is:  0.75
The test score for linear regression model is:  0.68
r^2 on test data-elastic net : 0.684427


# Lasso Regression (L1 Regularization)

Lasso shrinks the less important features 

In [203]:
from sklearn.linear_model import Lasso

#Value changed from 0.1 to 0.9 - no change in test score

modelLasso = Lasso(alpha=0.6)

modelLasso.fit(X_train, y_train)

Lasso(alpha=0.6)

In [204]:
modelLasso.coef_

array([-0.10034894,  0.02975719, -0.        ,  0.        , -0.        ,
        2.73427252,  0.        , -0.82245451,  0.22328552, -0.0118226 ,
       -0.72633624,  0.01292419, -0.67932987])

In [220]:
# Obtain the model predictions for the test dataset
y_pred = modelLasso.predict(X_test)


# Obtain the R-Squared performace metric
train_score_ls = modelLasso.score(X_train, y_train)
test_score_ls = modelLasso.score(X_test, y_test)

print("The train score for lasso regression model is: ", np.round(train_score_ls, 2))
print("The test score for lasso regression model is: ", np.round(test_score_ls, 2))
print("r^2 on test data-elastic net : %f" % r2_score(y_test, y_pred))

The train score for lasso regression model is:  0.72
The test score for lasso regression model is:  0.67
r^2 on test data-elastic net : 0.666661


# Ridge Regression
 Hint: Will add penality to the terms

In [206]:
from sklearn.linear_model import Ridge

modelRegression = Ridge(alpha=0.1)

modelRegression.fit(X_train, y_train)

Ridge(alpha=0.1)

In [207]:
modelRegression.coef_

array([-1.27536016e-01,  2.98537815e-02,  4.35486433e-02,  2.74033483e+00,
       -1.49671032e+01,  4.37348397e+00, -1.03183434e-02, -1.38208336e+00,
        2.55296380e-01, -1.00999145e-02, -9.07649627e-01,  1.32371402e-02,
       -5.20257280e-01])

In [208]:
# Obtain the model predictions for the test dataset
y_predl2 = modelRegression.predict(X_test)


# Obtain the R-Squared performace metric
train_score_l2 = modelRegression.score(X_train, y_train)
test_score_l2 = modelRegression.score(X_test, y_test)

print("The train score for lasso regression model is: ", np.round(train_score_l2, 2))
print("The test score for lasso regression model is: ", np.round(test_score_l2,2))

print("r^2 on test data : %f" % r2_score(y_test, y_predl2))

The train score for lasso regression model is:  0.75
The test score for lasso regression model is:  0.68
r^2 on test data : 0.683805


# Lasso CV

best model found by CV

In [209]:
from sklearn.linear_model import LassoCV
modelLassoCV = LassoCV(alphas=[0.1, 0.5, 0.9, 1.0, 10])

modelLassoCV.fit(X_train, y_train)



LassoCV(alphas=[0.1, 0.5, 0.9, 1.0, 10])

In [210]:
modelLassoCV.alpha_

0.1

In [211]:
# Obtain the model predictions for the test dataset
y_predLassoCV = modelLassoCV.predict(X_test)


# Obtain the R-Squared performace metric
train_score_l1cv = modelLassoCV.score(X_train, y_train)
test_score_l1cv = modelLassoCV.score(X_test, y_test)

print("The train score for lasso regression model is: ", np.round(train_score_l1cv, 2))
print("The test score for lasso regression model is: ", np.round(test_score_l1cv,2))
print("r^2 on test data : %f" % r2_score(y_test, y_predLassoCV))

The train score for lasso regression model is:  0.74
The test score for lasso regression model is:  0.67
r^2 on test data : 0.666045


# Elastic Net

Elastic net combines both penalties - l1 and l2

## HyperParamaters
 **alpha** - assigns the weight to  l1 and l2
 **lambda** - controls the wieghts of the sum of penalties 

In [216]:
from sklearn.linear_model import ElasticNet
modelElasticNet = ElasticNet(alpha=0.8, l1_ratio=0.9)

modelElasticNet.fit(X_train, y_train)
y_predent = modelElasticNet.predict(X_test)

print("r^2 on test data-elastic net : %f" % r2_score(y_test, y_predent))

r^2 on test data-elastic net : 0.661180


In [218]:
  
from sklearn.linear_model import ElasticNetCV
from sklearn.model_selection import RepeatedKFold
from numpy import arange


#cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
ratios = arange(0, 1, 0.01)
alphas = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0.0, 1.0, 10.0, 100.0]
#modelElasticNetCV = ElasticNetCV(l1_ratio=ratios, alphas=alphas, cv=cv, n_jobs=-1)
# fit model
#modelElasticNetCV.fit(X_train, y_train)
# summarize chosen configuration
#print('alpha: %f' % modelElasticNetCV.alpha_)
#print('l1_ratio_: %f' % modelElasticNetCV.l1_ratio_)

# alpha: 0.000000
# l1_ratio_: 0.230000


modelElasticNet = ElasticNet(alpha=0.0, l1_ratio=0.230)

modelElasticNet.fit(X_train, y_train)
y_predent = modelElasticNet.predict(X_test)

print("r^2 on test data-elastic net : %f" % r2_score(y_test, y_predent))

r^2 on test data-elastic net : 0.684427


  modelElasticNet.fit(X_train, y_train)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
