# Import some libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score, mean_squared_error

# Load Dataset

In [2]:
boston = datasets.load_boston()
df = pd.DataFrame(boston.data,columns = boston.feature_names)
df.head()


    The Boston housing prices dataset has an ethical problem. You can refer to
    the documentation of this function for further details.

    The scikit-learn maintainers therefore strongly discourage the use of this
    dataset unless the purpose of the code is to study and educate about
    ethical issues in data science and machine learning.

    In this special case, you can fetch the dataset from the original
    source::

        import pandas as pd
        import numpy as np

        data_url = "http://lib.stat.cmu.edu/datasets/boston"
        raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]

    Alternative datasets include the California housing dataset (i.e.
    :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing
    dataset. You can load the datasets as follows::

        from sklearn.datasets import fetch_california_ho

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


In [3]:
y = boston.target

In [4]:
x_train,x_test,y_train,y_test = train_test_split(df,y,test_size=0.2)

# Checking the r2 score using Linear Regression

In [5]:
clf = LinearRegression()
clf.fit(x_train,y_train)
y_train_pred = clf.predict(x_train)
y_test_pred = clf.predict(x_test)

In [22]:
print("r2_score for training data:",r2_score(y_train,y_train_pred))
print("r2_score for test data:",r2_score(y_test,y_test_pred))
# Little bit variance in training and testing dataset.

r2_score for training data: 0.8941569190776693
r2_score for test data: 0.8136532287593907


In [7]:
print(mean_squared_error(y_train,y_train_pred))
print(mean_squared_error(y_test,y_test_pred))

18.935228350081598
35.25457863605966


In [25]:
clf1 = Ridge(alpha=1)
clf1.fit(x_train,y_train)
y_train_pred = clf1.predict(x_train)
y_test_pred = clf1.predict(x_test)
print("r2_score of Ridge for training dataset:",r2_score(y_train,y_train_pred))
print("r2_score of Ridge for testing dataset:",r2_score(y_test,y_test_pred))
# Ridge has lower r2 score than Linear Regression because of the 
# Regularization term.

r2_score of Ridge for training dataset: 0.7690898750666999
r2_score of Ridge for testing dataset: 0.606859162052418


In [9]:
clf1.coef_

array([-0.07891088,  0.04759914, -0.07105717,  1.20159364, -1.6011118 ,
        4.14725079, -0.01740719, -1.28551268,  0.22885736, -0.01326222,
       -0.78251002,  0.01299577, -0.49761205])

In [26]:
clf2 = Lasso(alpha=1)
clf2.fit(x_train,y_train)
y_train_pred = clf2.predict(x_train)
y_test_pred = clf2.predict(x_test)
print("r2_score of Lasso for training dataset:",r2_score(y_train,y_train_pred))
print("r2_score of Lasso for training dataset:",r2_score(y_test,y_test_pred))
# r2_score of Lasso is less than Ridge because the unimportant coefficient
# converge to zero in Lasso.

r2_score of Lasso for training dataset: 0.7182169184826454
r2_score of Lasso for training dataset: 0.5934373698426231


In [11]:
clf2.coef_

array([-0.04481217,  0.04654169, -0.02543223,  0.        , -0.        ,
        1.47139135,  0.0125106 , -0.75297663,  0.22990435, -0.01502823,
       -0.69936644,  0.01125583, -0.69233885])

# Now apply polynomial transformation and use Linear regression, ridge and lasso

In [12]:
poly = PolynomialFeatures(degree=3)
x_train_transform = poly.fit_transform(x_train)
x_test_transform = poly.transform(x_test)

In [13]:
clf3 = LinearRegression()
clf3.fit(x_train_transform,y_train)
y_train_pred = clf3.predict(x_train_transform)
y_test_pred = clf3.predict(x_test_transform)
print(r2_score(y_train,y_train_pred))
print(r2_score(y_test,y_test_pred))
# Since degree=3 cause high variance in training and testing, in Linear
# Regression.

0.9999999999999999
-11352.635874758947


In [14]:
clf4 = GridSearchCV(Ridge(),{"alpha":[100,1000,10000,500000]},cv=5,scoring="r2")
clf4.fit(x_train_transform,y_train)
# Using GridSearchCV to find out the alpha which can decrease the variance
# in training and testing dataset.

  dual_coef = linalg.solve(K, y, assume_a="pos", overwrite_a=False)
  dual_coef = linalg.solve(K, y, assume_a="pos", overwrite_a=False)
  dual_coef = linalg.solve(K, y, assume_a="pos", overwrite_a=False)
  dual_coef = linalg.solve(K, y, assume_a="pos", overwrite_a=False)
  dual_coef = linalg.solve(K, y, assume_a="pos", overwrite_a=False)
  dual_coef = linalg.solve(K, y, assume_a="pos", overwrite_a=False)


In [27]:
pd.DataFrame(clf4.cv_results_)
# Maximum r2_score obtained at alpha=500000

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.009332,0.000951,0.000203,0.000406,100,{'alpha': 100},-12.696824,-0.624778,-0.447921,-62.073075,-0.254091,-15.219338,23.903097,4
1,0.003019,0.0007,0.0002,0.000401,1000,{'alpha': 1000},-35.900299,0.261508,-2.188115,-7.711313,0.060663,-9.095511,13.707765,3
2,0.003299,0.000507,0.000599,0.000489,10000,{'alpha': 10000},-15.67753,0.546145,0.337748,-5.615836,0.447472,-3.9924,6.296647,2
3,0.003398,0.000374,0.000199,0.000399,500000,{'alpha': 500000},-1.776959,0.828859,0.721194,-1.449495,0.835926,-0.168095,1.185179,1


In [28]:
clf5 = GridSearchCV(Lasso(),{"alpha":[1000,2000,5000]},cv=5,scoring="r2")
clf5.fit(x_train_transform,y_train)
# Same for Lasso

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


In [29]:
pd.DataFrame(clf5.cv_results_)
# Maximum r2_score obtained at alpha=2000

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.059602,0.008088,0.000303,0.000606,1000,{'alpha': 1000},0.662671,0.807569,0.876992,0.825762,0.881524,0.810904,0.079448,2
1,0.058599,0.004795,0.000803,0.000401,2000,{'alpha': 2000},0.674075,0.813517,0.872263,0.816671,0.884688,0.812243,0.074783,1
2,0.053351,0.003662,0.000501,0.000634,5000,{'alpha': 5000},0.64334,0.828426,0.858915,0.799609,0.884332,0.802924,0.084724,3


In [30]:
clf6 = Ridge(alpha=500000)
clf6.fit(x_train_transform,y_train)
y_train_pred = clf6.predict(x_train_transform)
y_test_pred = clf6.predict(x_test_transform)
print("r2_score of Ridge for training dataset:",r2_score(y_train,y_train_pred))
print("r2_score of Ridge for testing dataset:",r2_score(y_test,y_test_pred))

r2_score of Ridge for training dataset: 0.9738753134938786
r2_score of Ridge for testing dataset: 0.6148862458762009


In [31]:
clf7 = Lasso(alpha=2000)
clf7.fit(x_train_transform,y_train)
y_train_pred = clf7.predict(x_train_transform)
y_test_pred = clf7.predict(x_test_transform)
print("r2_score of Lasso for training dataset:",r2_score(y_train,y_train_pred))
print("r2_score of Lasso for testing dataset:",r2_score(y_test,y_test_pred))

r2_score of Lasso for training dataset: 0.8941569190776693
r2_score of Lasso for testing dataset: 0.8136532287593907


  model = cd_fast.enet_coordinate_descent(


In [32]:
# It is seen that Regularization term will decrease the variance in 
# training and testing data.