## Boston Housing Assignment

In [37]:
from sklearn import datasets
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cross_validation import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso


In [38]:
bean = datasets.load_boston()

In [39]:
def load_boston():
    scaler = StandardScaler()
    boston = datasets.load_boston()
    X=boston.data
    y=boston.target
    X = scaler.fit_transform(X)
    return train_test_split(X,y)

In [40]:
X_train, X_test, y_train, y_test = load_boston()

In [41]:
X_train.shape

(379, 13)

## Fitting a Linear Regression

In [42]:
clf = LinearRegression()
clf.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

## Making a Prediction

In [127]:
y_pred=clf.predict(X_test)
zip (y_test, y_pred)

[(22.199999999999999, 18.876359373856424),
 (23.699999999999999, 27.516494307231714),
 (37.299999999999997, 33.262528667748356),
 (17.699999999999999, 20.107634277369247),
 (23.699999999999999, 9.2067641984410358),
 (23.199999999999999, 17.041898205993686),
 (8.3000000000000007, 13.679789599199152),
 (5.0, 9.4679597365754926),
 (21.5, 25.946646396530461),
 (27.5, 19.595145410307993),
 (21.399999999999999, 22.493549688104036),
 (33.399999999999999, 35.081826618167391),
 (18.800000000000001, 21.459413585957865),
 (21.399999999999999, 24.094801150548097),
 (24.5, 20.05664880389638),
 (36.399999999999999, 32.578402683724029),
 (13.9, 13.496276249771213),
 (29.100000000000001, 30.956012848751449),
 (23.0, 23.169135019107422),
 (14.800000000000001, 14.656040574741969),
 (37.899999999999999, 34.322513873648568),
 (27.0, 34.444873540801424),
 (23.699999999999999, 27.754830761108849),
 (8.0999999999999996, 4.3642655648396627),
 (23.600000000000001, 31.255714942263936),
 (20.899999999999999, 20.

## Calculation of R2 and MSE for Linear Regression

In [44]:
r2_score(y_test, y_pred)

0.68898934256041366

In [45]:
mean_squared_error(y_test, y_pred)

27.936419982787257

## Implementing Lasso

In [109]:
clf2 = Lasso()

In [110]:
clf2.fit (X_train, y_train)

Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [123]:
y_pred2 = clf2.predict(X_test)
zip (y_test, y_pred2)

[(22.199999999999999, 21.054504358046685),
 (23.699999999999999, 25.606472416529265),
 (37.299999999999997, 29.736861022161289),
 (17.699999999999999, 20.803173694479668),
 (23.699999999999999, 10.657617460185779),
 (23.199999999999999, 17.524025778216199),
 (8.3000000000000007, 14.569545124637003),
 (5.0, 14.120293286029964),
 (21.5, 20.542564332768151),
 (27.5, 19.038326591300173),
 (21.399999999999999, 19.94187113405324),
 (33.399999999999999, 29.740134395678801),
 (18.800000000000001, 19.794420917254797),
 (21.399999999999999, 24.181828710710299),
 (24.5, 20.326563852288885),
 (36.399999999999999, 32.534160095678573),
 (13.9, 16.21677777721775),
 (29.100000000000001, 29.414045024622482),
 (23.0, 25.193174430213439),
 (14.800000000000001, 17.460677350367419),
 (37.899999999999999, 29.951792305157642),
 (27.0, 28.57403197402575),
 (23.699999999999999, 27.40125357050929),
 (8.0999999999999996, 8.4203382707963534),
 (23.600000000000001, 29.090982202808412),
 (20.899999999999999, 22.751

## Calculation of R2 and MSE for Lasso

In [112]:
r2_score(y_test, y_pred2)

0.63588345147465897

In [113]:
mean_squared_error(y_test, y_pred2)

32.70663747033425

## Optimizing Lasso

The regularization paramater 'alpha' controls the degree of sparsity of the coefficients estimated. Hence, has a significant effect on predictions. I have adjusted the alpha value to 0.001 and increased the iterations to 1500 which gave me good results like Linear Regression

In [119]:
clf_optm = Lasso(alpha=0.001,max_iter=1500)
clf_optm.fit (X_train, y_train)

Lasso(alpha=0.001, copy_X=True, fit_intercept=True, max_iter=1500,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [126]:
y_pred_optm = clf_optm.predict(X_test)
zip (y_test, y_pred_optm)

[(22.199999999999999, 18.882773252227516),
 (23.699999999999999, 27.503674172125475),
 (37.299999999999997, 33.247327468220796),
 (17.699999999999999, 20.106219421198951),
 (23.699999999999999, 9.2158104765573015),
 (23.199999999999999, 17.041050207165057),
 (8.3000000000000007, 13.679284759563769),
 (5.0, 9.480755491856522),
 (21.5, 25.931894547810657),
 (27.5, 19.588442981169596),
 (21.399999999999999, 22.46384363000945),
 (33.399999999999999, 35.066210782870826),
 (18.800000000000001, 21.463734475271522),
 (21.399999999999999, 24.096865716067679),
 (24.5, 20.069604116070252),
 (36.399999999999999, 32.586143031106516),
 (13.9, 13.502964597563242),
 (29.100000000000001, 30.953589445443718),
 (23.0, 23.176146570609202),
 (14.800000000000001, 14.663637772859486),
 (37.899999999999999, 34.313904151453997),
 (27.0, 34.432443130459092),
 (23.699999999999999, 27.753127528763052),
 (8.0999999999999996, 4.3863415594563229),
 (23.600000000000001, 31.258017204461545),
 (20.899999999999999, 20.7

## Performance of optimized Lasso

In [124]:
r2_score(y_test, y_pred_optm)

0.68903774916696636

In [125]:
mean_squared_error(y_test, y_pred_optm)

27.932071876835749