# Boston Housing Assignment

## Loading and Setup

In [1]:
from sklearn import datasets
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
import math
from sklearn.linear_model import Lasso

In [2]:
bean = datasets.load_boston()
print(bean.DESCR)

Boston House Prices dataset

Notes
------
Data Set Characteristics:  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive
    
    :Median Value (attribute 14) is usually the target

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pupil-teacher ratio by town
      

### Train Set and Test Set Setup

In [3]:
def load_boston():
    scaler = StandardScaler()
    boston = datasets.load_boston()
    X=boston.data
    y=boston.target
    X = scaler.fit_transform(X)
    return train_test_split(X,y)

In [4]:
X_train, X_test, y_train, y_test = load_boston()

In [5]:
X_train.shape

(379, 13)

## Linear Regression Implementation

### Fitting a Linear Regression (LR)

In [6]:
clf = LinearRegression()
clf.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

### Making a Prediction using LR

#### List of real value (y_test) and predicted value tuples. 

In [7]:
list(zip (y_test, clf.predict(X_test)))

[(31.0, 34.762344723981386),
 (20.399999999999999, 19.627296134332145),
 (23.100000000000001, 11.036568572698242),
 (20.800000000000001, 18.675628021705077),
 (26.600000000000001, 22.304919100135262),
 (18.5, 19.425833322241179),
 (24.399999999999999, 24.02960151562683),
 (12.300000000000001, 12.304071870815577),
 (21.399999999999999, 24.473659304425873),
 (23.800000000000001, 27.085040294276119),
 (23.899999999999999, 28.053076417236994),
 (15.199999999999999, 11.838679451449282),
 (15.4, 14.978784325635196),
 (15.6, 16.671788285879416),
 (33.399999999999999, 35.475834864557044),
 (23.699999999999999, 9.1358977919039681),
 (11.9, 22.531757786962437),
 (20.300000000000001, 22.559947031797616),
 (27.5, 24.557526609992529),
 (37.0, 30.76656891550013),
 (28.100000000000001, 24.876902226527893),
 (22.0, 28.716039635550018),
 (23.300000000000001, 26.801967139034264),
 (13.1, 20.057198356161638),
 (24.800000000000001, 31.028917105366538),
 (20.199999999999999, 15.762927780327113),
 (10.5, 4.

#### MSE using our known test y values and our predicted y values.

In [8]:
y_hat = clf.predict(X_test)
mse = mean_squared_error(y_test, y_hat)
print(mse)

22.5481023847


#### RMSE using our MSE value

In [9]:
rmse = math.sqrt(mse)
print(rmse)

4.74848421969151


#### R^2 using our known test y values and our predicted y values.

In [10]:
r2 = r2_score(y_test, y_hat)
print(r2)

0.720629675555


## Lasso Implementation

### Fitting a Lasso

In [11]:
clf2 = Lasso()
clf2.fit(X_train, y_train)

Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

### Making a Prediction using Lasso

#### List of real value (y_test) and predicted value tuples. 

In [12]:
list(zip (y_test, clf2.predict(X_test)))

[(31.0, 30.66170901411698),
 (20.399999999999999, 21.857107246601007),
 (23.100000000000001, 11.829074486533187),
 (20.800000000000001, 19.141091759666473),
 (26.600000000000001, 27.701142403892106),
 (18.5, 19.993509382453219),
 (24.399999999999999, 26.397295056483856),
 (12.300000000000001, 11.963842244553382),
 (21.399999999999999, 22.965920901337295),
 (23.800000000000001, 25.849678402867006),
 (23.899999999999999, 27.120147212858871),
 (15.199999999999999, 15.296566710501525),
 (15.4, 17.549831097939659),
 (15.6, 20.261021744380155),
 (33.399999999999999, 30.043858902093927),
 (23.699999999999999, 9.9135150051294314),
 (11.9, 22.365931165264726),
 (20.300000000000001, 20.063077184291753),
 (27.5, 24.209656587620724),
 (37.0, 31.131898728446828),
 (28.100000000000001, 24.425117008431556),
 (22.0, 26.809231965009374),
 (23.300000000000001, 28.570838059618811),
 (13.1, 19.548124592109964),
 (24.800000000000001, 30.570837274822139),
 (20.199999999999999, 18.15620725336268),
 (10.5, 6.

#### MSE using our known test y values and our predicted y values.

In [13]:
y_hat2 = clf2.predict(X_test)
mse2 = mean_squared_error(y_test, y_hat2)
print(mse2)

28.9410826274


#### RMSE using our MSE2 value

In [14]:
rmse2 = math.sqrt(mse2)
print(rmse2)

5.379691685162526


#### R^2 using our known test y values and our predicted y values.

In [15]:
r2_2 = r2_score(y_test, y_hat2)
print(r2_2)

0.641420838637


## Optimizing the Lasso Regularization Parameter

### alpha = 0.1

In [16]:
reg = Lasso(alpha = 0.1)
reg.fit(X_train, y_train)

Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [17]:
y_hat3 = reg.predict(X_test)
mse3 = mean_squared_error(y_test, y_hat3)
print(mse3)

23.3762092602


In [18]:
rmse3 = math.sqrt(mse3)
print(rmse3)

4.83489495854982


In [19]:
r2_3 = r2_score(y_test, y_hat3)
print(r2_3)

0.710369455757


### alpha = 0.01

In [20]:
reg = Lasso(alpha = 0.01)
reg.fit(X_train, y_train)

Lasso(alpha=0.01, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [21]:
y_hat3 = reg.predict(X_test)
mse3 = mean_squared_error(y_test, y_hat3)
print(mse3)

22.5473356677


In [22]:
rmse3 = math.sqrt(mse3)
print(rmse3)

4.748403486192937


In [23]:
r2_3 = r2_score(y_test, y_hat3)
print(r2_3)

0.720639175156


### alpha = 0.001

In [24]:
reg = Lasso(alpha = 0.001)
reg.fit(X_train, y_train)

Lasso(alpha=0.001, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [25]:
y_hat3 = reg.predict(X_test)
mse3 = mean_squared_error(y_test, y_hat3)
print(mse3)

22.547259982


In [26]:
rmse3 = math.sqrt(mse3)
print(rmse3)

4.748395516591567


In [27]:
r2_3 = r2_score(y_test, y_hat3)
print(r2_3)

0.720640112899


### alpha = 0.0001

In [32]:
reg = Lasso(alpha = 0.0001)
reg.fit(X_train, y_train)

Lasso(alpha=0.0001, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [33]:
y_hat3 = reg.predict(X_test)
mse3 = mean_squared_error(y_test, y_hat3)
print(mse3)

22.5480097149


In [34]:
rmse3 = math.sqrt(mse3)
print(rmse3)

4.748474461859075


In [35]:
r2_3 = r2_score(y_test, y_hat3)
print(r2_3)

0.72063082373


### alpha = 0.001 seems to be the best