# Boston Housing Assignment

## Loading and Setup

In [3]:
from sklearn import datasets
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
import math
from sklearn.linear_model import Lasso

In [62]:
bean = datasets.load_boston()
print(bean.DESCR)

Boston House Prices dataset

Notes
------
Data Set Characteristics:  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive
    
    :Median Value (attribute 14) is usually the target

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pupil-teacher ratio by town
      

### Train Set and Test Set Setup

In [4]:
def load_boston():
    scaler = StandardScaler()
    boston = datasets.load_boston()
    X=boston.data
    y=boston.target
    X = scaler.fit_transform(X)
    return train_test_split(X,y)

In [5]:
X_train, X_test, y_train, y_test = load_boston()

In [6]:
X_train.shape

(379, 13)

## Linear Regression Implementation

### Fitting a Linear Regression (LR)

In [7]:
clf = LinearRegression()
clf.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

### Making a Prediction using LR

#### List of real value (y_test) and predicted value tuples. 

In [63]:
list(zip (y_test, clf.predict(X_test)))
print(real_predicted)

[(22.0, 20.703568152043385), (18.100000000000001, 17.063915819921966), (18.5, 12.881266260595801), (50.0, 42.424650968691296), (33.299999999999997, 36.431523147981025), (29.399999999999999, 31.091188055760927), (25.0, 25.813306869512637), (19.5, 17.172729490705393), (10.5, 12.23170321363132), (32.899999999999999, 30.141566047417072), (20.100000000000001, 17.959930745665638), (33.200000000000003, 35.333090593102753), (50.0, 44.795667189645471), (18.399999999999999, 15.149926730834149), (13.1, 19.919801971919195), (20.0, 20.610485251221601), (7.5, 14.115147562633801), (23.300000000000001, 26.368047459158731), (14.5, 18.371478576078125), (31.0, 35.488649029243128), (7.0, 8.0836747469280255), (10.199999999999999, 16.357312903347736), (13.4, 13.593664370349126), (23.100000000000001, 16.862876004619437), (25.0, 22.085487814114419), (34.899999999999999, 34.967758447222977), (19.0, 14.107905456266305), (19.199999999999999, 19.670050372455687), (19.600000000000001, 18.945941203735305), (10.8000

#### MSE using our known test y values and our predicted y values.

In [9]:
y_hat = clf.predict(X_test)
mse = mean_squared_error(y_test, y_hat)
print(mse)

27.9945144707


#### RMSE using our MSE value

In [10]:
rmse = math.sqrt(mse)
print(rmse)

5.290984262944279


#### R^2 using our known test y values and our predicted y values.

In [11]:
r2 = r2_score(y_test, y_hat)
print(r2)

0.656249304237


## L2 Implementation

### Fitting a Lasso

In [12]:
clf2 = Lasso(alpha = 0.1)
clf2.fit(X_train, y_train)

Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

### Making a Prediction using Lasso

#### List of real value (y_test) and predicted value tuples. 

In [13]:
real_predicted2 = list(zip (y_test, clf2.predict(X_test)))
print(real_predicted2)

[(22.0, 21.310542408050694), (18.100000000000001, 18.163108571164486), (18.5, 13.429363538895135), (50.0, 41.580212343434376), (33.299999999999997, 36.41461090114624), (29.399999999999999, 30.561751408770654), (25.0, 26.040944466756564), (19.5, 17.298779277604485), (10.5, 12.461575607356259), (32.899999999999999, 30.704130180052957), (20.100000000000001, 17.427554771167628), (33.200000000000003, 34.547463497046706), (50.0, 43.398262179047848), (18.399999999999999, 16.24939210849422), (13.1, 19.632580230743574), (20.0, 20.321010810736567), (7.5, 14.215351024777327), (23.300000000000001, 25.528515341915163), (14.5, 17.872608164454782), (31.0, 34.688345915333649), (7.0, 10.72915982774842), (10.199999999999999, 15.818463818339765), (13.4, 13.994239414241125), (23.100000000000001, 17.064506111912593), (25.0, 22.409616197159966), (34.899999999999999, 34.78385886971077), (19.0, 14.045240072450213), (19.199999999999999, 20.779410658776758), (19.600000000000001, 20.10175061406612), (10.80000000

#### MSE using our known test y values and our predicted y values.

In [25]:
y_hat2 = clf2.predict(X_test)
mse2 = mean_squared_error(y_test, y_hat2)
print(mse2)

28.2382213692


#### RMSE using our MSE2 value

In [16]:
rmse2 = math.sqrt(mse2)
print(rmse2)

5.313964750466217


#### R^2 using our known test y values and our predicted y values.

In [17]:
r2_2 = r2_score(y_test, y_hat2)
print(r2_2)

0.65325677454


## Optimizing the Lasso Regularization Parameter

### alpha = 0.1

In [50]:
reg = Lasso(alpha = 0.1)
reg.fit(X_train, y_train)

Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [51]:
y_hat3 = reg.predict(X_test)
mse3 = mean_squared_error(y_test, y_hat3)
print(mse3)

28.2382213692


In [52]:
rmse3 = math.sqrt(mse3)
print(rmse3)

5.313964750466217


In [53]:
r2_3 = r2_score(y_test, y_hat3)
print(r2_3)

0.65325677454


### alpha = 0.01

In [54]:
reg = Lasso(alpha = 0.01)
reg.fit(X_train, y_train)

Lasso(alpha=0.01, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [55]:
y_hat3 = reg.predict(X_test)
mse3 = mean_squared_error(y_test, y_hat3)
print(mse3)

27.9723748377


In [56]:
rmse3 = math.sqrt(mse3)
print(rmse3)

5.288891645490182


In [57]:
r2_3 = r2_score(y_test, y_hat3)
print(r2_3)

0.656521161578


### alpha = 0.001

In [58]:
reg = Lasso(alpha = 0.001)
reg.fit(X_train, y_train)

Lasso(alpha=0.001, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [59]:
y_hat3 = reg.predict(X_test)
mse3 = mean_squared_error(y_test, y_hat3)
print(mse3)

27.991686294


In [60]:
rmse3 = math.sqrt(mse3)
print(rmse3)

5.290716992434148


In [61]:
r2_3 = r2_score(y_test, y_hat3)
print(r2_3)

0.65628403203
