In [23]:
from sklearn import datasets
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cross_validation import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso

In [2]:
bean = datasets.load_boston()
print bean.DESCR

Boston House Prices dataset

Notes
------
Data Set Characteristics:  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive
    
    :Median Value (attribute 14) is usually the target

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pupil-teacher ratio by town
      

In [3]:
def load_boston():
    scaler = StandardScaler()
    boston = datasets.load_boston()
    X=boston.data
    y=boston.target
    X = scaler.fit_transform(X)
    return train_test_split(X,y)

We are going to divide the data into training and test sets

In [4]:
X_train , X_test , y_train , y_test = load_boston()

In [5]:
X_train.shape

(379L, 13L)

Fitting a Linear Regression : 
We are instantiating a new regression object and then giving the training data to the object by calling .fit(independent variables , dependent variables) 

In [6]:
lr=LinearRegression()
lr.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

Making a prediction. 
Combining the real value (y_test) with the value our regressor predicts (lr.predict(X_test))

In [7]:
zip(y_test, lr.predict(X_test))

[(21.699999999999999, 21.015963519127908),
 (18.800000000000001, 20.890717835806065),
 (20.600000000000001, 21.581595299367009),
 (24.800000000000001, 26.356236700177583),
 (22.600000000000001, 18.678877605972875),
 (18.600000000000001, 16.394965238790931),
 (30.5, 30.005921711583227),
 (19.300000000000001, 18.221990256787361),
 (19.100000000000001, 19.7810749509276),
 (20.600000000000001, 15.474824059711681),
 (22.399999999999999, 23.48390878457344),
 (48.5, 40.795108388461159),
 (20.300000000000001, 20.116618901317079),
 (16.0, 18.222762907177273),
 (17.5, 16.681876562853905),
 (41.299999999999997, 33.476685657859399),
 (7.2000000000000002, 17.766080765625283),
 (32.0, 33.284112164852019),
 (31.5, 31.888295795609881),
 (16.800000000000001, 20.434012469255233),
 (24.300000000000001, 29.868337216215203),
 (18.0, 18.917590523255008),
 (20.300000000000001, 22.967761456680037),
 (50.0, 39.590425696899921),
 (19.199999999999999, 24.153410796397189),
 (13.800000000000001, 2.4469668398539781

Implementing R^2 and MSE functions to measure the performance of Linear regression. 

In [8]:
y_predt = lr.predict(X_test)

R^2

In [9]:
r2fun = r2_score(y_test, y_predt)

In [10]:
r2fun

0.74794493464729872

R^2 which is the coefficient of Determination should be between 0 and 1 and larger is better.

MSE

In [11]:
mseFun = mean_squared_error(y_test, y_predt)

In [12]:
mseFun

25.583807292435662

R^2 : 
0.74794493464729872

MSE :
25.583807292435662


Ridge linear Regression :

Assigning a value to alpha and instantiating a new object:

In [15]:
alpha = 0.1
ridge=Ridge(alpha=alpha)

Fitting training data by calling .fit(independent variables, dependent variables)

In [16]:
ridge.fit(X_train,y_train)

Ridge(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)

 Combining the real values to the predicted values by the Ridge Regression Model

In [17]:
zip(y_test,ridge.predict(X_test))

[(21.699999999999999, 21.017581678924426),
 (18.800000000000001, 20.878775430234583),
 (20.600000000000001, 21.582276184355123),
 (24.800000000000001, 26.354061233994315),
 (22.600000000000001, 18.680500922088729),
 (18.600000000000001, 16.408508116871257),
 (30.5, 30.008638127127782),
 (19.300000000000001, 18.222059108678906),
 (19.100000000000001, 19.778080900618821),
 (20.600000000000001, 15.485075628784648),
 (22.399999999999999, 23.483772764744145),
 (48.5, 40.784390632691832),
 (20.300000000000001, 20.114433687345855),
 (16.0, 18.221826715533052),
 (17.5, 16.686642441833868),
 (41.299999999999997, 33.470907889896267),
 (7.2000000000000002, 17.765768756804164),
 (32.0, 33.277103291201982),
 (31.5, 31.885976700084338),
 (16.800000000000001, 20.436112109533141),
 (24.300000000000001, 29.860961265184354),
 (18.0, 18.920772396126463),
 (20.300000000000001, 22.967732098979944),
 (50.0, 39.586971473601231),
 (19.199999999999999, 24.154625265448946),
 (13.800000000000001, 2.4454957828760

 Measuring R^2 and MSE of our Ridge Regression Model

In [18]:
y_predictedRg=ridge.predict(X_test)

R^2

In [19]:
r2funRidge=r2_score(y_test, y_predictedRg)

In [20]:
r2funRidge

0.74795893824464921

MSE

In [21]:
mseFunRidge=mean_squared_error(y_test,y_predictedRg)

In [22]:
mseFunRidge

25.582385915184169

R^2 :
0.74795893824464921

MSE :
25.582385915184169

When alpha = 0.1



Now let's see the change by giving different learning rate (alpha) values. 

In [25]:
ridge.fit(X_train,y_train)

Ridge(alpha=0.01, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)

In [26]:
zip(y_test,ridge.predict(X_test))

[(21.699999999999999, 21.016125362965134),
 (18.800000000000001, 20.889519074057404),
 (20.600000000000001, 21.581662927021778),
 (24.800000000000001, 26.356018098062599),
 (22.600000000000001, 18.679040088578208),
 (18.600000000000001, 16.39632276900528),
 (30.5, 30.006194873857666),
 (19.300000000000001, 18.221996783207015),
 (19.100000000000001, 19.78077469851026),
 (20.600000000000001, 15.475850946874122),
 (22.399999999999999, 23.483895068640404),
 (48.5, 40.794034042814758),
 (20.300000000000001, 20.116399042404083),
 (16.0, 18.222667996772092),
 (17.5, 16.682354068198002),
 (41.299999999999997, 33.476107193795144),
 (7.2000000000000002, 17.766049449171327),
 (32.0, 33.283409718959845),
 (31.5, 31.8880634337565),
 (16.800000000000001, 20.43422297763696),
 (24.300000000000001, 29.867598449664111),
 (18.0, 18.917909753752603),
 (20.300000000000001, 22.967759028008565),
 (50.0, 39.590080013853239),
 (19.199999999999999, 24.153533519718476),
 (13.800000000000001, 2.446818793356325),


In [27]:
y_predictedRg=ridge.predict(X_test)

In [28]:
r2funRidge=r2_score(y_test, y_predictedRg)

In [29]:
r2funRidge

0.74794635667984122

In [30]:
mseFunRidge=mean_squared_error(y_test,y_predictedRg)

In [31]:
mseFunRidge

25.583662954901801

In [33]:
alpha = 10
ridge=Ridge(alpha=alpha)

In [34]:
ridge.fit(X_train,y_train)

Ridge(alpha=10, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)

In [35]:
zip(y_test,ridge.predict(X_test))

[(21.699999999999999, 21.171783077363617),
 (18.800000000000001, 20.05183395033718),
 (20.600000000000001, 21.67849912499938),
 (24.800000000000001, 26.219860245281197),
 (22.600000000000001, 18.821489381976736),
 (18.600000000000001, 17.481891042414695),
 (30.5, 30.161749619354925),
 (19.300000000000001, 18.25879681712782),
 (19.100000000000001, 19.550096050306514),
 (20.600000000000001, 16.348494673986458),
 (22.399999999999999, 23.480421164707657),
 (48.5, 39.933856866047833),
 (20.300000000000001, 20.001171385025458),
 (16.0, 18.224923540616718),
 (17.5, 17.080734937954592),
 (41.299999999999997, 32.961429665867172),
 (7.2000000000000002, 17.742650620444323),
 (32.0, 32.712391860451291),
 (31.5, 31.690609548139296),
 (16.800000000000001, 20.596258001069213),
 (24.300000000000001, 29.234968512125555),
 (18.0, 19.153184473500062),
 (20.300000000000001, 22.931351481251351),
 (50.0, 39.267074679464216),
 (19.199999999999999, 24.182627545394354),
 (13.800000000000001, 2.3791814695718365

In [36]:
y_predictedRg=ridge.predict(X_test)

In [37]:
r2funRidge=r2_score(y_test, y_predictedRg)

In [38]:
r2funRidge

0.74768888446141812

In [39]:
mseFunRidge=mean_squared_error(y_test,y_predictedRg)

In [40]:
mseFunRidge

25.609796607919545

I have changed the alpha values such as
alpha = 0.1, 10 , 0.01 to optimize the chosen regressor (Ridge) and noticed that there is not much difference . 