# Linear Regression using Scikit-Learn

In [8]:
# Load libraries
import numpy as np
import pandas as pd
from sklearn.cross_validation import train_test_split
from sklearn import preprocessing
from sklearn.linear_model import LinearRegression
from sklearn.datasets import load_boston
from sklearn.linear_model import SGDRegressor
from sklearn import metrics
import warnings

# Suppress Warning
warnings.filterwarnings(action="ignore", module="scipy", message="^internal gelsd")

# Load Boston Housing Data

In [9]:
boston = load_boston()
X = boston.data # Independent variables
Y = boston.target # target variable is House Price

# Splitting the dataset into train & test

In [10]:
x_train,x_test, y_train, y_test = train_test_split(X, Y, test_size = 0.30, random_state=0)
print("x_train ",x_train.shape)
print("x_test ",x_test.shape)
print("y_train ",y_train.shape)
print("y_test ",y_test.shape)

x_train  (354, 13)
x_test  (152, 13)
y_train  (354,)
y_test  (152,)


# Fit a Linear Regression to predict house price 

In [11]:
# Create linear Regression
regr = LinearRegression(normalize=True)

# Fit the linear regression 
model = regr.fit(x_train,y_train)

# View Intercept

In [12]:
model.intercept_

37.9925927703448

# View Coefficients

In [13]:
model.coef_

array([ -1.19858618e-01,   4.44233009e-02,   1.18612465e-02,
         2.51295058e+00,  -1.62710374e+01,   3.84909910e+00,
        -9.85471557e-03,  -1.50002715e+00,   2.41507916e-01,
        -1.10671867e-02,  -1.01897720e+00,   6.95273216e-03,
        -4.88110587e-01])

# Coefficient of determination R^2 of the prediction

In [14]:
model.score(x_train,y_train)

0.76445633918212219

# Parameters of this Estimator

In [15]:
model.get_params(deep=True)

{'copy_X': True, 'fit_intercept': True, 'n_jobs': 1, 'normalize': True}

In [17]:
cols = ['Model', 'R-Squared Value', 'MSE']
models_report = pd.DataFrame(columns = cols)
# Predicting the model on test data
y_pred_lin = model.predict(x_test)
# Predicted house price using Linear Regression
y_pred_lin

array([ 24.93551831,  23.75668597,  29.3364008 ,  11.99898444,
        21.37583999,  19.19718511,  20.57022126,  21.2138302 ,
        19.05187659,  20.31028442,   5.47685057,  16.88415507,
        17.13177611,   5.41132187,  40.2160287 ,  32.30923608,
        22.46445111,  36.50566714,  31.03913253,  23.17552674,
        24.75105205,  24.51122436,  20.65675756,  30.45679279,
        22.33344401,  10.18647997,  17.44394817,  18.24663845,
        35.62978156,  20.81890427,  18.26969192,  17.71558071,
        19.34036094,  23.62642525,  28.98283423,  19.43835179,
        11.14022634,  24.82272051,  18.00566388,  15.57161273,
        26.2207858 ,  20.81349155,  22.17395321,  15.48276713,
        22.6264291 ,  24.89397648,  19.75674027,  23.03741163,
         9.84032063,  24.36186515,  21.43835984,  17.61122699,
        24.39248313,  29.93655984,  13.55766168,  21.53449652,
        20.53870439,  15.03422398,  14.34404261,  22.12289936,
        17.07752265,  21.54237223,  32.96641318,  31.37

In [18]:
tmp1 = pd.Series({'Model': " Base Linear Regression Model",
                 'R-Squared Value' : model.score(x_train,y_train),
                 'MSE': metrics.mean_squared_error(y_pred_lin, y_test)})

model1_report = models_report.append(tmp1, ignore_index = True)
model1_report

Unnamed: 0,Model,R-Squared Value,MSE
0,Base Linear Regression Model,0.764456,27.183848


# Predict house price using SGD Regression

In [22]:
# Create linear Regression
regr = SGDRegressor(loss='squared_loss',alpha=0.001,max_iter=100, random_state=0)

# Fit the linear regression 
sgd = regr.fit(X,Y)

# View Intercept

In [23]:
sgd.intercept_

array([  8.39570026e+09])

# View Coefficients

In [24]:
sgd.coef_

array([  5.14539953e+11,   3.04136242e+10,  -2.60739339e+11,
        -3.44106152e+10,  -5.88296888e+09,  -3.88597583e+10,
         3.30346382e+11,   3.62339778e+11,   1.77254623e+11,
         2.73047152e+11,   1.78901650e+11,   2.35696224e+11,
         2.03086642e+11])

# View Actual Number of Iterations

In [25]:
sgd.n_iter_

100

In [28]:
# Predicting the model on test data
y_pred_sgd = sgd.predict(x_test)
# Predicted house price using SGD Regression
y_pred_sgd

array([  2.17589776e+14,   3.11205302e+14,   2.02019591e+14,
         3.27448968e+14,   2.22487435e+14,   2.01531227e+14,
         2.99805551e+14,   1.80617933e+14,   1.84160558e+14,
         1.55158970e+14,   2.64359733e+14,   2.38132550e+14,
         2.99153868e+14,   3.30230417e+14,   2.34458617e+14,
         1.97273261e+14,   3.06100451e+14,   2.02511013e+14,
         1.88706981e+14,   1.89230493e+14,   1.94607380e+14,
         2.26920071e+14,   2.30231332e+14,   1.79664135e+14,
         1.74060674e+14,   2.59074557e+14,   2.12628092e+14,
         2.07410746e+14,   1.98712207e+14,   2.48933948e+14,
         3.15914131e+14,   3.13664412e+14,   2.13567962e+14,
         2.04728124e+14,   2.07220049e+14,   3.03188846e+14,
         3.30251614e+14,   3.02799277e+14,   2.41563661e+14,
         2.97281803e+14,   1.82025373e+14,   1.74700337e+14,
         2.21570259e+14,   3.08118264e+14,   2.33180175e+14,
         1.90811263e+14,   1.96920774e+14,   2.19988027e+14,
         2.41983671e+14,

In [29]:
tmp1 = pd.Series({'Model': " Regression using SGD ",
                 'R-Squared Value' : sgd.score(x_train,y_train),
                 'MSE': metrics.mean_squared_error(y_pred_sgd, y_test)})

model1_report = models_report.append(tmp1, ignore_index = True)
model1_report

Unnamed: 0,Model,R-Squared Value,MSE
0,Regression using SGD,-6.27793e+26,5.4202e+28


Thus, we predicted the house prices using Linear Regression and SGD Regression.