# Linear Regression using Scikit-Learn

In [115]:
# Load libraries
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.linear_model import LinearRegression
from sklearn.datasets import load_boston
from sklearn.linear_model import SGDRegressor
from sklearn import metrics
import warnings

# Suppress Warning
warnings.filterwarnings(action="ignore", module="scipy", message="^internal gelsd")

# Load Boston Housing Data

In [100]:
boston = load_boston()
X = boston.data
Y = boston.target

In [101]:
#Let us now split the dataset into train & test
from sklearn.cross_validation import train_test_split
x_train,x_test, y_train, y_test = train_test_split(X, Y, test_size = 0.30, random_state=0)
print("x_train ",x_train.shape)
print("x_test ",x_test.shape)
print("y_train ",y_train.shape)
print("y_test ",y_test.shape)

x_train  (354, 13)
x_test  (152, 13)
y_train  (354,)
y_test  (152,)


# Fit a Linear Regression 

In [109]:
# Create linear Regression
regr = LinearRegression(normalize=True)

# Fit the linear regression 
model = regr.fit(x_train,y_train)

# View Intercept

In [110]:
model.intercept_

37.9925927703448

# View Coefficients

In [111]:
model.coef_

array([ -1.19858618e-01,   4.44233009e-02,   1.18612465e-02,
         2.51295058e+00,  -1.62710374e+01,   3.84909910e+00,
        -9.85471557e-03,  -1.50002715e+00,   2.41507916e-01,
        -1.10671867e-02,  -1.01897720e+00,   6.95273216e-03,
        -4.88110587e-01])

# Coefficient of determination R^2 of the prediction

In [105]:
model.score(x_train,y_train)

0.73780923550562572

# Parameters of this Estimator

In [106]:
model.get_params(deep=True)

{'copy_X': True, 'fit_intercept': True, 'n_jobs': 1, 'normalize': True}

In [123]:
cols = ['Model', 'R-Squared Value', 'MSE']
models_report = pd.DataFrame(columns = cols)
# Predicting the model on test data
y_pred_lin = model.predict(x_test)

In [124]:
tmp1 = pd.Series({'Model': " Base Linear Regression Model",
                 'R-Squared Value' : model.score(x_train,y_train),
                 'MSE': metrics.mean_squared_error(y_pred_lin, y_test)})

model1_report = models_report.append(tmp1, ignore_index = True)
model1_report

Unnamed: 0,Model,R-Squared Value,MSE
0,Base Linear Regression Model,0.764456,27.183848


# Regression using SGD in Scikit-Learn

In [92]:
# Create linear Regression
regr = SGDRegressor(loss='squared_loss',alpha=0.001,max_iter=100, random_state=0)

# Fit the linear regression 
sgd = regr.fit(x,y)

# View Intercept

In [93]:
sgd.intercept_

array([  8.39570026e+09])

# View Coefficients

In [94]:
sgd.coef_

array([  5.14539953e+11,   3.04136242e+10,  -2.60739339e+11,
        -3.44106152e+10,  -5.88296888e+09,  -3.88597583e+10,
         3.30346382e+11,   3.62339778e+11,   1.77254623e+11,
         2.73047152e+11,   1.78901650e+11,   2.35696224e+11,
         2.03086642e+11])

# View Actual Number of Iterations

In [95]:
sgd.n_iter_

100

In [127]:
# Predicting the model on test data
y_pred_sgd = sgd.predict(x_test)

In [128]:
tmp1 = pd.Series({'Model': " Regression using SGD ",
                 'R-Squared Value' : sgd.score(x_train,y_train),
                 'MSE': metrics.mean_squared_error(y_pred_sgd, y_test)})

model1_report = models_report.append(tmp1, ignore_index = True)
model1_report

Unnamed: 0,Model,R-Squared Value,MSE
0,Regression using SGD,-6.27793e+26,5.4202e+28
