In [25]:
from sklearn import datasets
from sklearn import preprocessing
from sklearn import model_selection
import numpy as np
from sklearn import linear_model

In [3]:
boston = datasets.load_boston()

In [4]:
X = boston.data

In [6]:
Y = boston.target

In [8]:
# preprocessing step
X = preprocessing.scale(X)

In [11]:
# split the data into train and test
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(X, Y, test_size=.3, random_state=23)

In [123]:
def cost_function(X, Y, theta):
    return (1/(2*X.shape[0]))*np.sum((np.dot(X, theta) - Y)**2)

In [20]:
def batchgradient_descent(X, Y, alpha=0.001, error=1e-9):
    (m,n) = X.shape
    X = np.hstack([np.ones((m,1)), X])
    Y = np.reshape(Y, (m,1))
    
    theta = np.zeros((n+1,1))
    
    no_of_iterations = 0
    
    while True:
        cost_prev = cost_function(X, Y, theta)
        #update theta
        theta -= (alpha/m)*np.dot(X.T, np.dot(X, theta)-Y)
        cost_curr = cost_function(X, Y, theta)
        if abs(cost_prev-cost_curr)<error:
            break
        no_of_iterations += 1
        
    return (theta, no_of_iterations)

In [30]:
(theta, no_of_iterations) = batchgradient_descent(X_train, Y_train)

In [31]:
theta

array([[ 2.26604071e+01],
       [-8.01881097e-01],
       [ 1.00706777e+00],
       [-1.59269389e-01],
       [ 8.48667516e-01],
       [-1.88920895e+00],
       [ 2.99905033e+00],
       [ 1.94029908e-02],
       [-2.95185871e+00],
       [ 2.21210876e+00],
       [-1.24810586e+00],
       [-1.83937042e+00],
       [ 1.33923850e+00],
       [-3.77989487e+00]])

In [32]:
no_of_iterations

81724

In [33]:
# using sklearn standard linear regression
lgr = linear_model.LinearRegression()

In [34]:
lgr.fit(X_train, Y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)

In [35]:
lgr.intercept_

22.660469203062053

In [36]:
lgr.coef_

array([-0.80274896,  1.00894334, -0.15435574,  0.84802077, -1.89043441,
        2.99856409,  0.02026839, -2.9518758 ,  2.22188773, -1.25979872,
       -1.83934894,  1.3393287 , -3.77997543])

In [38]:
lgr.score(X_test, Y_test)

0.6947991644651342

In [108]:
def stochastic_gradient(X, Y, alpha=0.01, error=1e-9):
    (m,n) = X.shape
    X = np.hstack([np.ones((m,1)), X])
    Y = np.reshape(Y, (m,1))
    
    theta = np.zeros((n+1,1))
    
    no_of_iterations = 0
    
    #shuffle the data randomly
    np.random.shuffle(X)
    
    i = 0
    
    while i<m:
        cost_prev  = cost_function(X, Y, theta)
        #update theta
        theta -= alpha*np.reshape(X[i, :], (n+1,1))*(np.dot(X[i, :], theta)-Y[i, :])[0]
        cost_curr  = cost_function(X,Y,theta)
        
        if abs(cost_prev - cost_curr)<error:
            break
        i += 1
        no_of_iterations += 1
    
    return (theta, no_of_iterations)

In [109]:
(theta, no_of_iterations) = stochastic_gradient(X_train, Y_train)

In [110]:
theta

array([[21.76244377],
       [ 0.6731463 ],
       [-0.02484161],
       [-0.07573858],
       [ 1.07439418],
       [-0.15285147],
       [ 0.3711719 ],
       [-0.28324594],
       [ 0.08319557],
       [ 0.28824682],
       [-0.14319147],
       [-0.1400069 ],
       [ 0.7863527 ],
       [-0.1504594 ]])

In [111]:
no_of_iterations

354