In [157]:
import numpy as np

In [158]:
data = np.array([
    [1,1],
    [2,2],
    [3,2]
])

In [159]:
(m,n) = data.shape

In [160]:
    X = np.reshape(data[:, 0], (m,1))

In [161]:
Y = np.reshape(data[:, 1], (m,1))

In [162]:
def cost_function(X, Y, theta):
    return (1/(2*X.shape[0]))*np.sum(np.square(np.dot(X,theta)-Y), axis=0)[0]

In [163]:
def stochastic_gradient(X, Y, alpha=0.01, error=1e-9):
    (m,n) = X.shape
    
    X = np.hstack([np.ones((m,1)), X])
    
    theta = np.zeros((n+1,1))
    
    no_of_iter = 0
    
    np.random.shuffle(X)
    
    while True:
        
        cost_prev = cost_function(X, Y, theta)
        
        theta -= (alpha)*np.reshape(X[0, :].T, (n+1,1)) * (np.dot(X[0, :], theta)-Y[0, :])[0]
        cost_curr = cost_function(X, Y, theta)
                
        if abs(cost_prev - cost_curr)<error:
            break;
        no_of_iter += 1
        
    return (theta, no_of_iter)

In [164]:
(theta_stochastic, no_of_iter_stochastic) = stochastic_gradient(X, Y)

In [165]:
theta_stochastic, no_of_iter_stochastic

(array([[0.19999999],
        [0.39999999]]), 335)

In [166]:
def batch_gradient(X, Y, alpha=0.01, error=1e-9):
    (m,n) = X.shape
    
    X = np.hstack([np.ones((m,1)), X])
    
    theta = np.zeros((n+1,1))
    
    no_of_iter = 0
        
    while True:
        
        cost_prev = cost_function(X, Y, theta)
        
        theta -= (alpha/m)*np.dot(X.T, np.dot(X, theta)-Y)
        cost_curr = cost_function(X, Y, theta)
                
        if abs(cost_prev - cost_curr)<error:
            break;
        no_of_iter += 1
        
    return (theta, no_of_iter)

In [167]:
(theta_batch, no_of_iter_batch) = batch_gradient(X, Y)

In [168]:
theta_batch

array([[0.66426218],
       [0.50105774]])

In [169]:
no_of_iter_batch

4196

In [190]:
from sklearn import datasets, preprocessing, model_selection, linear_model

In [171]:
# load boston housing dataset
dataset = datasets.load_boston()

In [172]:
X = dataset.data

In [176]:
(m,n) = X.shape

In [177]:
Y = np.reshape(dataset.target, (m,1))

In [180]:
X = preprocessing.scale(X)

In [182]:
# split the data into train and test
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(X, Y, test_size=.3)

In [183]:
(theta_stoch_boston, no_of_iter_stoch_boston) = stochastic_gradient(X_train, Y_train)

In [184]:
theta_stoch_boston

array([[ 1.33322123],
       [-0.5212545 ],
       [-0.65024181],
       [-0.82223287],
       [-0.3634342 ],
       [-1.22878619],
       [-1.68216084],
       [ 1.26703985],
       [ 1.31505031],
       [-1.00381179],
       [-1.38759403],
       [-0.3424484 ],
       [ 0.5880198 ],
       [ 3.3932167 ]])

In [185]:
no_of_iter_stoch_boston

131

In [187]:
(theta_batch_boston, no_of_iter_batch_boston) = batch_gradient(X_train, Y_train)

In [188]:
theta_batch_boston

array([[22.51551621],
       [-0.94688151],
       [ 1.15301137],
       [ 0.12216097],
       [ 0.97736386],
       [-1.89125407],
       [ 2.84719689],
       [ 0.11738463],
       [-3.11294438],
       [ 2.19551886],
       [-1.75202206],
       [-1.98813683],
       [ 0.81193309],
       [-3.88052806]])

In [189]:
no_of_iter_batch_boston

9373

In [192]:
lgr = linear_model.LinearRegression()

In [193]:
lgr.fit(X_train, Y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)

In [194]:
lgr.intercept_

array([22.51550792])

In [195]:
lgr.coef_

array([[-0.9470697 ,  1.1533543 ,  0.12333316,  0.97719701, -1.89149221,
         2.84695099,  0.11769256, -3.11269112,  2.19851778, -1.75527183,
        -1.98828721,  0.81201422, -3.88074667]])