In [15]:
import numpy as np
from sklearn import datasets, model_selection, preprocessing

In [3]:
data = datasets.load_boston()

In [5]:
X = data.data
Y = data.target

In [9]:
(m,n) = X.shape

In [10]:
Y = np.reshape(Y, (m,1))

In [14]:
X = preprocessing.scale(X)

In [16]:
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(X, Y, test_size=0.3)

In [19]:
def cost_function(X, Y, theta):
    return (1/(2*X.shape[0]))*np.sum(np.square(np.dot(X, theta)- Y), axis=0)[0]

In [50]:
def stochastic_gradient_descent(X, Y, learning_rate=.00001, error=1e-9):
    (m,n) = X.shape
    X = np.hstack([np.ones((m,1)), X])
    
    theta = np.zeros((n+1,1))
    
    no_of_iter = 0
    
    
    for _ in range(10000):
        np.random.shuffle(X)
        cost_prev = cost_function(X, Y, theta)
        #update theta 
        theta -= learning_rate*(np.dot(X[:, :n+1].T, np.dot(X[:, :n+1], theta)-Y[:, :n+1]))
        cost_curr = cost_function(X, Y, theta)
        
        if abs(cost_curr - cost_prev)<error:
            break
        no_of_iter += 1
        
        
    return (no_of_iter, theta)
            
            

In [51]:
(no_of_iter, theta) = stochastic_gradient_descent(X_train, Y_train)

In [52]:
theta

array([[ 2.27686902e+01],
       [-6.49651797e-03],
       [-4.24501418e-02],
       [-2.84383387e-02],
       [ 9.82404903e-03],
       [ 2.72115227e-02],
       [-4.97348673e-02],
       [ 8.21785839e-03],
       [-2.90165931e-02],
       [ 4.99233638e-03],
       [ 3.05357821e-02],
       [-4.30419430e-02],
       [-8.66885116e-03],
       [-2.83584635e-02]])

In [53]:
from sklearn import linear_model

In [54]:
lgr = linear_model.LinearRegression()

In [55]:
lgr.fit(X_train[:, 1:], Y_train) 

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)

In [56]:
lgr.intercept_

array([22.59512291])

In [57]:
lgr.coef_

array([[ 0.98553508,  0.17462664,  1.08573161, -1.47837788,  3.29886575,
        -0.6032804 , -2.71245504,  1.76788416, -1.92251667, -1.90603951,
         0.74772011, -3.2343568 ]])

In [65]:
def batch_gradient_descent(X, Y, learning_rate=.001, error=1e-9):
    (m,n) = X.shape
    X = np.hstack([np.ones((m,1)), X])
    
    theta = np.zeros((n+1,1))
    
    no_of_iter = 0
    
    
    while True:
        np.random.shuffle(X)
        cost_prev = cost_function(X, Y, theta)
        #update theta 
        theta -= learning_rate*(np.dot(X.T, np.dot(X, theta)-Y))
        cost_curr = cost_function(X, Y, theta)
        
        if abs(cost_curr - cost_prev)<error:
            break
        no_of_iter += 1
        
        
    return (no_of_iter, theta)

In [66]:
(no_of_iter, theta) = batch_gradient_descent(X_train, Y_train)

  


KeyboardInterrupt: 

In [60]:
theta

array([[ 2.27720472e+01],
       [ 1.26120504e-02],
       [ 2.86860933e-03],
       [-1.58295323e-02],
       [ 2.14457479e-02],
       [ 2.46032888e-02],
       [ 7.57413580e-03],
       [-3.55489519e-02],
       [ 6.47933195e-03],
       [-3.09540990e-03],
       [ 5.48726159e-02],
       [ 1.81861147e-02],
       [-4.61563097e-03],
       [ 2.25529947e-02]])