In [29]:
from sklearn import datasets
from sklearn import linear_model
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn import metrics
import numpy as np

In [2]:
# Load the Boston housing data set through scikit-learn's `datasets` module.
# NOTE(review): `load_boston` was deprecated in scikit-learn 1.0 and removed in
# 1.2 — confirm the environment pins an older release before re-running.
boston = datasets.load_boston()

In [3]:
# Feature matrix taken from the loaded data set (its shape is inspected below).
X = boston.data

In [4]:
# Regression targets that every model in this notebook is fit against.
Y = boston.target

In [5]:
# Inspect the feature matrix dimensions as (rows, columns).
X.shape

(506, 13)

In [7]:
# Preprocessing: standardize the feature columns with `preprocessing.scale`.
# NOTE(review): this runs on the full data set, before the train/test split in
# the next cell, so held-out rows contribute to the scaling statistics.
# Consider fitting the scaler on the training split only.
X = preprocessing.scale(X)

In [9]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=42,
)

# standard sklearn linear regression

In [10]:
# Reference model: scikit-learn's LinearRegression with its default settings.
lgr = linear_model.LinearRegression()

In [11]:
# Fit on the training split only; the test split is used later for scoring.
lgr.fit(X_train, Y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)

In [12]:
# Fitted intercept (bias) term of the reference model.
lgr.intercept_

22.50945471291039

In [13]:
# Fitted coefficients of the reference model, one per feature column.
lgr.coef_

array([-1.14691411,  0.83432605,  0.33940694,  0.79163612, -1.784727  ,
        2.84783949, -0.30429306, -2.91562521,  2.11140045, -1.46519951,
       -1.9696347 ,  1.07567771, -3.90310727])

In [20]:
# Predict targets for the held-out test rows.
Y_test_pred = lgr.predict(X_test)

In [22]:
# R^2 score (`metrics.r2_score`) of the test predictions, multiplied by 100.
# NOTE(review): R^2 is a goodness-of-fit score, not a classification accuracy,
# so the `accuracy_percentage` name and the "Accuracy" label printed in the
# next cell are misleading — consider renaming both together.
accuracy_percentage = metrics.r2_score(Y_test, Y_test_pred)*100

In [28]:
# Report the scaled R^2 value computed in the previous cell.
print("Accuracy:",accuracy_percentage, "%")

Accuracy: 71.12260057484923 %


# closed form solution

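Normal equation: $\theta = (A^\top A)^{-1} A^\top b$, where $A$ is the training feature matrix with a leading column of ones and $b$ is the training target column vector.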

In [31]:
# Number of training rows and number of feature columns.
m_train, n_train = X_train.shape

In [36]:
# Design matrix: prepend a column of ones so the first parameter acts as the
# intercept.
A_train = np.hstack([np.ones((m_train,1)), X_train])

In [38]:
# Targets as an (m_train, 1) column so the matrix products below yield a
# column of parameters.
b_train = np.reshape(Y_train, (m_train,1))

In [40]:
# Closed-form least-squares fit via the normal equations
#     (A^T A) theta = A^T b.
# Solving the linear system directly avoids forming an explicit inverse,
# which costs more and loses precision when A^T A is ill-conditioned.
theta = np.linalg.solve(np.dot(A_train.T, A_train), np.dot(A_train.T, b_train))

In [41]:
# Show the closed-form parameters; row 0 is the intercept because the ones
# column comes first in A_train.
theta

array([[22.50945471],
       [-1.14691411],
       [ 0.83432605],
       [ 0.33940694],
       [ 0.79163612],
       [-1.784727  ],
       [ 2.84783949],
       [-0.30429306],
       [-2.91562521],
       [ 2.11140045],
       [-1.46519951],
       [-1.9696347 ],
       [ 1.07567771],
       [-3.90310727]])

# Batch gradient descent Algorithm

In [47]:
def cost_function(X, Y, theta):
    """Return the halved mean squared error of the linear model ``X @ theta``.

    Computes ``(1 / (2 * m)) * sum((X @ theta - Y) ** 2)`` where ``m`` is the
    number of rows of ``X``.

    Parameters
    ----------
    X : array-like, shape (m, k)
        Design matrix. Include a column of ones if the model has an intercept.
    Y : array-like with m elements
        Targets, given either as a flat vector or as a column.
    theta : array-like, shape (k,) or (k, 1)
        Model parameters.

    Returns
    -------
    float
        The cost value.

    Raises
    ------
    ZeroDivisionError
        If ``X`` has no rows.
    """
    X = np.asarray(X)
    predictions = np.dot(X, theta)
    targets = np.asarray(Y)

    # A flat target vector against a column of predictions (or the reverse)
    # would broadcast the residual into an (m, m) matrix and silently inflate
    # the cost, so align the target layout with the predictions first.
    if targets.shape != predictions.shape and targets.size == predictions.size:
        targets = targets.reshape(predictions.shape)

    return (1/(2*X.shape[0])) * np.sum((predictions - targets)**2)

In [51]:
def batch_gradient_descent(X, Y, learning_rate=0.001, error=1e-9, max_iterations=None):
    """Fit a linear model with an intercept by batch gradient descent.

    Minimizes ``(1 / (2 * m)) * sum((A @ theta - Y) ** 2)``, where ``A`` is
    ``X`` with a leading column of ones. Every step uses the gradient over
    all ``m`` rows.

    Parameters
    ----------
    X : array-like, shape (m, n)
        Feature matrix without a bias column; the bias column is added here.
    Y : array-like with m elements
        Targets; reshaped internally to an (m, 1) column.
    learning_rate : float, default 0.001
        Step size applied to the averaged gradient.
    error : float, default 1e-9
        Stop once the absolute change in cost between two consecutive steps
        drops below this value.
    max_iterations : int or None, default None
        Optional cap on the number of counted steps. ``None`` keeps iterating
        until the convergence test passes.

    Returns
    -------
    (theta, no_of_iterations) : tuple
        ``theta`` is an (n + 1, 1) array whose first row is the intercept.
        ``no_of_iterations`` counts the steps taken before the one that met
        the convergence test (that final step is applied but not counted).

    Raises
    ------
    FloatingPointError
        If the cost becomes infinite or NaN, which means the learning rate is
        too large for this data. Without this check the loop would never
        end, because comparisons against NaN are always false.
    """
    X = np.asarray(X, dtype=float)
    (m, n) = X.shape

    # add bias term
    X = np.hstack([np.ones((m, 1)), X])

    # reshape Y into a column so the residual stays (m, 1)
    Y = np.reshape(np.asarray(Y, dtype=float), (m, 1))

    # initialize theta
    theta = np.zeros((n + 1, 1))

    # The residual is shared by the gradient and the cost, so compute it once
    # per step instead of once for each of them.
    residual = np.dot(X, theta) - Y
    cost_prev = (1/(2*m)) * np.sum(residual**2)

    no_of_iterations = 0

    while max_iterations is None or no_of_iterations < max_iterations:
        # update theta (the parameters)
        theta -= (learning_rate/m) * np.dot(X.T, residual)

        residual = np.dot(X, theta) - Y
        cost_curr = (1/(2*m)) * np.sum(residual**2)

        if not np.isfinite(cost_curr):
            raise FloatingPointError(
                "batch gradient descent diverged (cost is not finite); "
                "try a smaller learning_rate"
            )

        if abs(cost_curr - cost_prev) < error:
            break

        # The cost after this step is the starting cost of the next one.
        cost_prev = cost_curr
        no_of_iterations += 1

    return (theta, no_of_iterations)
    

In [52]:
# Fit by batch gradient descent with the default step size and tolerance.
# NOTE(review): this rebinds `theta`, replacing the closed-form result
# computed earlier; use a distinct name if both need to stay available.
(theta, no_of_iterations) = batch_gradient_descent(X_train, Y_train)

In [53]:
# Number of gradient steps counted before the convergence test passed.
no_of_iterations

82815

In [54]:
# Show the gradient-descent parameters (intercept in row 0) for comparison
# with the closed-form values above.
theta

array([[22.50939809],
       [-1.14605703],
       [ 0.83237076],
       [ 0.33577892],
       [ 0.79224078],
       [-1.78398153],
       [ 2.84844691],
       [-0.30475034],
       [-2.91507539],
       [ 2.10121525],
       [-1.45375256],
       [-1.96941798],
       [ 1.07557116],
       [-3.90279386]])

# stochastic gradient descent Algorithm

In [46]:
def stochastic_gradient_descent(X, Y, learning_rate=0.001, error=1e-9,
                                max_epochs=1000, random_state=42):
    """Fit a linear model with an intercept by stochastic gradient descent.

    Minimizes ``(1 / (2 * m)) * sum((A @ theta - Y) ** 2)``, where ``A`` is
    ``X`` with a leading column of ones. Each epoch visits every row once in
    a freshly shuffled order and updates ``theta`` from that single row.

    Parameters
    ----------
    X : array-like, shape (m, n)
        Feature matrix without a bias column; the bias column is added here.
    Y : array-like with m elements
        Targets; reshaped internally to an (m, 1) column.
    learning_rate : float, default 0.001
        Step size applied to each single-row gradient.
    error : float, default 1e-9
        Stop once the absolute change in full-data cost between two
        consecutive epochs drops below this value.
    max_epochs : int, default 1000
        Upper bound on the number of epochs. With a constant step size the
        cost on noisy data keeps fluctuating and may never meet ``error``,
        so the cap guarantees termination.
    random_state : int or None, default 42
        Seed for the row shuffling. The fixed default makes runs
        reproducible; pass ``None`` to draw a fresh seed.

    Returns
    -------
    (theta, no_of_epochs) : tuple
        ``theta`` is an (n + 1, 1) array whose first row is the intercept.
        ``no_of_epochs`` is the number of full passes that were run.

    Raises
    ------
    FloatingPointError
        If the cost becomes infinite or NaN, which means the learning rate is
        too large for this data.
    """
    X = np.asarray(X, dtype=float)
    (m, n) = X.shape

    # add bias term
    X = np.hstack([np.ones((m, 1)), X])

    # reshape Y into a column so each per-row residual is (1, 1)
    Y = np.reshape(np.asarray(Y, dtype=float), (m, 1))

    # initialize theta
    theta = np.zeros((n + 1, 1))

    rng = np.random.RandomState(random_state)

    def _cost(params):
        # Same objective the batch variant monitors, evaluated on all rows.
        return (1/(2*m)) * np.sum((np.dot(X, params) - Y)**2)

    cost_prev = _cost(theta)
    no_of_epochs = 0

    while no_of_epochs < max_epochs:
        # Visit the rows in a new random order every epoch.
        for i in rng.permutation(m):
            row = X[i:i + 1]                          # shape (1, n + 1)
            residual = np.dot(row, theta) - Y[i:i + 1]  # shape (1, 1)
            theta -= learning_rate * np.dot(row.T, residual)

        no_of_epochs += 1
        cost_curr = _cost(theta)

        if not np.isfinite(cost_curr):
            raise FloatingPointError(
                "stochastic gradient descent diverged (cost is not finite); "
                "try a smaller learning_rate"
            )

        if abs(cost_curr - cost_prev) < error:
            break
        cost_prev = cost_curr

    return (theta, no_of_epochs)