# Linear regression model using the scikit-learn library

In [1]:
from sklearn import linear_model
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Seed NumPy's legacy global random generator so that the np.random.* calls
# executed after this line produce the same numbers on every run.
np.random.seed(2)

In [4]:
# Synthetic 1-D regression data: 1000 uniform inputs in [0, 1) and targets
# following y = 4 + 3*x plus Gaussian noise with standard deviation 0.2.
X = np.random.rand(1000, 1)
Y = 4 + 3*X + 0.2*np.random.randn(1000, 1)

# Append a column of ones so the intercept is learned as an ordinary weight;
# the resulting design matrix has columns [x, 1].
one = np.ones((len(X), 1))
X_bar = np.concatenate((X, one), axis=1)

# The bias is already carried by the ones column, so sklearn's own intercept
# term is switched off.
model = linear_model.LinearRegression(fit_intercept=False)
model.fit(X_bar, Y)
w = model.coef_

print('Result of linear regression model w:', w)

Result of linear regression model w: [[3.01021115 3.99177204]]


In [None]:
def differential(x_bar, y, w):
    """Analytic gradient of `cost` with respect to the weights `w`.

    Computes (1/N) * x_bar^T (x_bar w - y), where N is the number of rows of
    `x_bar`.

    Parameters
    ----------
    x_bar : array with N rows; must support `.shape`, `.T` and `.dot`.
    y     : targets, shaped so that `x_bar.dot(w) - y` is well defined.
    w     : current weights.

    Returns
    -------
    The gradient, with the shape of `x_bar.T.dot(x_bar.dot(w) - y)`.
    """
    N = x_bar.shape[0]
    # Use the `x_bar` argument, not the notebook-level `X_bar`, so the gradient
    # is taken on the data the caller actually passed in.
    return (1/N)*x_bar.T.dot(x_bar.dot(w) - y)

def cost(x_bar, y, w):
    """Least-squares cost: 0.5 * (1/N) * ||y - x_bar.dot(w)||_2 ** 2.

    N is the number of rows of `x_bar`. The norm is `np.linalg.norm(..., 2)`;
    for a single-column residual this is the ordinary Euclidean length.
    """
    n_samples = x_bar.shape[0]
    residual = y - x_bar.dot(w)
    squared_norm = np.linalg.norm(residual, 2)**2
    return 0.5*(1/n_samples)*squared_norm

def gradient_descent(x_bar, y, w0, eta, differential, iterations = 1000):
    """Plain gradient descent that records every iterate.

    Parameters
    ----------
    x_bar, y     : data, forwarded unchanged to `differential`.
    w0           : starting weights.
    eta          : learning rate (step size).
    differential : callable `(x_bar, y, w) -> gradient at w`.
    iterations   : maximum number of update steps.

    Returns
    -------
    (trajectory, it)
        trajectory : `np.array` stacking `w0` followed by every iterate that
                     was computed, including the one that met the stopping
                     rule, so `trajectory[-1]` is the final estimate.
        it         : 0-based index of the last update step performed, or -1
                     when no step was run (`iterations <= 0`).

    Stopping rule: `norm(gradient) / len(w) < 1e-5` at the new iterate.
    """
    w = [w0]
    it = -1
    if iterations <= 0:
        # Nothing to do; avoids returning an unbound loop variable.
        return np.array(w), it

    # The gradient at the newest iterate is reused for the next step, so it is
    # evaluated once per iteration instead of twice.
    grad = differential(x_bar, y, w0)
    for it in range(iterations):
        w_new = w[-1] - eta*grad
        # Store the iterate before testing convergence so the point that
        # satisfies the stopping rule is not thrown away.
        w.append(w_new)
        grad = differential(x_bar, y, w_new)
        if np.linalg.norm(grad)/len(w_new) < 1e-5:
            break

    return np.array(w), it

In [None]:
def numerical_gradient(x_bar, y, w, cost):
    """Central finite-difference approximation of the gradient of `cost` at `w`.

    Each entry of `w` is perturbed by +/- eps (eps = 1e-4) in turn and the
    slope `(cost(w + eps) - cost(w - eps)) / (2 * eps)` is stored at the same
    position of the result.

    Parameters
    ----------
    x_bar, y : data, forwarded unchanged to `cost`.
    w        : point at which to differentiate; any array-like of numbers.
    cost     : callable `(x_bar, y, w) -> scalar`.

    Returns
    -------
    Float array with the same shape as `w`.
    """
    eps = 1e-4
    # Work on a float copy: adding eps in place to an integer array would fail
    # (or truncate), and the caller's array must not be modified.
    w = np.array(w, dtype=float)
    g = np.zeros_like(w)
    # Visit every scalar entry, so weights with more than one column are
    # perturbed one element at a time rather than one row at a time.
    for idx in np.ndindex(w.shape):
        w_p = w.copy()
        w_n = w.copy()
        w_p[idx] += eps
        w_n[idx] -= eps
        g[idx] = (cost(x_bar, y, w_p) - cost(x_bar, y, w_n))/(2*eps)
    return g

def check_gradient(w, cost, differential, x_bar=None, y=None, tol=1e-6):
    """Compare an analytic gradient with a finite-difference estimate.

    A fresh random point with the same shape as `w` is drawn with
    `np.random.rand`; only the shape of `w` is used, not its values. The
    analytic gradient from `differential` and the estimate from
    `numerical_gradient` are then compared at that point.

    Parameters
    ----------
    w            : array whose shape defines the test point.
    cost         : callable `(x_bar, y, w) -> scalar`.
    differential : callable `(x_bar, y, w) -> gradient`.
    x_bar, y     : data to check on; when omitted, the notebook-level
                   `X_bar` and `Y` are used.
    tol          : largest accepted norm of the difference between the two
                   gradients (default 1e-6).

    Returns
    -------
    bool : True when the two gradients agree within `tol`.
    """
    if x_bar is None:
        x_bar = X_bar
    if y is None:
        y = Y
    w = np.random.rand(*w.shape)
    grad1 = differential(x_bar, y, w)
    grad2 = numerical_gradient(x_bar, y, w, cost)
    return bool(np.linalg.norm(grad1 - grad2) < tol)

# Sanity check: the analytic gradient should match the finite-difference one.
print(
    'Checking gradient formula...',
    check_gradient(np.random.rand(2, 1), cost, differential),
)

In [132]:
def n_batch_GD_NAG(x_bar, y, w_init, eta, epochs = 100, batch_size = 32, gamma = 0.9):
    """Mini-batch gradient descent with Nesterov accelerated gradient (NAG).

    Every epoch the sample indices are reshuffled with
    `np.random.permutation` and consumed in consecutive batches of
    `batch_size` (the last batch of an epoch may be smaller). Each step
    evaluates the least-squares gradient on the batch at the look-ahead point
    `w - gamma * v` and updates

        v <- gamma * v + eta * gradient
        w <- w - v

    Every 10 updates (counted across epochs) the current weights are compared
    with those of the previous check; the run stops early when
    `norm(w - w_previous_check) / len(w_init) < 1e-3`.

    Parameters
    ----------
    x_bar      : array with one row per sample.
    y          : targets, indexable by sample along the first axis.
    w_init     : starting weights.
    eta        : learning rate.
    epochs     : maximum number of passes over the data.
    batch_size : number of samples per update.
    gamma      : momentum coefficient.

    Returns
    -------
    (w, epoch, iteration, last_iteration)
        w              : final weights (`w_init` itself if no update ran).
        epoch          : 0-based index of the last epoch entered, or -1 when
                         `epochs <= 0`.
        iteration      : 0-based batch index within that epoch at which the
                         run stopped.
        last_iteration : index of the final batch of an epoch, i.e.
                         (batches per epoch) - 1.
    """

    def n_batch_differential(w, batch_id):
        # Least-squares gradient restricted to the rows listed in batch_id.
        xi_n = np.array(x_bar[batch_id, :])
        yi_n = np.array(y[batch_id])
        return (1/xi_n.shape[0])*xi_n.T.dot(xi_n.dot(w) - yi_n)

    # Sample count comes from the data passed in, not from a notebook global.
    N = x_bar.shape[0]
    # Batches per epoch: ceil(N / batch_size) in integer arithmetic.
    times = (N + batch_size - 1) // batch_size

    iter_check_w = 10
    # Only the latest weights are needed, so no history list is kept.
    w = w_init
    v_old = np.zeros_like(w_init)
    w_last_check = w_init
    count = 0
    epoch = -1  # stays -1 when the epoch loop never runs
    for epoch in range(epochs):
        # shuffle data
        random_id = np.random.permutation(N)
        for i in range(times):
            count += 1
            # Slicing clamps at N, so the last batch is shorter automatically.
            batch_id = random_id[i*batch_size : (i + 1)*batch_size]
            v_new = gamma*v_old + eta*n_batch_differential(w - gamma*v_old, batch_id)
            w = w - v_new
            v_old = v_new
            if count % iter_check_w == 0:
                if np.linalg.norm(w - w_last_check)/len(w_init) < 1e-3:
                    return w, epoch, i, times - 1
                w_last_check = w
    return w, epoch, times - 1, times - 1

def Batch_GD(x_bar, y, w_init, eta, epochs = 100):
    """Full-batch variant: every update uses all rows of `x_bar`.

    Delegates to `n_batch_GD_NAG` with the batch size set to the number of
    samples (momentum left at that function's default) and returns its
    `(w, epoch, iteration, total)` result unchanged.
    """
    n_samples = x_bar.shape[0]
    return n_batch_GD_NAG(x_bar, y, w_init, eta, epochs, n_samples)

def Stochastic_GD(x_bar, y, w_init, eta, epochs = 100):
    """Stochastic variant: one sample per update, momentum 0.5.

    Delegates to `n_batch_GD_NAG` with `batch_size=1` and `gamma=0.5` and
    returns its `(w, epoch, iteration, total)` result unchanged.
    """
    return n_batch_GD_NAG(x_bar, y, w_init, eta, epochs, batch_size=1, gamma=0.5)

def MiniBatch_GD(x_bar, y, w_init, eta, epochs = 100, batch_size = 32):
    """Mini-batch variant: `batch_size` samples per update.

    Delegates to `n_batch_GD_NAG` (momentum left at that function's default)
    and returns its `(w, epoch, iteration, total)` result unchanged.
    """
    return n_batch_GD_NAG(x_bar, y, w_init, eta, epochs, batch_size=batch_size)

# Settings shared by the three optimiser runs below.
w_init = np.array([[2], [1]])
learning_rate = 0.05
epochs = 25

# The weights come back as a (2, 1) column, so the two values are read with
# explicit [row, 0] indexing. Formatting a one-element array with %.2f relies
# on an implicit array-to-scalar conversion that NumPy has deprecated.

w_BGD, epoch_BGD, iteration, total = Batch_GD(X_bar, Y, w_init, learning_rate, epochs)
print("Result tuning from Batch gradient descent with learning rate %.2f after %d epochs at iteration %d/%d w: [%.2f, %.2f]" %(learning_rate, epoch_BGD + 1, iteration + 1, total + 1, w_BGD[0, 0], w_BGD[1, 0]))

w_SGD, epoch_SGD, iteration, total = Stochastic_GD(X_bar, Y, w_init, learning_rate, epochs)
print("Result tuning from Stochastic gradient descent with learning rate %.2f after %d epochs at iteration %d/%d w: [%.2f, %.2f]" %(learning_rate, epoch_SGD + 1, iteration + 1, total + 1, w_SGD[0, 0], w_SGD[1, 0]))

# Mini-batch run uses 128 samples per update.
w_MiniBatchGD, epoch_MiniBatchGD, iteration, total = MiniBatch_GD(X_bar, Y, w_init, learning_rate, epochs, 128)
print("Result tuning from Mini-batch gradient descent with learning rate %.2f after %d epochs at iteration %d/%d w: [%.2f, %.2f]" %(learning_rate, epoch_MiniBatchGD + 1, iteration + 1, total + 1, w_MiniBatchGD[0, 0], w_MiniBatchGD[1, 0]))

Result tuning from Batch gradient descent with learning rate 0.05 after 25 epochs at iteration 1/1 w: [3.09, 3.54]
Result tuning from Stochastic gradient descent with learning rate 0.05 after 3 epochs at iteration 240/1000 w: [3.06, 3.97]
Result tuning from Mini-batch gradient descent with learning rate 0.05 after 25 epochs at iteration 8/8 w: [3.01, 3.99]
