In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
def sigmoid(x):
    return 1/(1 + np.exp(-x))

In [3]:
def loss(y, y_hat):
    N = np.shape(y)[0]
    return np.sum(np.power(y_hat - y, 2)) / (2 * N)

In [4]:
def hypothesis(X, W):
    return sigmoid(np.dot(X, W))

In [5]:
def GD(X, y, learning_rate=0.0001, iterations=100):
    N, m = np.shape(X)
    W_init = np.random.randn(m, 1)
    W = [W_init]
    loss_history = np.zeros(iterations)
    for i in range(iterations):
        y_hat = hypothesis(X, W[-1])
        _loss = loss(y, y_hat)
        W_new = W[-1] - learning_rate * np.dot(X.T, (y_hat - y))
        W.append(W_new)
        loss_history[i] = _loss
        if np.linalg.norm(W[-2] - W[-1]) < 1e-3:
            break
    return W[-1], loss_history

N = X.shape[0]
ones = np.ones((N, 1))
X_bar = np.concatenate((ones, X), axis=1)
W, loss_history = GD(X_bar, y)

# plt.plot(range(len(loss_history)), loss_history, 'b.')
W[-1]

array([[2.51443499],
       [3.51966672]])

## SGD

In [22]:
def SGD(X, y, learning_rate=0.01, iterations=100):
    N, m = np.shape(X)
    W_init = np.random.randn(m, 1)
    W = [W_init]
    loss_history = np.zeros(iterations)
    for i in range(iterations):
        _loss = 0.0
        # epoch
        for index, x in enumerate(X):
            x_i = x.reshape(1, -1)
            y_i = y[index]
            y_hat = hypothesis(x_i, W[-1])
            _loss += loss(y_i, y_hat)
            W_new = W[-1] - learning_rate * np.dot(x_i.T, (y_hat - y_i))
            
        W.append(W_new)
        loss_history[i] = _loss
        if np.linalg.norm(W[-1] - W[-2]) < 1e-3:
            break
    return W[-1], loss_history

W, loss_history = SGD(X_bar, y)
W[-1]
# plt.plot(range(len(loss_history)), loss_history, 'b.')

array([[1.89411246],
       [4.11359939]])

## Mini-batch

In [23]:
def get_batchs(X, y, batch_size):
    for i in range(0, len(X), batch_size):
        x_batch = X[i:i+batch_size]
        y_batch = y[i:i+batch_size]
        yield x_batch, y_batch
    
    
def Mini_batch(X, y, learning_rate=0.01, iterations=100, batch_size=28):
    N, m = np.shape(X)
    W_init = np.random.randn(m, 1)
    W = [W_init]
    loss_history = np.zeros(iterations)
    for i in range(iterations):
        _loss = 0.0
        # epoch
        for x_batch, y_batch in get_batchs(X, y, batch_size):
            y_hat = hypothesis(x_batch, W[-1])
            _loss += loss(y_batch, y_hat)
            W_new = W[-1] - learning_rate * np.dot(x_batch.T, (y_hat - y_batch))
        
        W.append(W_new) 
        loss_history[i] = _loss
        if np.linalg.norm(W[-2] - W[-1]) < 1e-3:
            break   
    return W[-1], loss_history

W, loss_history = Mini_batch(X_bar, y)
W[-1]
# plt.plot(range(len(loss_history)), loss_history, 'b.')

array([[3.91302778],
       [3.06658212]])