In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [27]:
def sigmoid(x):
    """Elementwise logistic sigmoid, 1 / (1 + exp(-x)), evaluated without overflow.

    Parameters
    ----------
    x : scalar or array-like
        Input value(s).

    Returns
    -------
    NumPy scalar for scalar input, otherwise an ndarray with the shape of `x`.

    Notes
    -----
    The textbook form calls exp(-x), which overflows (RuntimeWarning) once x is
    a large negative number. Here only exp(-|x|) is evaluated; it lies in
    (0, 1], so it cannot overflow. Results match the textbook form up to
    floating-point rounding.
    """
    x = np.asarray(x)
    z = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)) -- the same function.
    out = np.where(x >= 0, 1.0 / (1.0 + z), z / (1.0 + z))
    # Indexing with () turns a 0-d result back into a scalar and leaves arrays as they are.
    return out[()]

In [28]:
def loss(y, y_hat, eps=1e-15):
    """Summed binary cross-entropy between labels `y` and predictions `y_hat`.

    Parameters
    ----------
    y : array-like
        Target values (0 or 1 for ordinary binary labels).
    y_hat : array-like
        Predicted values in [0, 1], broadcastable against `y`.
    eps : float, optional
        Predictions are clipped to [eps, 1 - eps] before the logarithm is
        taken. Without the clip a prediction of exactly 0 or 1 produces
        log(0) = -inf, and 0 * -inf = nan poisons the whole sum.

    Returns
    -------
    float
        -sum(y * log(y_hat) + (1 - y) * log(1 - y_hat)) over all elements.
    """
    y = np.asarray(y)
    # Work in float64 so that 1 - eps is still strictly below 1 after the clip.
    y_hat = np.clip(np.asarray(y_hat, dtype=np.float64), eps, 1.0 - eps)
    return -np.sum(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat))

In [29]:
def hypothesis(X, W):
    """Return the sigmoid of the linear scores obtained by multiplying X with W."""
    scores = np.dot(X, W)
    return sigmoid(scores)

In [32]:
def GD(X, y, learning_rate=0.0001, iterations=10, tol=1e-3):
    """Fit logistic-regression weights with full-batch gradient descent.

    Parameters
    ----------
    X : ndarray of shape (N, m)
        Design matrix, one sample per row.
    y : array-like with N entries
        Targets. Reshaped to a column (N, 1) so that `y_hat - y` stays (N, 1);
        a 1-D `y` would otherwise broadcast to an (N, N) matrix.
    learning_rate : float
        Step size applied to the gradient X.T @ (y_hat - y).
    iterations : int
        Maximum number of updates.
    tol : float
        Stop early once the L2 norm of a weight update falls below this value.

    Returns
    -------
    W : ndarray of shape (m, 1)
        Weights after the last update.
    loss_history : ndarray
        Loss measured before each update that was actually performed. Its
        length equals the number of iterations run, so an early stop leaves no
        trailing zeros.
    """
    _, m = np.shape(X)
    y = np.reshape(y, (-1, 1))
    W = np.random.randn(m, 1)
    loss_history = np.zeros(iterations)
    steps_run = 0
    for i in range(iterations):
        y_hat = hypothesis(X, W)
        loss_history[i] = loss(y, y_hat)
        W_new = W - learning_rate * np.dot(X.T, (y_hat - y))
        step_norm = np.linalg.norm(W_new - W)
        # Only the current weights are needed; the full trajectory is not stored.
        W = W_new
        steps_run = i + 1
        if step_norm < tol:
            break
    return W, loss_history[:steps_run]



## SGD

In [6]:
def SGD(X, y, learning_rate=0.01, iterations=1000, tol=1e-3):
    """Fit logistic-regression weights with stochastic gradient descent.

    The weights are updated after every single sample, so each sample's
    gradient is evaluated at the weights produced by the previous sample.
    Samples are visited in the order they are stored in `X` (no shuffling).

    Parameters
    ----------
    X : ndarray of shape (N, m)
        Design matrix, one sample per row.
    y : array-like with N entries
        Targets; reshaped to a column (N, 1).
    learning_rate : float
        Step size of each per-sample update.
    iterations : int
        Maximum number of epochs (full passes over `X`).
    tol : float
        Stop early once the weights move by less than this (L2 norm) over a
        whole epoch.

    Returns
    -------
    W : ndarray of shape (m, 1)
        Weights after the last epoch that was run.
    loss_history : ndarray
        Per-epoch sum of the per-sample losses, each measured just before that
        sample's update. Its length equals the number of epochs run.
    """
    _, m = np.shape(X)
    y = np.reshape(y, (-1, 1))
    W = np.random.randn(m, 1)
    loss_history = np.zeros(iterations)
    epochs_run = 0
    for i in range(iterations):
        W_epoch_start = W
        epoch_loss = 0.0
        for index, x in enumerate(X):
            x_i = x.reshape(1, -1)
            y_i = y[index]
            y_hat = hypothesis(x_i, W)
            epoch_loss += loss(y_i, y_hat)
            # Rebind rather than update in place so W_epoch_start keeps the old values.
            W = W - learning_rate * np.dot(x_i.T, (y_hat - y_i))
        loss_history[i] = epoch_loss
        epochs_run = i + 1
        if np.linalg.norm(W - W_epoch_start) < tol:
            break
    return W, loss_history[:epochs_run]

# plt.plot(range(len(loss_history)), loss_history, 'b.')

## Mini-batch

In [7]:
def get_batchs(X, y, batch_size):
    """Lazily yield matching (X slice, y slice) pairs of at most `batch_size` rows.

    Slices are taken front to back; the final pair is shorter when len(X) is
    not a multiple of `batch_size`.
    """
    total = len(X)
    for start in range(0, total, batch_size):
        stop = start + batch_size
        yield X[start:stop], y[start:stop]
    
    
def Mini_batch(X, y, learning_rate=0.01, iterations=1000, batch_size=28, tol=1e-3):
    """Fit logistic-regression weights with mini-batch gradient descent.

    The weights are updated after every batch, so each batch's gradient is
    evaluated at the weights produced by the previous batch. Batches are the
    consecutive slices produced by `get_batchs` (no shuffling).

    Parameters
    ----------
    X : ndarray of shape (N, m)
        Design matrix, one sample per row.
    y : array-like with N entries
        Targets; reshaped to a column (N, 1).
    learning_rate : float
        Step size of each per-batch update.
    iterations : int
        Maximum number of epochs (full passes over `X`).
    batch_size : int
        Number of rows per batch; the last batch of an epoch may be smaller.
    tol : float
        Stop early once the weights move by less than this (L2 norm) over a
        whole epoch.

    Returns
    -------
    W : ndarray of shape (m, 1)
        Weights after the last epoch that was run.
    loss_history : ndarray
        Per-epoch sum of the batch losses, each measured just before that
        batch's update. Its length equals the number of epochs run.
    """
    _, m = np.shape(X)
    y = np.reshape(y, (-1, 1))
    W = np.random.randn(m, 1)
    loss_history = np.zeros(iterations)
    epochs_run = 0
    for i in range(iterations):
        W_epoch_start = W
        epoch_loss = 0.0
        for x_batch, y_batch in get_batchs(X, y, batch_size):
            y_hat = hypothesis(x_batch, W)
            epoch_loss += loss(y_batch, y_hat)
            # Rebind rather than update in place so W_epoch_start keeps the old values.
            W = W - learning_rate * np.dot(x_batch.T, (y_hat - y_batch))
        loss_history[i] = epoch_loss
        epochs_run = i + 1
        if np.linalg.norm(W - W_epoch_start) < tol:
            break
    return W, loss_history[:epochs_run]



In [8]:
import pickle
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only run this cell on a data.pkl that comes from a trusted source.
with open('data.pkl', 'rb') as f:
    data = pickle.load(f)
# Items 1 and 2 of the loaded object are used as the feature matrix and the targets.
X, y = data[1], data[2]
# Make y a column so it lines up with the (N, 1) predictions returned by hypothesis().
y = y.reshape(-1, 1)
print(X.shape, y.shape)

(614, 8) (614, 1)


In [None]:
# Seed NumPy's global generator so the random weight initialisation inside GD
# (np.random.randn) is identical on every Restart & Run All.
np.random.seed(0)

# Prepend a column of ones so that the first weight acts as the intercept.
N = X.shape[0]
ones = np.ones((N, 1))
X_bar = np.concatenate((ones, X), axis=1)
W, loss_history = GD(X_bar, y)

fig, ax = plt.subplots()
ax.plot(range(len(loss_history)), loss_history, 'b.')
ax.set_xlabel('iteration')
ax.set_ylabel('loss')
ax.set_title('Batch gradient descent')
print(W)

In [None]:
# Seed NumPy's global generator so the random weight initialisation inside SGD
# (np.random.randn) is identical on every Restart & Run All.
np.random.seed(0)
W, loss_history = SGD(X_bar, y)
print(W)

fig, ax = plt.subplots()
ax.plot(range(len(loss_history)), loss_history, 'b.')
ax.set_xlabel('epoch')
ax.set_ylabel('loss')
ax.set_title('Stochastic gradient descent')
plt.show()