In [60]:
import numpy as np
from sklearn.datasets import make_classification
import time
from math import exp
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.utils import shuffle
# Synthetic binary-classification data. The seed is fixed so that a fresh
# kernel + "Run All" regenerates exactly the same samples.
X, y = make_classification(n_samples=10000, random_state=42)


In [54]:
def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), element-wise.

    The argument is clipped to [-500, 500] before exponentiation so that
    np.exp never overflows; inside that range the result is identical to the
    unclipped formula.
    """
    return 1 / (1 + np.exp(-np.clip(z, -500, 500)))


def logloss(y_hat, y, eps=1e-15):
    """Mean binary cross-entropy between probabilities `y_hat` and labels `y`.

    Parameters
    ----------
    y_hat : array-like of predicted probabilities.
    y : array-like of 0/1 labels. It must have the same shape as `y_hat`:
        the arithmetic below uses NumPy broadcasting, so pairing an (n, 1)
        array with an (n,) array would silently produce an (n, n) matrix.
    eps : probabilities are clipped to [eps, 1 - eps] so that a prediction of
        exactly 0 or 1 yields a large finite loss instead of nan/inf.

    Returns
    -------
    The summed cross-entropy divided by len(y_hat).
    """
    y_hat = np.clip(y_hat, eps, 1 - eps)
    return np.sum(-y * np.log(y_hat) - (1 - y) * np.log(1 - y_hat)) / len(y_hat)


def predict(X, weights=None):
    """Boolean class predictions: sigmoid(X . weights^T) > 0.5.

    Parameters
    ----------
    X : feature matrix, one sample per row.
    weights : coefficient row vector of shape (1, n_features). When omitted,
        the module-level name `betas` is used, which must have been defined
        beforehand (otherwise a NameError is raised).
    """
    if weights is None:
        weights = betas
    return sigmoid(np.dot(X, weights.T)) > .5


def predict_with_output(X):
    """Threshold already-computed probabilities at 0.5, returning 0/1 integers."""
    return (X > .5) * 1


def gradient_descent(X, y, beta, lr):
    """Apply one logistic-regression gradient step and return the new weights.

    X is the batch feature matrix, y the batch labels (reshaped to a column
    here), beta the current weights as a row vector and lr the step size.
    The gradient is X^T (sigmoid(X beta^T) - y) averaged over the batch.
    """
    targets = y.reshape(-1, 1)
    residuals = sigmoid(np.dot(X, beta.T)) - targets
    averaged_gradient = np.dot(X.T, residuals) / len(targets)

    # The gradient is a column; transpose it back to beta's row layout.
    return beta - lr * averaged_gradient.T

def prepare_batches(X, y, batch_size):
    """Split X and y into consecutive mini-batches of at most `batch_size` rows.

    Parameters
    ----------
    X : 2-D array, sliced as X[start:stop, :].
    y : array with one entry per row of X, sliced in step with X.
    batch_size : positive integer number of rows per batch.

    Returns
    -------
    (X_batch_list, y_batch_list) : two lists of equal length. When len(y) is
        not a multiple of batch_size the final batch holds the remaining
        rows. Both lists are empty when y is empty.

    Raises
    ------
    ValueError
        If batch_size is smaller than 1, or X and y have different lengths.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer, got %r" % (batch_size,))
    if len(X) != len(y):
        raise ValueError("X and y must contain the same number of rows")

    X_batch_list = []
    y_batch_list = []

    # Slices that run past the end are clipped, so the last (possibly
    # shorter) batch needs no special case.
    for start in range(0, len(y), batch_size):
        stop = start + batch_size
        X_batch_list.append(X[start:stop, :])
        y_batch_list.append(y[start:stop])

    return X_batch_list, y_batch_list

In [55]:
# Shuffle with a dedicated, seeded generator so that the train/test split is
# identical on every fresh-kernel run.
rng = np.random.default_rng(42)
permutations = rng.permutation(len(X))

X, y = np.asarray(X).squeeze(), np.asarray(y)

# NOTE: X and y are rebound in place, so re-running this cell without
# re-running the data-generation cell shuffles the already shuffled arrays.
X = X[permutations, :]
y = y[permutations]

# No column of ones is prepended to X, so the model is fitted without an
# intercept term (beta 0). To add one, enable the following lines:
# temp = np.ones((X.shape[0], X.shape[1] + 1))
# temp[:, 1:] = X
# X = temp

# The first fifth of the shuffled rows is held out for testing; the remaining
# rows form the training set.
len_test = len(X) // 5
len_train = len(X) - len_test
X_test, y_test = X[:len_test, :], y[:len_test]
X_train, y_train = X[len_test:, :], y[len_test:]

assert len(X_train) == len_train and len(y_train) == len_train
assert len(X_test) == len_test and len(y_test) == len_test

In [56]:
# Summarise the shapes of the full data set and of both splits, then show one
# example row (index 2) together with its label.
report = [
    ("Shape of X matrix is: ", X.shape),
    ("Shape of y matrix is: ", y.shape),
    ("Shape of X_test matrix is: ", X_test.shape),
    ("Shape of y_test matrix is: ", y_test.shape),
    ("Shape of X_train matrix is: ", X_train.shape),
    ("Shape of y_train matrix is: ", y_train.shape),
    None,  # blank line between the shape summary and the example sample
    ("Desired samples feature vector: ", X[2]),
    ("Desired samples ground truth: ", y[2]),
]

for entry in report:
    if entry is None:
        continue
    message, value = entry
    print(message + str(value))

Shape of X matrix is: (10000, 20)
Shape of y matrix is: (10000,)
Shape of X_test matrix is: (2000, 20)
Shape of y_test matrix is: (2000,)
Shape of X_train matrix is: (8000, 20)
Shape of y_train matrix is: (8000,)
Desired samples feature vector: [ 0.84863467 -0.28172691 -1.87974165  0.54493492 -0.12580948 -2.13868216
 -0.09025264 -0.14493826  2.45129072 -1.04818521 -0.71652779 -0.33294089
  0.00610335  0.17149833  0.15327923  0.51216418 -0.14056954  0.93593331
  1.18642914 -0.47014197]
Desired samples ground truth: 0


In [62]:
def SGD(X_train,y_train,lr,max_iter,epi=1e-6,patience=2):
    """Stochastic gradient descent, expressed as MBGD with one sample per batch.

    All arguments are forwarded unchanged to MBGD with batch_size fixed to 1.
    Returns MBGD's result: (train_error_hist, test_error_hist, test_acc_hist).
    """
    forwarded = dict(lr=lr, max_iter=max_iter, epi=epi, patience=patience)
    return MBGD(X_train=X_train, y_train=y_train, batch_size=1, **forwarded)

def MBGD(X_train,y_train,lr,batch_size,max_iter,epi=1e-6,patience=2):
    """Fit logistic-regression weights with mini-batch gradient descent.

    After every weight update the mean log-loss on the current batch, the mean
    log-loss on the held-out set and the held-out accuracy are recorded.

    Parameters
    ----------
    X_train, y_train : training features (2-D) and 0/1 labels.
    lr : learning rate passed to gradient_descent.
    batch_size : number of rows per mini-batch (see prepare_batches).
    max_iter : upper bound on the number of weight updates (batches
        processed), enforced inside an epoch as well as between epochs.
    epi : an epoch counts as "stalled" when its average training loss differs
        from the previous epoch's average by less than this amount.
    patience : number of consecutive stalled epochs after which training stops.

    Returns
    -------
    (train_error_hist, test_error_hist, test_acc_hist) : three lists with one
        entry per weight update. All are empty when there is no training data.

    Notes
    -----
    The held-out data is read from the module-level names X_test and y_test.
    Weights are initialised with np.random.random, so results depend on the
    global NumPy random state. The fitted weights are not returned.
    """
    train_error_hist = list()
    test_error_hist = list()
    test_acc_hist = list()

    # Size the weight vector from the data actually passed in rather than
    # from a notebook-level array.
    betas = np.random.random(X_train.shape[1]).reshape(1, -1)
    X_batch_list, y_batch_list = prepare_batches(X_train, y_train, batch_size)
    n_batches = len(y_batch_list)

    # With nothing to train on the loop below would never advance.
    if n_batches == 0:
        return train_error_hist, test_error_hist, test_acc_hist

    y_test_flat = np.ravel(y_test)

    prev_average = np.inf

    patience_counter = 0
    iteration_counter = 0
    while iteration_counter < max_iter:
        epoch_start = len(train_error_hist)

        for X_batch, y_batch in zip(X_batch_list, y_batch_list):
            # Respect max_iter inside an epoch too, not only between epochs.
            if iteration_counter >= max_iter:
                break

            betas = gradient_descent(X_batch, y_batch, betas, lr)

            # Predictions come out as a column (n, 1). Flatten them so the
            # element-wise loss against the (n,) labels does not broadcast
            # into an (n, n) matrix. logloss already averages over samples.
            y_hat = np.ravel(sigmoid(np.dot(X_batch, betas.T)))
            train_error_hist.append(logloss(y_hat, np.ravel(y_batch)))

            y_hat = np.ravel(sigmoid(np.dot(X_test, betas.T)))
            test_error_hist.append(logloss(y_hat, y_test_flat))
            test_acc_hist.append(np.mean(predict_with_output(y_hat) == y_test_flat))

            iteration_counter += 1

        # Average training loss over the updates made in this epoch.
        current_average = np.mean(train_error_hist[epoch_start:])

        if np.abs(prev_average - current_average) < epi:
            patience_counter += 1
        else:
            patience_counter = 0

        prev_average = current_average

        if patience_counter >= patience:
            break
    return  train_error_hist, test_error_hist, test_acc_hist

In [63]:
# Mini-batch run: 256 samples per weight update.
train_error_hist, test_error_hist, test_acc_hist = MBGD(
    X_train,
    y_train,
    lr=0.05,
    batch_size=256,
    max_iter=1000,
    epi=1e-6,
    patience=2,
)

In [None]:
# Stochastic run (one sample per update) with the same hyper-parameters.
# NOTE: this rebinds the three history names, so the plots below show
# whichever training cell was executed last.
train_error_hist, test_error_hist, test_acc_hist = SGD(
    X_train,
    y_train,
    lr=0.05,
    max_iter=1000,
    epi=1e-6,
    patience=2,
)

In [None]:
# Overlay the held-out and training loss histories; the legend labels are
# listed in the same order the curves are drawn.
for loss_history in (test_error_hist, train_error_hist):
    plt.plot(loss_history)
plt.title("Loss vs Number of iterations")
plt.ylabel("Total Loss")
plt.xlabel("#Iterations")
plt.legend(("Test error", "Train error"))

In [None]:
# Held-out accuracy recorded after every weight update.
accuracy_curve = test_acc_hist
plt.plot(accuracy_curve)
plt.ylabel("Accuracy")
plt.xlabel("#Iterations")
plt.title("Accuracy vs Number of iterations")