In [1]:
import numpy as np
from sklearn.datasets import make_classification
import time
from math import exp
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
# X_train, y_train = make_classification(n_samples=10000)
# X_test, y_test = make_classification(n_samples=1000)
X, y = make_classification(n_samples=10000)


In [3]:
sigmoid = lambda z : 1 / (1 + np.exp(-z))
logloss = lambda y_hat, y : np.sum(-y * np.log(y_hat) - (1 - y) * np.log(1 - y_hat)) / len(y_hat)
predict = lambda X: sigmoid(np.dot(X, betas.T)) > .5
predict_with_output = lambda X: (X > .5) * 1


def gradient_descent(X, y, beta, lr):
    y = y.reshape(-1, 1)
    gradients = np.dot(X.T, sigmoid(np.dot(X, beta.T)) - y) / len(y)
    new_betas = beta - lr * gradients.T

    return new_betas

def prepare_batches(X, y, batch_size):
    X_batch_list = list()
    y_batch_list = list()
    
    for i in range(len(y) // batch_size):
        X_batch_list.append(X[i * batch_size : i * batch_size + batch_size, :])
        y_batch_list.append(y[i * batch_size : i * batch_size + batch_size])
    
    if len(y) % batch_size > 0:
        X_batch_list.append(X[len(y) // batch_size * batch_size:, :])
        y_batch_list.append(y[len(y) // batch_size * batch_size:])

    return X_batch_list, y_batch_list

In [4]:
permutations = np.random.permutation(len(X))

X, y = np.asarray(X).squeeze(), np.asarray(y)

X = X[permutations, :]
y = y[permutations]

#To add beta 0
temp = np.ones((X.shape[0], X.shape[1] + 1))
temp[:, 1:] = X
X = temp

len_test = len(X) // 5 
len_train = len(X) - len_test
X_test, y_test, X_train, y_train = X[:len_test, :], y[:len_test], X[len_test:, :], y[len_test:]

In [5]:
print("Shape of X matrix is: " + str(X.shape))
print("Shape of y matrix is: " + str(y.shape))
print("Shape of X_test matrix is: " + str(X.shape))
print("Shape of y_test matrix is: " + str(X.shape))
print("Shape of X_train matrix is: " + str(X.shape))
print("Shape of y_train matrix is: " + str(X.shape))

print("Desired samples feature vector: " + str(X[2]))
print("Desired samples ground truth: " + str(y[2]))

Shape of X matrix is: (10000, 21)
Shape of y matrix is: (10000,)
Shape of X_test matrix is: (10000, 21)
Shape of y_test matrix is: (10000, 21)
Shape of X_train matrix is: (10000, 21)
Shape of y_train matrix is: (10000, 21)
Desired samples feature vector: [ 1.         -0.51926415  0.5940006  -0.93599479  0.53273792 -1.23959984
 -0.64952957  0.26893774  1.0919569  -0.34649526 -1.20836382  0.69807529
  0.41281416  0.62505391 -1.91812173  0.61770769  0.6808084   1.24744623
 -1.00572358  0.94005728 -1.18242773]
Desired samples ground truth: 0


In [10]:
betas = np.random.random(X.shape[1]).reshape(1, -1)

lr = 0.1
batch_size = 128
n_iterations = 10000

patience = 2
min_delta = 1e-2

In [None]:
train_error_hist = list()
test_error_hist = list()
test_acc_hist = list()

X_batch_list, y_batch_list = prepare_batches(X_train, y_train, batch_size)

n_batches = len(y_batch_list)

prev_average = 10000

patience_counter = 0
iteration_counter = 0 
while iteration_counter < n_iterations:
    for i in range(n_batches):
        X_batch = X_batch_list[i]
        y_batch = y_batch_list[i]

        betas = gradient_descent(X_batch, y_batch, betas, lr)
        
        y_hat = sigmoid(np.dot(X_batch, betas.T))
        train_error_hist.append(logloss(y_hat, y_batch) / len(y_batch))
              
        y_hat = sigmoid(np.dot(X_test, betas.T))
        test_error_hist.append(logloss(y_hat, y_test) / len(y_test))
        test_acc_hist.append(np.mean((predict_with_output(y_hat) == y_test.reshape(-1, 1)) * 1))
          
        iteration_counter += 1
        
    current_average = np.mean(train_error_hist[-n_batches:])
        
    if np.abs(prev_average - current_average) < min_delta:
        patience_counter += 1
    else:
        patience_counter = 0
        
    prev_average = current_average
    
    if patience_counter == patience:
        break

In [None]:
plt.plot(test_error_hist)
plt.plot(train_error_hist)
plt.xlabel("#Iterations")
plt.ylabel("Total Loss")
plt.title("Loss vs Number of iterations")
plt.legend(("Test error", "Train error"))

In [None]:
plt.plot(test_acc_hist)
plt.xlabel("#Iterations")
plt.ylabel("Accuracy")
plt.title("Accuracy vs Number of iterations")

In [None]:
def predict(row, coefficients):
    yhat = coefficients[0]*np.ones(row.shape[0])
    for i in range(len(row)-1):
        yhat += coefficients[i + 1] * row[i]
    return 1.0 / (1.0 + exp(-yhat))

def bgd(X_train, y_train, l_rate, n_epoch, coef = None):
    if coef==None:
        coef = [0.0 for i in range(X_train.shape[1])]
    for epoch in range(n_epoch):
        for i in range(X_train.shape[0]):
            row = X[i]

            yhat = predict(row, coef)
            error = y_train[i] - yhat
            coef[0] = coef[0] + l_rate * error * yhat * (1.0 - yhat)
            for i in range(len(row)-1):
                coef[i + 1] = coef[i + 1] + l_rate * error * yhat * (1.0 - yhat) * row[i]
    return coef  

def mini_bgd(X_train, y_train, l_rate, n_epoch, coef = None):
    if coef==None:
        coef = [0.0 for i in range(X_train.shape[1])]
    for epoch in range(n_epoch):
        np.random.shuffle(X_train)
        for i in range(int(X_train.shape[0]/50)):
            row = X[i]
            yhat = predict(row, coef)
            error = y_train[i] - yhat
            coef[0] = coef[0] + l_rate * error * yhat * (1.0 - yhat)
            for i in range(len(row)-1):
                coef[i + 1] = coef[i + 1] + l_rate * error * yhat * (1.0 - yhat) * row[i]
    return coef    

def sgd(X_train, y_train, l_rate, n_epoch, batch_size=100, coef = None):
    idx = np.arange(0,X_train.shape[0])
    if coef==None:
        coef = [0.0 for i in range(X_train.shape[1])]
    for epoch in range(n_epoch):
        np.random.shuffle(idx)
        for i in range(int(idx.shape[0]/batch_size)):
            row = X_train[idx[i*batch_size:(i+1)*batch_size]]
            yhat = predict(row,coef)
            error = y_train[idx[i*batch_size:(i+1)*batch_size]] - yhat
            coef[idx[i*batch_size:(i+1)*batch_size]] = coef[idx[i*batch_size:(i+1)*batch_size]] + l_rate * error * yhat * (1.0 - yhat) *row
#         for i in range(int(X_train.shape[0]/50)):
#             row = X[i]
#             yhat = predict(row, coef)
#             error = y_train[i] - yhat
#             coef[0] = coef[0] + l_rate * error * yhat * (1.0 - yhat)
#             for i in range(len(row)-1):
#                 coef[i + 1] = coef[i + 1] + l_rate * error * yhat * (1.0 - yhat) * row[i]
    return coef
    

def logistic_regression(X_train, y_train, X_test, l_rate, n_epoch):
    predictions = list()
    coef = sgd(X_train, y_train, l_rate, n_epoch)
    for row in X_test:
        yhat = predict(row, coef)
        yhat = round(yhat)
        predictions.append(yhat)
    return(predictions)

def accuracy_metric(actual, predicted):
    correct = 0
    for i in range(len(actual)):
        if actual[i] == predicted[i]:
            correct += 1
    return correct / float(len(actual)) * 100.0


def evaluate_algorithm(X, y, algorithm, n_folds, *args):
    kf = KFold(n_splits=n_folds)
    kf.get_n_splits(X)
    scores = []
    for train_index, test_index in kf.split(X):
        predicted = algorithm(X[train_index], y[train_index], X[test_index], *args)
        actual = y[test_index]
        accuracy = accuracy_metric(actual, predicted)
        scores.append(accuracy)
    return scores

In [None]:
# evaluate algorithm
n_folds = 4
l_rate = 0.1
n_epoch = 100
start = time.process_time() 
scores = evaluate_algorithm(X, y, logistic_regression, n_folds, l_rate, n_epoch)
elapsed = (time.process_time() - start)
print("Time used:",elapsed)
print('Scores: %s' % scores)
print('Mean Accuracy: %.3f%%' % (sum(scores)/float(len(scores))))