In [15]:
import numpy as np
from sklearn.datasets import make_classification
import time
from math import exp
from sklearn.model_selection import KFold
# X_train, y_train = make_classification(n_samples=10000)
# X_test, y_test = make_classification(n_samples=1000)
X, y = make_classification(n_samples=10000)


In [41]:
def predict(row, coefficients):
    yhat = coefficients[0]
    for i in range(len(row)-1):
        yhat += coefficients[i + 1] * row[i]
    return 1.0 / (1.0 + exp(-yhat))

def bgd(X_train, y_train, l_rate, n_epoch, coef = None):
    if coef==None:
        coef = [0.0 for i in range(X_train.shape[1])]
    for epoch in range(n_epoch):
        for i in range(X_train.shape[0]):
            row = X[i]

            yhat = predict(row, coef)
            error = y_train[i] - yhat
            coef[0] = coef[0] + l_rate * error * yhat * (1.0 - yhat)
            for i in range(len(row)-1):
                coef[i + 1] = coef[i + 1] + l_rate * error * yhat * (1.0 - yhat) * row[i]
    return coef  

def mini_bgd(X_train, y_train, l_rate, n_epoch, coef = None):
    if coef==None:
        coef = [0.0 for i in range(X_train.shape[1])]
    for epoch in range(n_epoch):
        np.random.shuffle(X_train)
        for i in range(int(X_train.shape[0]/50)):
            row = X[i]
            yhat = predict(row, coef)
            error = y_train[i] - yhat
            coef[0] = coef[0] + l_rate * error * yhat * (1.0 - yhat)
            for i in range(len(row)-1):
                coef[i + 1] = coef[i + 1] + l_rate * error * yhat * (1.0 - yhat) * row[i]
    return coef    

def sgd(X_train, y_train, l_rate, n_epoch, coef = None):
    if coef==None:
        coef = [0.0 for i in range(X_train.shape[1])]
    for epoch in range(n_epoch):
        np.random.shuffle(X_train)
        for i in range(int(X_train.shape[0]/50)):
            row = X[i]
            yhat = predict(row, coef)
            error = y_train[i] - yhat
            coef[0] = coef[0] + l_rate * error * yhat * (1.0 - yhat)
            for i in range(len(row)-1):
                coef[i + 1] = coef[i + 1] + l_rate * error * yhat * (1.0 - yhat) * row[i]
    return coef
    

def logistic_regression(X_train, y_train, X_test, l_rate, n_epoch):
    predictions = list()
    coef = sgd(X_train, y_train, l_rate, n_epoch)
    for row in X_test:
        yhat = predict(row, coef)
        yhat = round(yhat)
        predictions.append(yhat)
    return(predictions)

def accuracy_metric(actual, predicted):
    correct = 0
    for i in range(len(actual)):
        if actual[i] == predicted[i]:
            correct += 1
    return correct / float(len(actual)) * 100.0


def evaluate_algorithm(X, y, algorithm, n_folds, *args):
    kf = KFold(n_splits=n_folds)
    kf.get_n_splits(X)
    scores = []
    for train_index, test_index in kf.split(X):
        predicted = algorithm(X[train_index], y[train_index], X[test_index], *args)
        actual = y[test_index]
        accuracy = accuracy_metric(actual, predicted)
        scores.append(accuracy)
    return scores

In [42]:
# evaluate algorithm
n_folds = 4
l_rate = 0.1
n_epoch = 100
start = time.process_time() 
scores = evaluate_algorithm(X, y, logistic_regression, n_folds, l_rate, n_epoch)
elapsed = (time.process_time() - start)
print("Time used:",elapsed)
print('Scores: %s' % scores)
print('Mean Accuracy: %.3f%%' % (sum(scores)/float(len(scores))))

Time used: 3.1404461380000157
Scores: [55.32, 92.72, 92.92, 93.47999999999999]
Mean Accuracy: 83.610%
