In [1]:
import numpy as np

# Fix the global NumPy seed so the synthetic data set is reproducible.
np.random.seed(50)
num_observations = 7000

# Two 2-D Gaussian clusters sharing the same covariance (correlation 0.75):
# x1 is centred on (0, 0) and x2 on (1, 4).
x1 = np.random.multivariate_normal(mean=[0, 0], cov=[[1, .75], [.75, 1]], size=num_observations)
x2 = np.random.multivariate_normal(mean=[1, 4], cov=[[1, .75], [.75, 1]], size=num_observations)

# Stack the clusters row-wise; the first half of the rows is labelled 0 and
# the second half 1, matching the order in which x1 and x2 are stacked.
features = np.concatenate([x1, x2], axis=0).astype(np.float32)
labels = np.concatenate([np.zeros(num_observations), np.ones(num_observations)])

In [2]:
def sigmoid_calculation(scores):
    """Return the logistic sigmoid ``1 / (1 + exp(-scores))`` element-wise.

    The naive formula evaluates ``exp(-scores)``, which overflows for large
    negative scores. This version only ever exponentiates a non-positive
    number (``-|scores|``), so it cannot overflow.

    Parameters
    ----------
    scores : scalar or array-like
        Raw linear scores (logits).

    Returns
    -------
    numpy scalar or ndarray
        Sigmoid of ``scores``; same shape as the input.
    """
    scores = np.asarray(scores)
    # exp(-|x|) lies in (0, 1], so neither branch below can overflow.
    exp_neg_abs = np.exp(-np.abs(scores))
    result = np.where(
        scores >= 0,
        1 / (1 + exp_neg_abs),            # x >= 0: 1 / (1 + e^-x)
        exp_neg_abs / (1 + exp_neg_abs),  # x <  0: e^x / (1 + e^x)
    )
    # Indexing with an empty tuple turns a 0-d result back into a scalar and
    # leaves n-d arrays as they are.
    return result[()]

In [3]:
def l1_calculation(features, target, weights):
    """Return the logistic-regression log-likelihood of ``weights``.

    Computes ``sum(target * scores - log(1 + exp(scores)))`` with
    ``scores = features @ weights``.

    Parameters
    ----------
    features : ndarray
        Design matrix, one row per observation.
    target : ndarray
        Label for each row of ``features``.
    weights : ndarray
        One coefficient per column of ``features``.

    Returns
    -------
    float
        The summed log-likelihood.
    """
    scores = np.dot(features, weights)
    # logaddexp(0, s) == log(exp(0) + exp(s)) == log(1 + exp(s)), but it is
    # evaluated without overflowing for large s; the naive form would yield
    # inf there and turn the whole sum into -inf.
    ll = np.sum(target * scores - np.logaddexp(0, scores))
    return ll

In [4]:
def logistic_regression_scratch(add_intercept, features, target, num_steps, learning_rate, log_every=10000):
    """Fit logistic-regression weights by batch gradient ascent.

    Starting from all-zero weights, each step moves the weights along the
    gradient of the log-likelihood, ``features.T @ (target - predictions)``,
    scaled by ``learning_rate``.

    Parameters
    ----------
    add_intercept : bool
        If true, a column of ones is prepended to ``features`` so the first
        returned weight acts as the intercept.
    features : ndarray
        2-D design matrix, one row per observation.
    target : ndarray
        Label for each row of ``features``.
    num_steps : int
        Number of gradient-ascent iterations.
    learning_rate : float
        Step size applied to the (unnormalised, summed) gradient.
    log_every : int, optional
        Print the current log-likelihood every ``log_every`` steps (including
        step 0). Defaults to 10000, the previously hard-coded interval; pass
        ``0`` or ``None`` to disable printing.

    Returns
    -------
    ndarray
        Learned weights, one per column of the (possibly intercept-augmented)
        design matrix.
    """
    if add_intercept:
        intercept = np.ones((features.shape[0], 1))
        features = np.hstack((intercept, features))
    weights = np.zeros(features.shape[1])
    for step in range(num_steps):
        scores = np.dot(features, weights)
        predictions = sigmoid_calculation(scores)
        # Residual between labels and predicted probabilities; its projection
        # onto the features is the gradient of the log-likelihood.
        output_error_signal = target - predictions
        gradient = np.dot(features.T, output_error_signal)
        # Ascent (+=), because the log-likelihood is being maximised.
        weights += learning_rate * gradient
        if log_every and step % log_every == 0:
            print(l1_calculation(features, target, weights))
    return weights

In [5]:
# Fit the model on the synthetic data; the log-likelihood is printed
# periodically during training so convergence can be followed.
weights = logistic_regression_scratch(
    add_intercept=True,
    features=features,
    target=labels,
    num_steps=50000,
    learning_rate=5e-5,
)

-5910.224466188034
-215.14131257292144
-210.92053878660204
-210.16078383476204
-209.97474804416817


In [6]:
# Learned parameters: the training call used add_intercept=True, so the first
# entry is the intercept and the remaining two are the per-feature weights.
print(weights)

[-13.44976583  -4.83354338   7.78664271]


In [7]:
# Rebuild the design matrix with the leading column of ones that training
# added for the intercept, then score every observation.
bias_column = np.ones((features.shape[0], 1))
design_matrix = np.hstack((bias_column, features))
final_scores = np.dot(design_matrix, weights)

# Round the predicted probabilities to hard 0/1 class predictions.
preds = np.round(sigmoid_calculation(final_scores))

# Fraction of observations whose predicted class equals the true label.
accuracy = (preds == labels).sum().astype(float) / len(preds)
print('Accuracy L1 scratch: {0}'.format(accuracy))

Accuracy L1 scratch: 0.9937857142857143
