In [1]:
# Logistic regression from scratch
import pandas as pd
import numpy as np

# Make randomness repeatable for results
np.random.seed(233)

data = pd.read_csv("diabetes.csv", sep=",")
data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


# Initialisation

In [2]:
# The input data (each column is an instance)
x = data.loc[:, "Pregnancies":"Age"].to_numpy().T

# Number of features and instances
nx, ni = x.shape
print(f'There are {nx} featues and {ni} instances')

# The expected (real) output
y = data.loc[:, "Outcome"].to_numpy()
y = y.reshape(1, ni)

There are 8 featues and 768 instances


In [3]:
# Single node means 1 weight for each input, and 1 bias for each instance
W = np.random.rand(nx, 1) * 0.001 # Small weights
b = np.zeros((1, ni))
print(f'The weights have shape {W.shape} and bias has shape {b.shape}')

The weights have shape (8, 1) and bias has shape (1, 768)


# Forward Propagation

In [4]:
# Real number outputs of the network for every instance
def forward_prop(W, x, b):
    return np.matmul(W.T, x) + b

z = forward_prop(W, x, b)
print(f'The output has shape {z.shape}')

The output has shape (1, 768)


# Sigmoid Function

In [5]:
# Use Sigmoid function to compress to 0-1 range as probability
def sigmoid(z):
    # Log loss is undefined for probability value of 1 and 0
    # Use very small epsilon to clip result
    eps = 1e-15 
    return np.maximum(
        eps, np.minimum(
            1-eps, np.divide(1, 1 + np.exp(-z))
        )
    )

y_hat = sigmoid(z)

# Loss Function

In [6]:
def log_loss(y, y_hat):
    # Remember these operations are element-wise
    a = np.multiply(y, np.log(y_hat))
    b = 1 - y
    c = np.log(1 - y_hat)
    return -(a + np.multiply(b,c))

L = log_loss(y, y_hat)

# Cost Function

In [7]:
def cost(L):
    return np.average(L)

cost(L)

0.7212521415177289

# Back Propogation

In [8]:
# Work backwards through the chain to get loss derivative with respect to weights and biases
def backpropogation(y_hat, y):
    # Deriviative of log loss (with respect to y_hat)
    # dy_hat = np.divide(-y, y_hat) + np.divide(1 - y, 1 - y_hat)

    # Derivative of log loss (with respect to sigmoid input)
    dz = y_hat - y

    # Derivative of log less with respect to weights and biases simplifies to
    dw = np.multiply(x, dz)
    db = dz

    return dw, db

# Gradient Descent

In [9]:
alpha = 0.0001 # Learning rate

for epoch in range(10000):
    z = forward_prop(W, x, b)
    y_hat = sigmoid(z)

    if ((epoch + 1) % 1000 == 0):
        L = log_loss(y, y_hat)
        print(f'Cost at epoch {epoch + 1}: {cost(L)}')

    dw, db = backpropogation(y_hat, y)

    # Remember to average adjustment from every instance
    dw = np.average(dw, axis=1).reshape(8, 1)
    db = np.average(db, axis=0).reshape(1, ni)

    # Update the weights and biases
    W = W - alpha * dw
    b = b - alpha * db

Cost at epoch 1000: 0.5981020200456868
Cost at epoch 2000: 0.5757004199412606
Cost at epoch 3000: 0.5550461511959942
Cost at epoch 4000: 0.5358094444207468
Cost at epoch 5000: 0.5177633715229845
Cost at epoch 6000: 0.500743064233804
Cost at epoch 7000: 0.4846266855559085
Cost at epoch 8000: 0.4693221305644966
Cost at epoch 9000: 0.4547578354131359
Cost at epoch 10000: 0.44087648896505954
