In [57]:
import numpy as np

In [58]:
def relu(Z):
  """Rectified linear unit, applied element-wise.

  Args:
    Z: Array-like of pre-activation values.

  Returns:
    Array with the shape of ``Z`` in which every entry is ``max(0, z)``.
  """
  rectified = np.maximum(0, Z)
  return rectified


def relu_backward(dA, Z):
  """Back-propagate a gradient through the ReLU activation.

  Args:
    dA: Gradient of the loss with respect to the ReLU output.
    Z: Pre-activation values that were fed to ``relu`` in the forward pass.

  Returns:
    Gradient with respect to ``Z``: ``dA`` where ``Z > 0`` and 0 elsewhere
    (including where ``Z`` is exactly 0).
  """
  is_active = Z > 0
  return np.where(is_active, dA, 0)


# Sanity check: entries of Z that are not positive are mapped to 0.
relu(np.array([[-5, 4], [0, 1]]))  # relu(Z)

# Sanity check: the upstream gradient dA is kept only where Z > 0.
relu_backward(
    np.array([[-0.2, -0.5], [-0.1, 0.2]]),  # dA
    np.array([[-5, 4], [0, 1]])  # Z
)

array([[0, 4],
       [0, 1]])

array([[ 0. , -0.5],
       [ 0. ,  0.2]])

In [59]:
def sigmoid(Z):
  """Numerically stable logistic sigmoid, applied element-wise.

  The textbook form ``1 / (1 + exp(-Z))`` overflows inside ``exp`` for large
  negative ``Z``. Here the exponential is always taken of a non-positive
  number, so it cannot overflow.

  Args:
    Z: Scalar or array-like of pre-activation values.

  Returns:
    ``1 / (1 + exp(-Z))`` for every entry; an array shaped like ``Z``, or a
    NumPy scalar when ``Z`` is a scalar.
  """
  Z = np.asarray(Z)
  # exp(-|Z|) lies in (0, 1] for every finite Z.
  decay = np.exp(-np.abs(Z))
  # Z >= 0: 1 / (1 + exp(-Z));  Z < 0: exp(Z) / (1 + exp(Z)) -- same value.
  out = np.where(Z >= 0, 1 / (1 + decay), decay / (1 + decay))
  # np.where yields a 0-d array for scalar input; unwrap it to a scalar.
  return out if out.ndim else out[()]


def sigmoid_backward(dA, Z):
  """Back-propagate a gradient through the sigmoid activation.

  Args:
    dA: Gradient of the loss with respect to the sigmoid output.
    Z: Pre-activation values that were fed to ``sigmoid``.

  Returns:
    Gradient with respect to ``Z``, i.e. ``dA * s * (1 - s)`` with
    ``s = sigmoid(Z)``.
  """
  activation = sigmoid(Z)
  complement = 1 - activation
  return dA * activation * complement


# Sanity check on a small matrix; the entry with Z == 0 should come out as 0.5.
sigmoid(np.array([[-5, 4], [0, 1]]))  # sigmoid(Z)

# Sanity check: upstream gradient dA scaled by the sigmoid slope at each Z.
sigmoid_backward(
    np.array([[-0.2, -0.5], [-0.1, 0.2]]),  # dA
    np.array([[-5, 4], [0, 1]])  # Z
)

array([[0.00669285, 0.98201379],
       [0.5       , 0.73105858]])

array([[-0.00132961, -0.00883135],
       [-0.025     ,  0.03932239]])

In [60]:
def bce_loss(A, Y):
  """Mean binary cross-entropy between predictions and labels.

  Args:
    A: Predicted probabilities.
    Y: Target labels, broadcast-compatible with ``A``.

  Returns:
    The loss averaged over every element. Predictions are clipped to
    ``[1e-9, 1 - 1e-9]`` first so that neither logarithm sees 0.
  """
  eps = 1e-9
  probs = np.clip(A, eps, 1 - eps)
  log_likelihood = Y * np.log(probs) + (1 - Y) * np.log(1 - probs)
  return -np.mean(log_likelihood)


def bce_loss_backward(A, Y):
  """Gradient of ``bce_loss`` with respect to the predictions ``A``.

  Args:
    A: Predicted probabilities.
    Y: Target labels with the same shape as ``A``.

  Returns:
    Array shaped like ``A`` holding ``(-Y/A + (1-Y)/(1-A)) / n``, where ``n``
    is the total number of label elements.
  """
  Y = np.asarray(Y)
  # bce_loss takes np.mean over every element, so the matching gradient is
  # divided by the element count. For 1-D or (n, 1) labels this equals the
  # number of rows; for multi-column labels the row count alone would make
  # the gradient too large.
  n = Y.size
  # Same clipping as the forward pass, which also keeps the divisions finite.
  A = np.clip(A, 1e-9, 1 - 1e-9)
  return (-Y/A + (1-Y)/(1-A)) / n


# Sanity check: mean binary cross-entropy of four predictions against labels.
bce_loss(
    np.array([0.2, 0.1, 0.8, 0.9]),  # A
    np.array([1, 0, 1, 1])  # Y
)

# Sanity check: gradient of that loss with respect to each prediction.
bce_loss_backward(
    np.array([0.2, 0.1, 0.8, 0.9]),  # A
    np.array([1, 0, 1, 1])  # Y
)

np.float64(0.5108256237659906)

array([-1.25      ,  0.27777778, -0.3125    , -0.27777778])

In [61]:
class LinearLayer:
  """Fully connected layer computing ``X @ W + b``.

  Attributes:
    W: Weight matrix of shape ``(n_inputs, n_neurons)``.
    b: Bias vector of shape ``(n_neurons,)``.
    X: Input seen by the most recent ``forward`` call (``None`` before it).
    dW: Gradient for ``W`` from the most recent ``backward`` call.
    db: Gradient for ``b`` from the most recent ``backward`` call.
  """

  def __init__(self, n_inputs, n_neurons, rng=None):
    """Create the layer with small random weights and zero biases.

    Args:
      n_inputs: Number of input features.
      n_neurons: Number of output units.
      rng: Optional random generator exposing ``standard_normal(shape)``
        (for example ``np.random.default_rng(seed)``). When omitted, the
        global ``np.random`` state is used, so ``np.random.seed`` still
        controls the initialisation.
    """
    if rng is None:
      noise = np.random.randn(n_inputs, n_neurons)
    else:
      noise = rng.standard_normal((n_inputs, n_neurons))
    # Standard-normal draws scaled down by 0.01.
    self.W = noise * 0.01
    self.b = np.zeros(n_neurons)

    self.X = None
    self.dW = None
    self.db = None

  def forward(self, X):
    """Return ``X @ W + b`` and remember ``X`` for the backward pass."""
    self.X = X
    return X @ self.W + self.b

  def backward(self, dOut):
    """Store parameter gradients and return the gradient for the input.

    Args:
      dOut: Gradient of the loss with respect to this layer's output.

    Returns:
      ``dOut @ W.T``, the gradient with respect to the cached input.

    Raises:
      RuntimeError: If ``forward`` has not been called yet.
    """
    if self.X is None:
      raise RuntimeError("LinearLayer.backward() called before forward()")
    self.dW = self.X.T @ dOut
    self.db = dOut.sum(axis=0)
    return dOut @ self.W.T

  def update(self, learning_rate):
    """Take one gradient-descent step on ``W`` and ``b`` in place.

    Raises:
      RuntimeError: If ``backward`` has not been called yet.
    """
    if self.dW is None or self.db is None:
      raise RuntimeError("LinearLayer.update() called before backward()")
    self.W -= learning_rate * self.dW
    self.b -= learning_rate * self.db

In [62]:
# simple 2-layer architecture
class MLP:
  """Two-layer perceptron: linear -> ReLU -> linear -> sigmoid.

  The intermediate values of the latest forward pass are kept on the
  instance (``Z1``, ``A1``, ``Z2``, ``A2``) because ``backward`` reads them.
  """

  def __init__(self, n_inputs, n_hidden, n_outputs):
    """Build both linear layers and start with an empty forward cache."""
    self.layer1 = LinearLayer(n_inputs, n_hidden)
    self.layer2 = LinearLayer(n_hidden, n_outputs)

    # Pre-activations (Z*) and activations (A*) of the latest forward pass.
    self.Z1 = self.Z2 = None
    self.A1 = self.A2 = None

  def forward(self, X):
    """Run the network on ``X`` and return the sigmoid outputs ``A2``."""
    self.Z1 = hidden_pre = self.layer1.forward(X)
    self.A1 = hidden_act = relu(hidden_pre)
    self.Z2 = out_pre = self.layer2.forward(hidden_act)
    self.A2 = out_act = sigmoid(out_pre)
    return out_act

  def backward(self, Y):
    """Back-propagate the BCE loss for labels ``Y`` through both layers.

    Uses the values cached by the latest ``forward`` call; the resulting
    parameter gradients are stored on ``layer1`` and ``layer2``.
    """
    grad_probs = bce_loss_backward(self.A2, Y)
    grad_logits = sigmoid_backward(grad_probs, self.Z2)
    grad_hidden_act = self.layer2.backward(grad_logits)
    grad_hidden_pre = relu_backward(grad_hidden_act, self.Z1)
    self.layer1.backward(grad_hidden_pre)

  def update(self, learning_rate):
    """Apply one gradient step to ``layer1``, then to ``layer2``."""
    for layer in (self.layer1, self.layer2):
      layer.update(learning_rate)

  def predict(self, X, threshold=0.5):
    """Return integer labels: 1 where the output is >= ``threshold``, else 0."""
    is_positive = self.forward(X) >= threshold
    return is_positive.astype(int)

In [63]:
def train(model, X, Y,
          learning_rate=0.1, epochs=1000, print_every=100):
  """Train ``model`` with full-batch gradient descent on the BCE loss.

  Args:
    model: Object providing ``forward(X)``, ``backward(Y)`` and
      ``update(learning_rate)``.
    X: Input samples passed to ``model.forward``.
    Y: Target labels passed to ``bce_loss`` and ``model.backward``.
    learning_rate: Step size handed to ``model.update``.
    epochs: Number of passes over the full data set.
    print_every: Log the loss on epoch 1 and on every multiple of this
      value. Pass ``0`` or ``None`` to disable logging.

  Returns:
    List with the loss of every epoch, measured before that epoch's update.
  """
  loss_history = []

  for epoch in range(1, epochs+1):
    A2 = model.forward(X)
    loss = bce_loss(A2, Y)
    loss_history.append(loss)

    model.backward(Y)
    model.update(learning_rate)

    # Guarding on print_every first avoids `epoch % 0` (ZeroDivisionError)
    # and gives callers a way to silence the log.
    if print_every and (epoch == 1 or epoch % print_every == 0):
      print(f"  Epoch {epoch:>4} | Loss: {loss:.4f}")

  return loss_history

In [64]:
# Toy data set: 10 samples with 3 features each.
X = np.array([
    [3.0,  4.0,  2],
    [2.0,  3.5,  3],
    [4.0,  4.5,  1],
    [1.5,  3.0,  2],
    [5.0,  3.8,  3],
    [8.0,  7.5,  0],
    [10.0, 8.0,  1],
    [7.0,  7.0,  0],
    [12.0, 9.0,  0],
    [6.0,  7.2,  1],
])

# Binary labels as a column vector so they line up with the (10, 1) output.
Y = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]).reshape(-1, 1)

# LinearLayer draws its initial weights from the global NumPy RNG; seeding it
# makes the training run reproducible after Restart & Run All.
np.random.seed(0)

model = MLP(n_inputs=3, n_hidden=4, n_outputs=1)

_ = train(model, X, Y, learning_rate=0.1, epochs=1000, print_every=100)

  Epoch    1 | Loss: 0.6932
  Epoch  100 | Loss: 0.1399
  Epoch  200 | Loss: 0.0662
  Epoch  300 | Loss: 0.0458
  Epoch  400 | Loss: 0.0334
  Epoch  500 | Loss: 0.0251
  Epoch  600 | Loss: 0.0194
  Epoch  700 | Loss: 0.0154
  Epoch  800 | Loss: 0.0125
  Epoch  900 | Loss: 0.0104
  Epoch 1000 | Loss: 0.0088
