In [2]:
import numpy as np

In [72]:
# Every combination of two binary inputs, one per row; the reshape keeps each
# sample as its own (1, 2) row vector, giving an array of shape (4, 1, 2).
x_train = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]).reshape(4, 1, 2)
# Target is 1 when the two bits are equal and 0 otherwise (XNOR).
y_train = np.array([1, 0, 0, 1])


In [73]:
# Show the input and label array shapes, one per line.
print(x_train.shape, y_train.shape, sep='\n')

(4, 1, 2)
(4,)


In [74]:
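# Network sketch: 2 input units (X) -> 3 hidden units -> 1 output unit (Y).
# W1, b1 feed the hidden layer; W2, b2 feed the output layer.
#
#   O     O
#         O      O
#   O     O
#   X  W1 b1 W2  b2  Y
#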

In [185]:
# Seeded generator so that a fresh "Restart & Run All" reproduces the same
# starting weights (and therefore the same training run).
SEED = 42
rng = np.random.default_rng(SEED)

# Parameters are drawn uniformly from [0, 1).
W1 = rng.random((2, 3))   # input  -> hidden weights
W2 = rng.random((3, 1))   # hidden -> output weights
b1 = rng.random((1, 3))   # hidden bias (row vector)
b2 = rng.random((1, 1))   # output bias

In [76]:
# One-line report of every parameter's shape, e.g. "W1.shape=(2, 3) ...".
shape_report = ' '.join(
  f'{label}.shape={arr.shape!r}'
  for label, arr in (('W1', W1), ('W2', W2), ('b1', b1), ('b2', b2))
)
print(shape_report)

W1.shape=(2, 3) W2.shape=(3, 1) b1.shape=(1, 3) b2.shape=(1, 1)


In [90]:
def ReLU(a):
  """Rectified linear unit: element-wise maximum of `a` and 0."""
  rectified = np.maximum(a, 0)
  return rectified

def ReLU_derivitive(a):
  """Gradient mask of ReLU: True where `a` is strictly positive, else False."""
  is_positive = 0 < a
  return is_positive

def sigmoid(a):
  """Logistic function 1 / (1 + exp(-a)), evaluated element-wise.

  The value is built from exp(-|a|), which lies in (0, 1], so the
  exponential cannot overflow for large-magnitude negative inputs (the
  direct formula evaluates exp(-a) there and triggers an overflow
  RuntimeWarning).

  Args:
    a: scalar or array-like of real numbers.

  Returns:
    A float ndarray with the same shape as `a` (0-d for a scalar input).
  """
  a = np.asarray(a, dtype=float)
  decay = np.exp(-np.abs(a))
  # a >= 0:  1 / (1 + e^-a)      a < 0:  e^a / (1 + e^a)   (same value)
  return np.where(a >= 0, 1.0, decay) / (1.0 + decay)

def BinaryCrossEntropy(yhat, y):
  """Element-wise binary cross-entropy: -(y*ln(yhat) + (1-y)*ln(1-yhat)).

  Uses the natural logarithm so the loss matches the gradient `a2 - y`
  applied in `backprop` (that expression is the derivative of the
  natural-log loss through a sigmoid output).

  `yhat` is clipped to [eps, 1 - eps] (eps = float machine epsilon) before
  taking logs; without this a saturated prediction of exactly 0 or 1
  evaluates 0 * log(0) and returns nan.

  Args:
    yhat: predicted probability, scalar or array-like.
    y: target label(s), 0 or 1, broadcastable against `yhat`.

  Returns:
    The loss for each element, as a float scalar/array.
  """
  eps = np.finfo(float).eps
  p = np.clip(np.asarray(yhat, dtype=float), eps, 1 - eps)
  return -(y * np.log(p) + (1 - y) * np.log(1 - p))

In [78]:
def forward(x, W1, W2, b1, b2):
  """Run the two-layer network on `x` and return its output activation.

  The hidden layer applies ReLU to `x . W1 + b1`; the output layer applies
  sigmoid to `hidden . W2 + b2`.

  Args:
    x: input row(s) whose last dimension matches W1's first.
    W1, b1: hidden-layer weights and bias.
    W2, b2: output-layer weights and bias.

  Returns:
    The sigmoid output of the final layer.
  """
  hidden = ReLU(np.dot(x, W1) + b1)
  output_logit = np.dot(hidden, W2) + b2
  return sigmoid(output_logit)

In [187]:
def backprop(x, W1, W2, b1, b2, y, yhat, alpha):
  """Take one gradient-descent step and return the updated parameters.

  Everything is kept in row-vector convention (samples along axis 0), so
  the same code handles a single (1, n_in) sample and a mini-batch of
  shape (n, n_in). For a mini-batch the gradients are summed over the
  samples; for a single sample the result equals the per-sample update.

  Args:
    x: 2-D input of shape (n, n_in).
    W1, b1: hidden-layer weights (n_in, n_hidden) and bias (1, n_hidden).
    W2, b2: output-layer weights (n_hidden, 1) and bias (1, 1).
    y: target label(s): a scalar, or n labels as shape (n,) or (n, 1).
    yhat: accepted for interface compatibility but not read; the forward
      pass is recomputed below because the hidden-layer values are needed.
    alpha: learning rate.

  Returns:
    Tuple (W1, W2, b1, b2) of updated arrays; the inputs are not modified.
  """
  y = np.asarray(y, dtype=float)
  if y.ndim == 1:
    y = y.reshape(-1, 1)   # line labels up with the (n, 1) network output

  # Forward pass, keeping z1 for the ReLU gradient mask.
  z1 = np.dot(x, W1) + b1
  a1 = ReLU(z1)
  z2 = np.dot(a1, W2) + b2
  a2 = sigmoid(z2)

  # Output layer: sigmoid followed by cross-entropy gives dL/dz2 = a2 - y.
  dl_dz2 = a2 - y
  dl_dW2 = np.dot(a1.T, dl_dz2)
  dl_db2 = dl_dz2.sum(axis=0, keepdims=True)

  # Hidden layer: push the error back through W2 (pre-update values) and
  # zero it wherever ReLU was inactive.
  dl_da1 = np.dot(dl_dz2, W2.T)
  dl_dz1 = dl_da1 * ReLU_derivitive(z1)
  dl_dW1 = np.dot(x.T, dl_dz1)
  dl_db1 = dl_dz1.sum(axis=0, keepdims=True)

  b1 = b1 - alpha * dl_db1
  W1 = W1 - alpha * dl_dW1
  b2 = b2 - alpha * dl_db2
  W2 = W2 - alpha * dl_dW2

  return W1, W2, b1, b2

In [186]:
print('Before Learning')
# Untrained predictions, one per sample, in training-set order:
#   x = 0, 0 (y = 1);  x = 0, 1 (y = 0);  x = 1, 0 (y = 0);  x = 1, 1 (y = 1)
for idx in range(4):
  print(forward(x_train[idx], W1, W2, b1, b2))

Before Learning
[[0.7819563]]
[[0.83897195]]
[[0.88531668]]
[[0.91813449]]


In [188]:
# Hyper-parameters for per-sample (stochastic) gradient descent.
alpha = 0.1    # learning rate
epochs = 500   # number of full passes over the training set

for epoch in range(epochs):
  for i in range(len(x_train)):
    yhat = forward(x_train[i], W1, W2, b1, b2)
    # Pass the configured learning rate; a literal 0.1 here would make the
    # `alpha` setting above have no effect.
    W1, W2, b1, b2 = backprop(x_train[i], W1, W2, b1, b2, y_train[i], yhat, alpha=alpha)


In [190]:
print('After Learning')
# Trained predictions, one per sample, in training-set order:
#   x = 0, 0 (y = 1);  x = 0, 1 (y = 0);  x = 1, 0 (y = 0);  x = 1, 1 (y = 1)
for idx in range(4):
  print(forward(x_train[idx], W1, W2, b1, b2))

After Learning
[[0.95340432]]
[[0.0101688]]
[[0.01041628]]
[[0.99339687]]
