In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [40]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_deriv(x):
    return sigmoid(x) * (1 - sigmoid(x))

In [41]:
def loss_single(y, y_hat):
    return (1 / 2) * (y - y_hat)**2

def loss_single_deriv(y, y_hat):
    return y_hat - y

In [115]:
# 10 examples, 2 features
m = 10
n = 2

# circular decision boundary
X = np.random.uniform(-1, 1, (n, m))
radius = 0.5
Y = (np.sum(X**2, axis=0) < radius**2).astype(int).reshape(1, m)

print("Generated X:", X)
print("Generated Y:", Y)

print(f"X: {X.shape}")
print(f"Y: {Y.shape}")

layers_units = [n, 3, 3, 1]
L = len(layers_units) - 1
params = {}
for l in range(1, len(layers_units)):
    params[f'W{l}'] = np.random.rand(layers_units[l], layers_units[l - 1])
    print(f"W{l}: {params[f'W{l}'].shape}")
    params[f'b{l}'] = np.random.rand(layers_units[l], 1)
    print(f"b{l}: {params[f'b{l}'].shape}")

X: (2, 10)
Y: (1, 10)
Generated X: [[-0.07333335 -0.37806487  0.89589735  0.16204897 -0.57516011 -0.83578052
   0.3203691  -0.98897501  0.9181493  -0.90111833]
 [-0.3481666  -0.2048818   0.70670118 -0.60400867  0.26054667  0.61438706
  -0.37640763  0.1059947   0.4710203   0.73283378]]
Generated Y: [[1 1 0 0 0 0 1 0 0 0]]
W1: (3, 2)
b1: (3, 1)
W2: (3, 3)
b2: (3, 1)
W3: (1, 3)
b3: (1, 1)


In [116]:
params

{'W1': array([[0.82395336, 0.8160669 ],
        [0.8970045 , 0.2514856 ],
        [0.72821062, 0.92018234]]),
 'b1': array([[0.86963659],
        [0.0976739 ],
        [0.50624384]]),
 'W2': array([[0.57025989, 0.19104644, 0.2485448 ],
        [0.89722149, 0.55715563, 0.58877266],
        [0.35816026, 0.62684344, 0.42189271]]),
 'b2': array([[0.97938399],
        [0.50416415],
        [0.51336352]]),
 'W3': array([[0.13382087, 0.87339264, 0.42719997]]),
 'b3': array([[0.77902457]])}

In [117]:
x = X[:, 0].reshape(-1, 1)
print(f"x: {x.shape}")
y = Y[:, 0].reshape(-1, 1)
print(f"y: {y.shape}")

x: (2, 1)
y: (1, 1)


In [126]:
zs = {}
activations = {}
activations['a0'] = x

for l in range(1, len(layers_units)):
    zs[f'z{l}'] = np.dot(params[f'W{l}'], activations[f'a{l - 1}']) + params[f'b{l}']
    activations[f'a{l}'] = sigmoid(zs[f'z{l}'])

In [127]:
zs, activations

({'z1': array([[ 0.52509114],
         [-0.05566097],
         [ 0.13246891]]),
  'z2': array([[1.56306154],
         [1.65263563],
         [1.26802705]]),
  'z3': array([[1.95604765]])},
 {'a0': array([[-0.07333335],
         [-0.3481666 ]]),
  'a1': array([[0.62833747],
         [0.48608835],
         [0.53306888]]),
  'a2': array([[0.82679222],
         [0.83924695],
         [0.78040482]]),
  'a3': array([[0.87610458]])})

In [128]:
loss = loss_single(y, activations[f'a{L}'])

In [129]:
loss

array([[0.00767504]])

In [130]:
grads = {}

grads[f'd{L}'] = np.multiply(loss_single_deriv(y, activations[f'a{L}']), sigmoid_deriv(zs[f'z{L}']))
grads[f'W{L}'] = np.dot(grads[f'd{L}'], activations[f'a{L - 1}'].T)
grads[f'b{L}'] = grads[f'd{L}']

for l in range(L - 1, 0, -1):
    grads[f'd{l}'] = np.multiply(np.dot(params[f'W{l + 1}'].T, grads[f'd{l + 1}']), sigmoid_deriv(zs[f'z{l}']))
    grads[f'W{l}'] = np.dot(grads[f'd{l}'], activations[f'a{l - 1}'].T)
    grads[f'b{l}'] = grads[f'd{l}']

In [131]:
grads

{'d3': array([[-0.01344827]]),
 'W3': array([[-0.01111893, -0.01128642, -0.0104951 ]]),
 'b3': array([[-0.01344827]]),
 'd2': array([[-0.00025772],
        [-0.00158462],
        [-0.00098456]]),
 'W2': array([[-0.00016194, -0.00012528, -0.00013738],
        [-0.00099568, -0.00077027, -0.00084471],
        [-0.00061863, -0.00047858, -0.00052484]]),
 'b2': array([[-0.00025772],
        [-0.00158462],
        [-0.00098456]]),
 'd1': array([[-0.0004487 ],
        [-0.00038702],
        [-0.00035156]]),
 'W1': array([[3.29045120e-05, 1.56221576e-04],
        [2.83817888e-05, 1.34748930e-04],
        [2.57813322e-05, 1.22402677e-04]]),
 'b1': array([[-0.0004487 ],
        [-0.00038702],
        [-0.00035156]])}

In [132]:
lr = 0.001
for l in range(L - 1, 0, -1):
    params[f'W{l}'] -= 0.01 * grads[f'W{l}']
    params[f'b{l}'] -= 0.01 * grads[f'b{l}']

In [133]:
params

{'W1': array([[0.8239527 , 0.81606378],
        [0.89700393, 0.2514829 ],
        [0.72821011, 0.9201799 ]]),
 'b1': array([[0.86964557],
        [0.09768164],
        [0.50625087]]),
 'W2': array([[0.57026313, 0.19104894, 0.24854755],
        [0.89724141, 0.55717103, 0.58878955],
        [0.35817263, 0.62685301, 0.42190321]]),
 'b2': array([[0.97938915],
        [0.50419585],
        [0.51338321]]),
 'W3': array([[0.13382087, 0.87339264, 0.42719997]]),
 'b3': array([[0.77902457]])}