In [2]:
import numpy as np
import matplotlib.pyplot as plt
np.set_printoptions(linewidth=np.inf)

In [3]:
def relu(x):
    """Rectified linear unit: entries of ``x`` below zero become 0, the rest pass through."""
    return np.where(x < 0, 0, x)


def d_relu(x):
    """Slope of ``relu`` at ``x``: 0 where ``x < 0``, otherwise 1 (so 1 at exactly 0)."""
    return np.where(x < 0, 0, 1)


def loss(Y, y):
    """Mean of the squared element-wise difference between ``Y`` and ``y``."""
    residual = Y - y
    return np.mean(residual**2)


def d_loss(Y, y):
    """Error signal ``Y - y`` used as the gradient of the loss w.r.t. ``Y``.

    This is the per-element derivative of 0.5 * (Y - y)**2; the constant
    factor 2/N that the mean-squared ``loss`` would contribute is not applied.
    """
    return Y - y

def pf(x):
    """Pretty-print a single int or float using the compact ``g`` format."""
    return f'{x:g}'


def pa(arr):
    """Pretty-print a scalar, nested sequence or numpy array of any depth.

    Scalars are formatted with ``pf``; anything iterable is rendered as its
    elements separated by single spaces and wrapped in square brackets,
    recursively, e.g. ``[[0.5] [1.3]]``.
    """
    # A 0-d ndarray advertises ``__iter__`` but raises TypeError when actually
    # iterated, so unwrap it to a plain scalar before the iterability check.
    if isinstance(arr, np.ndarray) and arr.ndim == 0:
        arr = arr.item()
    if not hasattr(arr, '__iter__'):
        return pf(arr)
    return '[' + ' '.join(pa(xs) for xs in arr) + ']'

In [5]:
# When truthy, forward() and backward() print their intermediate values.
debug = True
# Iteration count; NOTE(review): not read anywhere in the visible code.
iters = 1
# Step size handed to backward(), where it scales the parameter updates.
η = 0.1

def show(layers):
    """Print every layer's weights and biases under a numbered separator line."""
    rule = '-'*20
    for idx, layer in enumerate(layers):
        header = f'-- layer {idx} ' + rule
        print(header)
        print('w =', pa(layer['w']))
        print('b =', pa(layer['b']))
    
def dense(w, b):
    """Create a dense-layer record.

    Args:
        w: weight values (array-like); copied into a new numpy array.
        b: bias values (array-like); copied into a new numpy array.

    Returns:
        A dict with keys ``'w'`` and ``'b'`` holding the two arrays.
    """
    weights = np.array(w)
    biases = np.array(b)
    return {'w': weights, 'b': biases}

def forward(layers, X):
    """Propagate ``X`` through all layers and return the final activation.

    As a side effect every layer dict is annotated with the values that
    backward() reads later:
      'x' - the input fed into the layer,
      'z' - the pre-activation ``x @ w + b``,
      'Y' - the activation ``relu(z)``.
    When the module-level ``debug`` flag is truthy each step is printed.
    If ``layers`` is empty, ``X`` is returned unchanged.
    """
    activation = X
    for idx, layer in enumerate(layers):
        layer['x'] = activation
        pre_act = activation @ layer['w'] + layer['b']
        layer['z'] = pre_act
        activation = relu(pre_act)
        layer['Y'] = activation
        if debug:
            print(f'Z{idx+1} = Y{idx} W{idx+1} + B{idx+1} = {pa(pre_act)}')
            print(f'Y{idx+1} = σ(Z{idx+1})      = {pa(activation)}')
    return activation

def _batch_gradients(x, δ):
    """Return the batch-averaged ``(dW, db)`` for one layer from its input and error term."""
    # Promote a lone 1-D sample to a batch of one so a single formula applies.
    x, δ = np.atleast_2d(x), np.atleast_2d(δ)
    # x.T @ δ sums the per-sample outer products; dividing by the batch size
    # averages them and gives the same (in, out) shape as the weight matrix.
    # (An element-wise x * δ is only equivalent for 1x1 layers.)
    return x.T @ δ / x.shape[0], δ.mean(axis=0)


def backward(layers, Y, y, η):
    """Apply one gradient-descent step to every layer, in place.

    Must be called after forward(), which stores the per-layer input ``'x'``
    and pre-activation ``'z'`` that are read here.

    Args:
        layers: list of layer dicts (see dense()); their ``'w'`` and ``'b'``
            entries are replaced with the updated arrays.
        Y: network output returned by forward() for the current batch.
        y: target values, broadcast-compatible with ``Y``.
        η: learning rate multiplying the batch-averaged gradients.

    Returns:
        None. When the module-level ``debug`` flag is truthy the error terms
        and updated parameters are printed, numbered like forward()'s output
        (layer ``i`` in the list is layer ``i + 1`` in the labels).
    """
    n = len(layers)
    W, B = [None]*n, [None]*n
    δ = None
    for i in reversed(range(n)):
        l = layers[i]
        k = i + 1  # 1-based layer number, matching forward()'s Z/Y/W/B labels
        if i == n - 1:
            # Output layer: error signal from the loss times the activation slope.
            err, slope = d_loss(Y, y), d_relu(l['z'])
            δ = err * slope
            if debug:
                print(f"δ{k}  = (Y{k}-Y^)*σ'(z{k})     = {pa(err)} * {pa(slope)} = {pa(δ)}")
        else:
            # Hidden layer: pull the error back through the NEXT layer's
            # (not yet updated) weights, then apply this layer's slope.
            δ = (δ @ layers[i+1]['w'].T) * d_relu(l['z'])
            if debug:
                print(f"δ{k}  = (δ{k+1} W{k+1}^T)*σ'(z{k})   = {pa(δ)}")
        dw, db = _batch_gradients(l['x'], δ)
        W[i] = l['w'] - η*dw
        B[i] = l['b'] - η*db
        if debug:
            print(f"W{k}' = W{k} - η (Y{k-1}^T δ{k})_M = {pa(W[i])}")
            print(f"B{k}' = B{k} - η δ{k}          = {pa(B[i])}")

    # Commit only after every gradient is computed, so back-propagation above
    # always sees the weights that were used in the forward pass.
    for l, w_new, b_new in zip(layers, W, B):
        l['w'], l['b'] = w_new, b_new

# Which hand-worked example to run:
#   1 - one input, one hidden unit, one output unit
#   2 - same hidden layer, but two output units
n = 1

if n == 1:
    layers = [dense(w=[[0.2]], b=[0.5]), dense(w=[[0.5]], b=[-0.5])]
    X = np.array([[0], [4]])
    y = np.array([[0], [2]])
elif n == 2:
    layers = [dense(w=[[0.2]], b=[0.5]), dense(w=[[0.5, 0.0]], b=[-0.5, 0.5])]
    X = np.array([[0], [4]])
    y = np.array([[0, 1], [2, 5]])
else:
    # Fail here with a clear message rather than later inside show(None).
    raise ValueError(f'unknown example n={n!r}; expected 1 or 2')


print('BEFORE TRAINING')
show(layers)

print('\n\nFORWARD')
Y = forward(layers, X)
print('Y =', pa(Y))

print('\n\nBACKWARD')
# Single gradient-descent step using the activations cached by forward().
backward(layers, Y, y, η)

print('\n\nAFTER TRAINING')
show(layers)

BEFORE TRAINING
-- layer 0 --------------------
w = [[0.2]]
b = [0.5]
-- layer 1 --------------------
w = [[0.5]]
b = [-0.5]


FORWARD
Z1 = Y0 W1 + B1 = [[0.5] [1.3]]
Y1 = σ(Z1)      = [[0.5] [1.3]]
Z2 = Y1 W2 + B2 = [[-0.25] [0.15]]
Y2 = σ(Z2)      = [[0] [0.15]]
Y = [[0] [0.15]]


BACKWARD
δ2  = (Y2-Y^)*σ'(z2)   = [[0] [-1.85]] * [[0] [1]] = [[0] [-1.85]]
W2' = W2 - η (Y1*δ2)_M = [[0.62025]]
B2' = B2 - η δ2        = [-0.4075]
(2, 1) (2, 1)
δ1  = (δ2 w1^T)*σ'(z1) = [[0] [-0.925]]
W1' = W1 - η (y0 δ1)_M = [[0.385]]
B1' = B1 - η δ1        = [0.54625]


AFTER TRAINING
-- layer 0 --------------------
w = [[0.385]]
b = [0.54625]
-- layer 1 --------------------
w = [[0.62025]]
b = [-0.4075]
