# Imports

In [110]:
import numpy as np

# Functions

In [111]:
def np_array(x):
    return np.array(x, dtype='float')

def sig(z):
    s = lambda x: 1 / (1 + np.exp(-x))
    return s(z)

def softmax(z):
    exp_z = np.exp(z)
    T = sum(exp_z)
    return exp_z/T

def L_CE(y, t):
    return -sum(np.multiply(t, np.log(y)))

def softmax_prime(z):
    exp_z = np.exp(z)
    T = sum(exp_z)
    return exp_z*(T-exp_z)/T**2

def forwardprop(x, weights, biases):
    a = [x]

    for l in range(1, L):
       z_l = np.matmul(a[l-1].transpose(), weights[l]) + biases[l]

       if l != L-1:
           a_l = sig(z_l)
       else:
           a_l = softmax(z_l)
       a.append(a_l)

    for a_l in a:
        print(a_l)

    return a

def calc_deltas(a, weights, t):
    d_l = a[-1] - t
    deltas = [d_l]

    for l in range(L-2, 0, -1):
        d_l = np.multiply(np.matmul(weights[l+1], d_l), softmax_prime(a[l-1]))

        deltas.append(d_l)

    deltas.append([])
    deltas.reverse()

    for d_l in deltas:
        print(d_l)

    return deltas

# NN Data

In [112]:
L = 3

W1 = np_array(
    [
        [-2, -1],
        [3, 0]
    ]
)
W2 = np_array(
    [
        [2, 3],
        [-1, -2]
    ]
)
weights = [
    [], W1, W2
]

b1 = np_array([0.5, 1.5])
b2 = np_array([2, -1])
biases = [
    [], b1, b2
]

x = np_array([-1, 1])
t = np_array([1, 0])

# Prev ex

W1 = np_array(
    [
        [1, -1],
        [-3, 2]
    ]
)
W2 = np_array(
    [
        [-2, 0],
        [0, 3]
    ]
)
weights = [
    [], W1, W2
]

b1 = np_array([0, 1])
b2 = np_array([1, -2])
biases = [
    [], b1, b2
]

x = np_array([1, -2])
t = np_array([1, 0])

# Sigmoid as hidden layer activation, softmax as output layer activation

In [113]:
a = forwardprop(x, weights, biases)
L_CE(a[-1], t)

[-1.  1.]
[0.99592986 0.92414182]
[0.94922356 0.05077644]


0.052110929763763976

# Backpropagation

In [114]:
deltas = calc_deltas(a, weights, t)

[]
[ 0.0053312 -0.0053312]
[-0.05077644  0.05077644]


In [115]:
ETA = 0.1

new_weights = weights.copy()
new_biases = biases.copy()

for l in range(1, L):
    new_weights[l] -= ETA*np.matmul(a[l], deltas[l].transpose())
    new_biases[l] -= ETA*deltas[l]


In [117]:
a1 = forwardprop(x, new_weights, new_biases)
L_CE(a1[-1], t)

[-1.  1.]
[0.9959277  0.92417919]
[0.94971269 0.05028731]


0.05159577052645336