In [4]:
from copy import copy
from random import uniform
from math import exp

## Convention

In [5]:
# `unit_function` are fixed to be sigmoid.
#
# Labeling:
#   Each unit is labeled by Int * Int, where
#   the first is for layer, and the second is
#   for the unit on that layer.
#
# Abbreviation:
#   l -> layer, u -> unit on the layer,
#   m -> medium layer,
#   x -> value on net, w -> weights on net
#   t -> target

## Data Abstraction

In [59]:
sigmoid = lambda x: 1 / (1 + exp(-x))
scale = [2, 2, 1]
ann = {}
ann['scale'] = scale

# generate initialized weights, as weights[m][u][upstream_u]:
ann['w_data'] = []
for m in range(len(scale) - 1):
    weights_on_m = []
    for u in range(scale[m + 1]): # l = m + 1
        weights_to_u = [uniform(-0.05, 0.05) for upstream_u in range(scale[m] + 1)]
        weights_on_m.append(weights_to_u)
    ann['w_data'].append(weights_on_m)

In [60]:
def w(to_unit, from_unit):
    """ [Int] * [Int] -> Real
    
    E.g. for scale = [2, 1], the `w` can take
    arguments: [0, 0] (the first layer and the
    first unit on this layer), [0, 1], and
    [0, 2], the last of which is for the threshold-
    weight.
    """
    if to_unit[0] != from_unit[0] + 1:
        return "Error: there's no such net!"
    else:
        return ann['w_data'][from_unit[0]][to_unit[1]][from_unit[1]]

ann['w'] = w
w([1,0], [0,2])

0.002381188453962013

In [69]:
def reassign_w(to_unit, from_unit, new_value):
    """ [Int] * [Int] * Real -> None
    Not a function -- modifies ann['w_data'] in outer frame.
    """
    ann['w_data'][from_unit[0]][to_unit[1]][from_unit[1]] = new_value
    return None
ann['reassign_w'] = reassign_w

reassign_w([1,0], [0,0], 2.1)
w([1,0], [0,0])

2.1

### Forward Propagate

In [61]:
def output_from_unit(inputs, unit):
    """ [Real] * [Int] -> Real
    """
    x = copy(inputs)
    x.insert(0, 1)
    net = sum([x[i] * w(unit, [unit[0] - 1, i]) for i in range(len(x))])
    return sigmoid(net)

In [52]:
inputs = [1, 2]
ann_outputs = {}

x_data = []
for m in range(len(scale) - 1):
    if m == 0:
        x_on_m = [inputs for u in range(scale[1])]
        x_data.append(x_on_m)
    else:
        x_on_m = [[output_from_unit(x_data[m - 1][0], [m, u]) for u in range(scale[m])] for upper_u in range(scale[m + 1])]
        x_data.append(x_on_m)

ann_outputs['x_data'] = x_data

In [53]:
outputs = [output_from_unit(x_data[-1][0], [m, u]) for u in range(scale[-1])]
ann_outputs['outputs'] = outputs

In [55]:
def x(to_unit, from_unit):
    """ [Int] * [Int] -> Real
    """
    if to_unit[0] != from_unit[0] + 1:
        return "Error: there's no such net!"
    else:
        return x_data[from_unit[0]][to_unit[1]][from_unit[1]]
ann_outputs['x'] = x

In [71]:
def o(unit):
    """ [Int] -> Real
    """
    if unit[0] == len(scale) - 1: # output unit
        return outputs[unit[1]]
    else:
        return x([unit[0] + 1, 0], unit)

In [74]:
def forward_propagate(ann, inputs):
    """ Ann * [Real] -> Ann_outputs
    
    where Ann_outputs = {'outputs': [Real], 'x_data': [[[Real]]], 'x': ([Int] * [Int] -> Real)}
    and x_data has x_data[m][u][upstream_u], like w_data.
    """
    ann_outputs = {}
    # x_data
    x_data = []
    for m in range(len(scale) - 1):
        if m == 0:
            x_on_m = [inputs for u in range(scale[1])]
            x_data.append(x_on_m)
        else:
            x_on_m = [[output_from_unit(x_data[m - 1][0], [m, u]) for u in range(scale[m])] for upper_u in range(scale[m + 1])]
            x_data.append(x_on_m)
    # x:
    def x(to_unit, from_unit):
        """ [Int] * [Int] -> Real
        """
        if to_unit[0] != from_unit[0] + 1:
            return "Error: there's no such net!"
        else:
            return x_data[from_unit[0]][to_unit[1]][from_unit[1]]
    # outputs:
    outputs = [output_from_unit(x_data[-1][0], [m, u]) for u in range(scale[-1])]
    # o:
    def o(unit):
        """ [Int] -> Real
        """
        if unit[0] == len(scale) - 1: # output unit
            return outputs[unit[1]]
        else:
            return x([unit[0] + 1, 0], unit)
    # ann_outputs:
    ann_outputs['x_data'] = x_data
    ann_outputs['x'] = x
    ann_outputs['outputs'] = outputs
    ann_outputs['o'] = o
    return ann_outputs

ao = forward_propagate(ann, inputs)
ao['x']([2,0], [1, 1])
ao['outputs']

[0.8876539065880921]

### Backward Propagate

In [79]:
delta_data = []
t = [1]
ann_outputs = forward_propagate(ann, inputs)
o = ann_outputs['o']
l = len(scale) - 1
delta_output_l = [o([l, k]) * (1 - o([l, k])) * (t[k] - o([l, k])) for k in range(scale[-1])]
delta_data.append(delta_output_l)

In [86]:
l = len(scale) - 2
delta_hidden_l = [o([l, h]) * (1 - o([l, h])) * sum([w([l + 1, k], [l, h]) * delta_data[0][k] for k in range(scale[l + 1])]) for h in range(scale[l])]
delta_data.insert(0, delta_hidden_l)

In [92]:
def delta(unit):
    if unit[0] == 0:
        return "Error: the input-layer has no delta!"
    else:
        return delta_data[unit[0] - 1][unit[1]]

delta([1,1])

-8.053247045219972e-05

In [95]:
learning_speed = 0.01

def delta_w(to_unit, from_unit):
    return learning_speed * delta(to_unit) * x(to_unit, from_unit)

delta_w([1,0], [0,0])

-3.0300519459073115e-07

In [98]:
reassign_w = ann['reassign_w']
for m in range(len(scale) - 1):
    for u in range(scale[m + 1]):
        for upstream_u in range(scale[m]):
            to_unit = [m + 1, u]
            from_unit = [m, upstream_u]
            reassign_w(to_unit, from_unit, w(to_unit, from_unit) + delta_w(to_unit, from_unit))

In [99]:
ann['w_data']

[[[2.0999996969948054, -0.03845562607359196, 0.002381188453962013],
  [-0.0005905911108305429, -0.04124807694410011, -0.03019140342420761]],
 [[-0.02704714591417531, -0.028769187759821922, -0.03119673530258952]]]

In [None]:
def back_propgate(ann, target):
    """ Ann * [Real] -> None
    Not a function -- modifies the `ann` in outer frame.
    """
    # delta_data:
    delta_data = []
    ann_outputs = forward_propagate(ann, inputs)
    o = ann_outputs['o']
    l = len(scale) - 1
    delta_output_l = [o([l, k]) * (1 - o([l, k])) * (t[k] - o([l, k])) for k in range(scale[-1])]
    delta_data.append(delta_output_l)
    for l0 in range(2, len(scale)):
        l = len(scale) - l0
        delta_hidden_l = [o([l, h]) * (1 - o([l, h])) * sum([w([l + 1, k], [l, h]) * delta_data[0][k] for k in range(scale[l + 1])]) for h in range(scale[l])]
        delta_data.insert(0, delta_hidden_l)
    # delta:
    def delta(unit):
        if unit[0] == 0:
            return "Error: the input-layer has no delta!"
        else:
            return delta_data[unit[0] - 1][unit[1]]
    # delta_w:
    def delta_w(to_unit, from_unit):
        """ [Int] * [Int] -> Real
        """
        return learning_speed * delta(to_unit) * x(to_unit, from_unit)
    # update ann['w_data']:
    reassign_w = ann['reassign_w']
    for m in range(len(scale) - 1):
        for u in range(scale[m + 1]):
            for upstream_u in range(scale[m]):
                to_unit = [m + 1, u]
                from_unit = [m, upstream_u]
                reassign_w(to_unit, from_unit, w(to_unit, from_unit) + delta_w(to_unit, from_unit))
    return None

In [100]:
for l0 in range(2, len(scale)):
    print(l0)

2
