In [1]:
from copy import copy
from random import uniform
from math import exp

In [2]:
# The unit (activation) function is fixed to be the sigmoid.
#
# Labeling:
#   Each unit is labeled by Int * Int, where
#   the first is for layer, and the second is
#   for the unit on that layer.
#
# Abbreviation:
#   l -> layer, u -> unit on the layer,
#   m -> index of the weight layer between layer m and layer m + 1,
#   x -> value on net, w -> weights on net
#   t -> target

In [3]:
def sigmoid(x):
    """ Real -> Real

    Logistic function 1 / (1 + e^(-x)), evaluated so that it never
    overflows: `exp` is only ever called with a non-positive argument.
    """
    if x >= 0:
        return 1 / (1 + exp(-x))
    # For negative x, exp(-x) can exceed the float range and raise
    # OverflowError; e^x / (1 + e^x) is the same value with a safe exponent.
    z = exp(x)
    return z / (1 + z)
scale = [2, 2, 1]
ann = {'scale': scale}

# Random initial weights in [-0.05, 0.05], stored as weights[m][u][upstream_u]:
# m selects the weight layer feeding layer l = m + 1, u the receiving unit on
# that layer, and upstream_u one of its scale[m] + 1 incoming weights (the
# extra one is the threshold weight).
ann['w_data'] = [
    [
        [uniform(-0.05, 0.05) for upstream_u in range(scale[m] + 1)]
        for u in range(scale[m + 1])
    ]
    for m in range(len(scale) - 1)
]

def w(to_unit, from_unit):
    """ [Int] * [Int] -> Real

    Look up the weight on the connection from `from_unit` into `to_unit`,
    both given as [layer, unit]. The two units must sit on adjacent
    layers; otherwise an error string is returned instead of a number.

    E.g. for scale = [2, 1] and to_unit = [1, 0], `from_unit` may be
    [0, 0] (first unit of the first layer), [0, 1], or [0, 2]; the last
    index addresses the threshold-weight.
    """
    source_layer = from_unit[0]
    if to_unit[0] == source_layer + 1:
        return ann['w_data'][source_layer][to_unit[1]][from_unit[1]]
    return "Error: there's no such net!"
# Expose the weight accessor on the network dict; back_propgate fetches it as ann['w'].
ann['w'] = w

def reassign_w(to_unit, from_unit, new_value):
    """ [Int] * [Int] * Real -> None

    Overwrite the weight on the connection from `from_unit` into `to_unit`
    (both [layer, unit]) with `new_value`.
    Not a pure function -- it mutates ann['w_data'] in the outer frame.
    Only the layer of `from_unit` is used to pick the weight layer; the
    layer of `to_unit` is not checked.
    """
    source_layer = from_unit[0]
    incoming = ann['w_data'][source_layer][to_unit[1]]
    incoming[from_unit[1]] = new_value
# Expose the weight setter on the network dict; back_propgate fetches it as ann['reassign_w'].
ann['reassign_w'] = reassign_w

In [4]:
def output_from_unit(inputs, unit):
    """ [Real] * [Int] -> Real

    Activation of `unit` ([layer, index]) given the values `inputs` coming
    from the layer below: sigmoid of the weighted sum of the inputs plus
    the threshold weight.

    Weight i of the unit is paired with inputs[i]; the constant 1 for the
    threshold is appended LAST so that it meets the last weight, which is
    the threshold-weight position documented on `w` and the layout
    back_propgate assumes when it pairs weight i with upstream unit i.
    `inputs` itself is not modified.
    """
    x = list(inputs) + [1]
    net = sum(x[i] * w(unit, [unit[0] - 1, i]) for i in range(len(x)))
    return sigmoid(net)

def forward_propagate(ann, inputs):
    """ Ann * [Real] -> Ann_outputs

    Run `inputs` through the network and record every intermediate value.

    where Ann_outputs = {'outputs': [Real], 'x_data': [[[Real]]],
                         'x': ([Int] * [Int] -> Real), 'o': ([Int] -> Real)}
    and x_data has x_data[m][u][upstream_u], like w_data: the value that
    unit `upstream_u` of layer m sends to unit `u` of layer m + 1.
    'x' looks such a value up by (to_unit, from_unit); 'o' gives the
    output of a single unit.

    Note: `ann` is not read here. Layer sizes come from the module-level
    `scale`, and weights are reached through `output_from_unit`.

    Raises ValueError if `scale` has fewer than two layers.
    """
    if len(scale) < 2:
        raise ValueError("the network needs at least an input and an output layer")
    last_layer = len(scale) - 1
    # x_data
    x_data = []
    for m in range(last_layer):
        if m == 0:
            # Every unit of layer 1 sees the raw network inputs.
            x_on_m = [inputs for u in range(scale[1])]
        else:
            # Outputs of layer m depend only on what layer m received, so
            # compute them once and hand each downstream unit its own copy.
            layer_out = [output_from_unit(x_data[m - 1][0], [m, u]) for u in range(scale[m])]
            x_on_m = [list(layer_out) for upper_u in range(scale[m + 1])]
        x_data.append(x_on_m)
    # x:
    def x(to_unit, from_unit):
        """ [Int] * [Int] -> Real
        """
        if to_unit[0] != from_unit[0] + 1:
            return "Error: there's no such net!"
        else:
            return x_data[from_unit[0]][to_unit[1]][from_unit[1]]
    # outputs: evaluate the units of the LAST layer, so that they use the
    # weights leading into the output layer rather than the layer before it.
    outputs = [output_from_unit(x_data[-1][0], [last_layer, u]) for u in range(scale[-1])]
    # o:
    def o(unit):
        """ [Int] -> Real
        """
        if unit[0] == last_layer: # output unit
            return outputs[unit[1]]
        else:
            # A non-output unit's output is what it feeds to the next layer.
            return x([unit[0] + 1, 0], unit)
    # ann_outputs:
    return {'x_data': x_data, 'x': x, 'outputs': outputs, 'o': o}

In [15]:
# Step size: scales the gradient term (delta * input) of every weight update.
learning_speed = 0.05
# Momentum: fraction of the previous weight update added to the current one (0 disables it).
momentum = 0

def back_propgate(ann, target, pre_delta_w = lambda to_unit, from_unit: 0, inputs = None):
    """ Ann * [Real] -> ([Int] * [Int] -> Real)

    Perform one back-propagation step on `ann` for a single training
    example and return the weight updates that were applied.
    Not a pure function -- modifies the weights of `ann` through
    ann['reassign_w'].

    Arguments:
      ann         -- network dict providing 'w' and 'reassign_w'.
      target      -- desired value for each output unit.
      pre_delta_w -- update function returned by the previous step; it is
                     scaled by the module-level `momentum`. Defaults to
                     "no previous update".
      inputs      -- network inputs for this example. When omitted, the
                     module-level `inputs` is used (the original behaviour).

    Returns delta_w(to_unit, from_unit), the update applied to the weight
    on that connection. As in `w`, from_unit index scale[layer] addresses
    the threshold weight. The values are stored in a table, so the result
    does not keep calling into `pre_delta_w` (training for many steps no
    longer builds an ever-deeper chain of closures).

    Raises NameError if `inputs` is omitted and no module-level `inputs`
    exists; the returned function raises ValueError for units that are not
    on adjacent layers.
    """
    if inputs is None:
        # Backward-compatible fallback for callers that set a global `inputs`.
        if 'inputs' not in globals():
            raise NameError("name 'inputs' is not defined")
        inputs = globals()['inputs']
    t = target
    w = ann['w']
    reassign_w = ann['reassign_w']
    ann_outputs = forward_propagate(ann, inputs)
    x = ann_outputs['x']
    o = ann_outputs['o']
    # delta_data[l - 1][u] is the error term of unit u on layer l.
    last_layer = len(scale) - 1
    delta_data = [[o([last_layer, k]) * (1 - o([last_layer, k])) * (t[k] - o([last_layer, k]))
                   for k in range(scale[-1])]]
    # Walk the hidden layers from the one below the output down to layer 1;
    # delta_data[0] is always the layer just above the one being computed.
    for l in range(last_layer - 1, 0, -1):
        downstream = delta_data[0]
        delta_hidden_l = [
            o([l, h]) * (1 - o([l, h]))
            * sum(w([l + 1, k], [l, h]) * downstream[k] for k in range(scale[l + 1]))
            for h in range(scale[l])
        ]
        delta_data.insert(0, delta_hidden_l)
    # Value carried by a connection: the upstream unit's output, or the
    # constant 1 for the threshold weight (index scale[layer]).
    def incoming(to_unit, from_unit):
        if from_unit[1] == scale[from_unit[0]]:
            return 1
        return x(to_unit, from_unit)
    # Compute every update (all deltas are already fixed, so updating the
    # weights while iterating is safe) and apply it, threshold included.
    delta_w_data = []
    for m in range(len(scale) - 1):
        steps_on_m = []
        for u in range(scale[m + 1]):
            to_unit = [m + 1, u]
            steps_to_u = []
            for upstream_u in range(scale[m] + 1):
                from_unit = [m, upstream_u]
                step = (learning_speed * delta_data[m][u] * incoming(to_unit, from_unit)
                        + momentum * pre_delta_w(to_unit, from_unit))
                steps_to_u.append(step)
                reassign_w(to_unit, from_unit, w(to_unit, from_unit) + step)
            steps_on_m.append(steps_to_u)
        delta_w_data.append(steps_on_m)
    # delta_w:
    def delta_w(to_unit, from_unit):
        """ [Int] * [Int] -> Real
        """
        if to_unit[0] != from_unit[0] + 1:
            raise ValueError("Error: there's no such net!")
        return delta_w_data[from_unit[0]][to_unit[1]][from_unit[1]]
    return delta_w

In [20]:
inputs = [1, -2]

# Train on the single example `inputs` for ten steps, feeding each step's
# weight updates into the next one (they are scaled by `momentum` there),
# and print the network output after every step.
# NOTE(review): the output unit is a sigmoid, so it stays within [0, 1];
# a target of 100 cannot be reached and only drives the output towards 1.
pre_delta_w = lambda to_unit, from_unit: 0
for i in range(10):
    delta_w = pre_delta_w = back_propgate(ann, [100], pre_delta_w)
    print(forward_propagate(ann, inputs)['outputs'])

[1.0]
[1.0]
[1.0]
[1.0]
[1.0]
[1.0]
[1.0]
[1.0]
[1.0]
[1.0]
