In [1]:
# Import necessary libraries
import numpy as np

In [2]:
# Sigmoid activation layer
def sigmoid(x):
    return 1/(1+np.exp(-x))

In [3]:
# Derivating of sigmoid activation function
def calculate_derivative(activation_fn,x):
    return sigmoid(x)*(1-sigmoid(x))

In [4]:
# Calculating forward pass in the network with given data point
def forward_pass(network,row,activation_fn):
    for i,layer_parameters in enumerate(network):
        if i == 0:
            network[i]["inputs"] = np.dot(row,layer_parameters["weights"]).flatten()
        else:
            network[i]["inputs"] = np.dot(network[i-1]["outputs"],layer_parameters["weights"]).flatten()
        network[i]["outputs"] = activation_fn(network[i]["inputs"]).flatten()
    return network,network[-1]["outputs"]

In [5]:
# Calculating backward pass in the network 
def backward_pass(network,expected):
    for i in reversed(range(len(network))):
        if i == len(network)-1:
            # Output layer and hence expected - network outputs
            network[i]["delta"] = (expected-network[i]["outputs"]).reshape(-1,1)* \
                calculate_derivative(sigmoid,np.sum(network[i]["weights"].T*network[i]["outputs"].reshape(-1,1),axis=1)).reshape(-1,1)
        else:
            # delta for a given hidden layer
            network[i]["delta"] = np.dot(network[i+1]["weights"],network[i+1]["delta"].reshape(-1,1)).T* \
                calculate_derivative(sigmoid,np.sum(network[i]["weights"].T*network[i]["outputs"].reshape(-1,1),axis=1))
                
    return network

In [6]:
# Updating weigts for each neuron
def update_weights(network,row,l_rate):
    for i in range(len(network)):
        if i != 0:
            network[i]["weights"] += np.array(l_rate*network[i-1]["outputs"]*network[i]["delta"].reshape(-1,1) \
                                              ).reshape(network[i]["weights"].shape)
        else:
            network[i]["weights"] += np.array(l_rate*network[i]["delta"].reshape(-1,1)*np.array(row).reshape(1,-1)).reshape(network[i]["weights"].shape)
    return network

In [7]:
import copy
parameters = {"weights":[],"inputs":[],"outputs":[],"delta":[]}
network = []
network += [copy.deepcopy(parameters)]
network += [copy.deepcopy(parameters)]
network[0]["weights"] = np.array([[0.4],[-0.2]])
network[1]["weights"] = np.array([[0.1]])
network
print(forward_pass(network,np.array([[0.1,0.3]]),sigmoid))
print(backward_pass(network,1.0))
print(update_weights(network,np.array([[0.1,0.3]]),0.5))

([{'outputs': array([ 0.49500017]), 'weights': array([[ 0.4],
       [-0.2]]), 'inputs': array([-0.02]), 'delta': []}, {'outputs': array([ 0.51237248]), 'weights': array([[ 0.1]]), 'inputs': array([ 0.04950002]), 'delta': []}], array([ 0.51237248]))
[{'outputs': array([ 0.49500017]), 'weights': array([[ 0.4],
       [-0.2]]), 'inputs': array([-0.02]), 'delta': array([[ 0.00303822]])}, {'outputs': array([ 0.51237248]), 'weights': array([[ 0.1]]), 'inputs': array([ 0.04950002]), 'delta': array([[ 0.12182691]])}]
[{'outputs': array([ 0.49500017]), 'weights': array([[ 0.40015191],
       [-0.19954427]]), 'inputs': array([-0.02]), 'delta': array([[ 0.00303822]])}, {'outputs': array([ 0.51237248]), 'weights': array([[ 0.13015217]]), 'inputs': array([ 0.04950002]), 'delta': array([[ 0.12182691]])}]


In [8]:
# Initialize a network
def initialize_network(n_inputs, n_hidden,n_hidden_units, n_outputs):
    network = list()
    parameters = {"weights":[],"inputs":[],"outputs":[],"delta":[]}
    for i in range(n_hidden):
        network.append(copy.deepcopy(parameters))
        if i == 0:
            network[i]["weights"] = np.random.normal(scale=0.001,size=(n_inputs, n_hidden_units))
        else:
            network[i]["weights"] = np.random.normal(scale=0.001 ** .5,size=(n_hidden_units, n_hidden_units))
    output_layer = copy.deepcopy(parameters)
    output_layer["weights"] = np.random.normal(scale=0.01,size=(n_hidden_units, n_outputs))
    network.append(output_layer)
    return network

In [9]:
# Train a network for a fixed number of epochs
def train_network(network, train, l_rate, n_epoch, n_outputs):
    for epoch in range(n_epoch):
        sum_error = 0
        for row in train:
            outputs = forward_pass(network, row[:-1],sigmoid)[1]
            expected = [0 for i in range(n_outputs)]
            expected[row[-1]] = 1
            sum_error += sum([(expected[i]-outputs[i])**2 for i in range(len(expected))])
            backward_pass(network, expected)
            update_weights(network, row[:-1], l_rate)
        print('>epoch=%d, lrate=%.3f, error=%.3f' % (epoch, l_rate, sum_error))

In [16]:
# Test training backprop algorithm
dataset = [[2.7810836,2.550537003,0],
    [1.465489372,2.362125076,0],
    [3.396561688,4.400293529,0],
    [1.38807019,1.850220317,0],
    [3.06407232,3.005305973,0],
    [7.627531214,2.759262235,1],
    [5.332441248,2.088626775,1],
    [6.922596716,1.77106367,1],
    [8.675418651,-0.242068655,1],
    [7.673756466,3.508563011,1]]
n_inputs = len(dataset[0]) - 1
n_outputs = len(set([row[-1] for row in dataset]))
network = initialize_network(n_inputs, 1,2, n_outputs)
train_network(network, dataset, 1.0, 15, n_outputs)

>epoch=0, lrate=1.000, error=5.000
>epoch=1, lrate=1.000, error=5.000
>epoch=2, lrate=1.000, error=5.000
>epoch=3, lrate=1.000, error=4.998
>epoch=4, lrate=1.000, error=4.990
>epoch=5, lrate=1.000, error=4.950
>epoch=6, lrate=1.000, error=4.808
>epoch=7, lrate=1.000, error=4.451
>epoch=8, lrate=1.000, error=3.869
>epoch=9, lrate=1.000, error=3.229
>epoch=10, lrate=1.000, error=2.622
>epoch=11, lrate=1.000, error=2.090
>epoch=12, lrate=1.000, error=1.688
>epoch=13, lrate=1.000, error=1.435
>epoch=14, lrate=1.000, error=1.334
