# <center> Apply the backpropagation algorithm to the wheat seeds dataset</center>

In [1]:
import numpy as np
import os

In [2]:
# Directory that holds the dataset files; it is joined with
# 'seeds_dataset.txt' when the data is loaded.
dataset_dir = '/home/lidong/Datasets/ML' 

In [61]:
def load_dataset_from_file(path):
    """Load a whitespace-separated dataset and one-hot encode its labels.

    Every non-blank line holds the feature values followed by an integer
    class label in the last column.

    Args:
        path: Path of the text file to read.

    Returns:
        An object-dtype ``np.ndarray`` of shape ``(n_samples, 2)``. Column 0
        holds each sample's features as a list of floats, column 1 holds the
        matching one-hot label as a list of 0/1 ints.

    Raises:
        ValueError: If a feature is not a float or a label is not an int.
    """
    inputs = []
    targets = []
    with open(path) as f:
        for line in f:
            row = line.split()
            if not row:
                # Tolerate blank lines (e.g. an empty line at the end of file).
                continue
            inputs.append([float(value) for value in row[:-1]])
            targets.append(int(row[-1]))

    # Map every distinct raw label to a one-hot position. Sorting makes the
    # mapping deterministic instead of depending on set iteration order.
    class_indexes = {value: i for i, value in enumerate(sorted(set(targets)))}
    num_onehot = len(class_indexes)

    # Fill a pre-sized object array cell by cell. Calling np.array() on the
    # ragged (features, one_hot) pairs raises ValueError on recent NumPy, and
    # silently builds a 3-D numeric array when both lists have equal length.
    dataset = np.empty((len(targets), 2), dtype=object)
    for i, (features, target) in enumerate(zip(inputs, targets)):
        onehot = [0] * num_onehot
        onehot[class_indexes[target]] = 1
        dataset[i, 0] = features
        dataset[i, 1] = onehot
    return dataset

In [4]:
def normalize_dataset(dataset):
    """Min-max scale every feature column to the range [0, 1], in place.

    Args:
        dataset: 2-D array whose column 0 holds one mutable feature sequence
            per sample (as produced by ``load_dataset_from_file``).

    Returns:
        The same ``dataset`` object; its feature sequences have been
        overwritten with the scaled values rounded to 4 decimal places.
    """
    inputs = dataset[:, 0]
    minmax = [(min(column), max(column)) for column in zip(*inputs)]
    for row in inputs:
        for i, (low, high) in enumerate(minmax):
            span = high - low
            # A constant column has no spread: map it to 0.0 rather than
            # dividing by zero.
            row[i] = round((row[i] - low) / span, 4) if span else 0.0
    return dataset

Activation function

In [5]:
def activite_transfer(z):
    """Apply the logistic sigmoid 1 / (1 + exp(-z)) to z."""
    decay = np.exp(-z)
    return 1.0 / (1.0 + decay)

def activite_derivative(z):
    """Return the sigmoid's derivative at z, i.e. s * (1 - s) with s = sigmoid(z)."""
    sigma = activite_transfer(z)
    return sigma * (1 - sigma)

In [6]:
def forward_propagate(network, x):
    """Feed one sample through the network, recording every layer's values.

    Args:
        network: Dict with parallel 'biases' and 'weights' lists, one entry
            per non-input layer.
        x: Flat sequence of input features.

    Returns:
        A ``(zs, activations)`` pair. ``zs`` lists the weighted input of each
        non-input layer; ``activations`` starts with the input as a column
        vector and is followed by each layer's activated output.
    """
    # Turn the flat feature sequence into a column vector of shape (n, 1).
    current = np.reshape(x, (len(x), 1))
    weighted_inputs = []  # the input layer has no weighted input
    outputs = [current]  # the input itself counts as the first activation
    layers = zip(network['biases'], network['weights'])
    for bias, weight in layers:
        weighted = np.dot(weight, current) + bias
        current = activite_transfer(weighted)
        weighted_inputs.append(weighted)
        outputs.append(current)
    return weighted_inputs, outputs

Backward propagation:

--------------------------------

$$ 
\begin{align*} 
\delta^L &= (a^L-y) \odot \sigma'(z^L) \\
\delta^l &= ((w^{l+1})^T \delta^{l+1}) \odot \sigma'(z^l) \\
\end{align*}
$$

--------------------------------

In [66]:
def backward_propagate(network, y, zs, activations):
    """Compute the per-layer bias and weight gradients for one sample.

    Args:
        network: Dict with 'biases', 'weights' and the layer count 'nlayers'.
        y: Flat sequence holding the target output.
        zs: Weighted inputs of every non-input layer (from the forward pass).
        activations: Activations of every layer, input layer included.

    Returns:
        A ``(nabla_b, nabla_w)`` tuple of lists shaped like
        ``network['biases']`` and ``network['weights']``.
    """
    weights = network['weights']
    # Gradient slots for every layer except the input layer.
    grads_b = [np.zeros(b.shape) for b in network['biases']]
    grads_w = [np.zeros(w.shape) for w in weights]

    expected = np.reshape(y, (len(y), 1))
    delta = None
    # Negative indices walk the layers from the output back towards the input.
    for depth in range(1, network['nlayers']):
        if depth == 1:
            # Output layer: error against the target.
            error = activations[-depth] - expected
        else:
            # Hidden layer: error pulled back through the next layer's weights.
            error = np.dot(weights[-depth + 1].transpose(), delta)
        delta = error * activite_derivative(zs[-depth])
        grads_b[-depth] = delta
        grads_w[-depth] = np.dot(delta, activations[-depth - 1].transpose())
    return (grads_b, grads_w)


--------------------------------

$$ 
\begin{align*} 
\dfrac{\partial C}{\partial w^l} &= \delta^l (a^{l-1})^T \\
w^l &= w^l-\frac{\eta}{m} \sum_x \delta^{x,l} (a^{x,l-1})^T \\
\dfrac{\partial C}{\partial b^l_j} &= \delta^l_j \\
b^l &= b^l-\frac{\eta}{m} \sum_x \delta^{x,l}
\end{align*}
$$


In [8]:
def update_weights_biases(network, total, eta, nabla_b, nabla_w):
    """Take one gradient-descent step on the network's biases and weights.

    Each parameter array is replaced by ``param - (eta / total) * gradient``;
    the new lists are stored back under ``network["biases"]`` and
    ``network["weights"]``.

    Args:
        network: Dict holding the 'biases' and 'weights' lists to update.
        total: Number of samples the gradients were summed over.
        eta: Learning rate.
        nabla_b: Summed bias gradients, one array per layer.
        nabla_w: Summed weight gradients, one array per layer.
    """
    for key, gradients in (("biases", nabla_b), ("weights", nabla_w)):
        stepped = []
        for param, gradient in zip(network[key], gradients):
            stepped.append(param - (eta / total) * gradient)
        network[key] = stepped

In [83]:
class NeuralNetwork(object):
    """Fully connected feed-forward network trained with mini-batch SGD."""

    def __init__(self, dataset, sizes):
        """Store the data and create randomly initialised parameters.

        Args:
            dataset: Array of ``(inputs, one_hot_target)`` rows. It is
                shuffled in place by ``train``.
            sizes: Number of neurons per layer, input layer first.
        """
        self.dataset = dataset
        self.sizes = sizes
        self.network = self.initialize_network(sizes)
        print(sizes)

    @staticmethod
    def initialize_network(sizes):
        """Build the parameter dict for a network with the given layer sizes.

        Args:
            sizes: Number of neurons per layer, input layer first.

        Returns:
            Dict with 'nlayers' (the layer count) and the lists 'biases' and
            'weights', drawn from a standard normal distribution. The input
            layer has no biases or weights, so both lists have
            ``len(sizes) - 1`` entries.
        """
        network = dict()
        network['nlayers'] = len(sizes)
        network["biases"] = [np.random.randn(y, 1) for y in sizes[1:]]
        network["weights"] = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]
        return network

    def train(self, batch_size, eta, epochs):
        """Train with stochastic gradient descent, then print an accuracy.

        After the last epoch the data is shuffled once more and the accuracy
        on its first 70 rows is printed. Those rows come from the training
        data, so the figure is not a held-out test score.

        Args:
            batch_size: Number of samples per mini-batch.
            eta: Learning rate.
            epochs: Number of passes over the dataset.
        """
        # Size the batches from this instance's data, not from a
        # module-level variable that happens to be named `dataset`.
        num_samples = len(self.dataset)
        for _ in range(epochs):
            np.random.shuffle(self.dataset)
            batches = [self.dataset[k:k + batch_size]
                       for k in range(0, num_samples, batch_size)]
            for batch in batches:
                # Gradients are summed over the whole mini-batch.
                nabla_b = [np.zeros(b.shape) for b in self.network['biases']]
                nabla_w = [np.zeros(w.shape) for w in self.network['weights']]
                for x, y in batch:
                    # 1. feed forward
                    zs, activations = forward_propagate(self.network, x)
                    # 2. back propagation
                    delta_nabla_b, delta_nabla_w = backward_propagate(
                        self.network, y, zs, activations)
                    nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
                    nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]

                # 3. update weights and biases
                update_weights_biases(self.network, len(batch), eta, nabla_b, nabla_w)

        np.random.shuffle(self.dataset)
        print(self.evaluate(self.dataset[:70]))

    def evaluate(self, test_data):
        """Return the fraction of samples that are classified correctly.

        A sample counts as correct when the one-hot target is set at the
        index of the strongest output neuron.

        Args:
            test_data: Sized iterable of ``(inputs, one_hot_target)`` pairs.

        Returns:
            Accuracy as a float in [0, 1]; 0.0 when ``test_data`` is empty.
        """
        # len() instead of truthiness: test_data may be a NumPy array.
        if len(test_data) == 0:
            return 0.0
        count = 0
        for x, y in test_data:
            _, results = forward_propagate(self.network, x)
            if y[np.argmax(results[-1])]:
                count += 1
        return float(count) / len(test_data)

In [86]:
# Load the seeds data, scale its features, then derive the layer sizes from it.
dataset = load_dataset_from_file(os.path.join(dataset_dir, 'seeds_dataset.txt'))
dataset = normalize_dataset(dataset)
ninput = len(dataset[0, 0])  # one input neuron per feature of a sample
nhiden = 5  # number of neurons in the single hidden layer
noutput = len(dataset[0, 1])  # one output neuron per one-hot label entry
sizes=(ninput, nhiden, noutput)

In [85]:
# Build the network, then train it with batch_size=60, eta=0.05, epochs=300.
nw = NeuralNetwork(dataset, sizes)
nw.train(60, 0.05, 300)

(7, 5, 3)
[[0.32782448]
 [0.43317646]
 [0.21910345]] [1, 0, 0]
[[0.33418649]
 [0.74929747]
 [0.08054873]] [0, 1, 0]
[[0.3888445 ]
 [0.21366478]
 [0.36396078]] [1, 0, 0]
[[0.35568194]
 [0.61282675]
 [0.11499516]] [1, 0, 0]
[[0.29956233]
 [0.09241782]
 [0.72297662]] [0, 0, 1]
[[0.36722755]
 [0.34183543]
 [0.22801633]] [1, 0, 0]
[[0.34407604]
 [0.11986598]
 [0.6327772 ]] [0, 0, 1]
[[0.38884868]
 [0.24021345]
 [0.30645366]] [1, 0, 0]
[[0.3565263 ]
 [0.69778656]
 [0.08154024]] [0, 1, 0]
[[0.33347097]
 [0.13181385]
 [0.59419118]] [0, 0, 1]
[[0.38291455]
 [0.25057543]
 [0.28484204]] [1, 0, 0]
[[0.33820901]
 [0.75637702]
 [0.07973108]] [0, 1, 0]
[[0.38349007]
 [0.20538844]
 [0.38146809]] [1, 0, 0]
[[0.3267797 ]
 [0.78988556]
 [0.07222063]] [0, 1, 0]
[[0.33438659]
 [0.78113134]
 [0.06811712]] [0, 1, 0]
[[0.32460534]
 [0.36475406]
 [0.26662106]] [1, 0, 0]
[[0.33417276]
 [0.63114117]
 [0.12304979]] [0, 1, 0]
[[0.35012403]
 [0.70879521]
 [0.07955429]] [0, 1, 0]
[[0.33651617]
 [0.60373198]
 [0.1270