# <center> Apply the backpropagation algorithm to the wheat seeds dataset</center>

In [None]:
import numpy as np
import os

In [None]:
# Directory that holds the dataset files; it is joined with 'seeds_dataset.txt' further down.
# NOTE(review): absolute, machine-specific path - adjust it before running elsewhere.
dataset_dir = '/home/lidong/Datasets/ML' 

In [None]:
def load_dataset_from_file(path):
    """Load a whitespace-separated dataset whose last column is an integer label.

    Every non-blank line is split on whitespace; all columns but the last are
    parsed as floats (the features) and the last one as an int (the raw class
    label). Raw labels are mapped to consecutive indices 0..k-1 in ascending
    label order.

    Args:
        path: Path of the text file to read.

    Returns:
        A tuple ``(dataset, class_indexs)``:
        ``dataset`` is an object array of shape ``(n_samples, 2)`` where
        ``dataset[i, 0]`` is the list of feature floats and ``dataset[i, 1]``
        is the class index; ``class_indexs`` maps raw label -> class index.

    Raises:
        ValueError: If a feature or label cannot be parsed as a number.
    """
    inputs = []
    targets = []
    with open(path) as f:
        for line in f:
            # split() without arguments already drops the newline and collapses
            # repeated separators, so no extra strip() calls are needed.
            row = line.split()
            if not row:
                # Blank lines (e.g. a trailing newline) carry no sample.
                continue
            inputs.append([float(value) for value in row[:-1]])
            targets.append(int(row[-1]))

    # Sort the raw labels so the label -> index mapping is reproducible instead
    # of depending on set iteration order.
    class_indexs = {value: i for i, value in enumerate(sorted(set(targets)))}

    # Build the object array explicitly: np.array() on ragged (list, int)
    # pairs is rejected by recent NumPy releases unless dtype=object is given.
    dataset = np.empty((len(targets), 2), dtype=object)
    for i, (features, label) in enumerate(zip(inputs, targets)):
        dataset[i, 0] = features
        dataset[i, 1] = class_indexs[label]
    return dataset, class_indexs

In [None]:
def normalize_dataset(dataset):
    """Min-max scale every feature column to the range [0, 1], in place.

    Args:
        dataset: 2-D array whose first column holds one mutable feature
            sequence (e.g. a list of floats) per sample.

    Returns:
        The same ``dataset`` object; the feature sequences are overwritten
        with the scaled values rounded to 4 decimals.
    """
    inputs = dataset[:, 0]
    # One (min, max) pair per feature column.
    minmax = [(min(column), max(column)) for column in zip(*inputs)]
    for row in inputs:
        for i, (low, high) in enumerate(minmax):
            span = high - low
            # A constant column has no spread; map it to 0.0 rather than
            # dividing by zero.
            row[i] = round((row[i] - low) / span, 4) if span else 0.0
    return dataset

Activation function

In [None]:
def activite_transfer(z):
    """Apply the logistic sigmoid 1 / (1 + e^-z) to a scalar or array."""
    denominator = 1.0 + np.exp(-z)
    return 1.0 / denominator

def activite_derivative(z):
    """Return the sigmoid derivative s(z) * (1 - s(z)) at z."""
    # Evaluate the sigmoid once and reuse the value for both factors.
    sigmoid = activite_transfer(z)
    return sigmoid * (1 - sigmoid)

In [None]:
def forward_propagate(network, x):
    """Run one sample through the network and record every layer's values.

    Args:
        network: Dict with 'biases' and 'weights' lists, one entry per
            non-input layer.
        x: Flat sequence of input features.

    Returns:
        A tuple ``(zs, activations)``: ``zs`` holds the weighted input of each
        non-input layer; ``activations`` starts with the input column vector
        followed by the activation of each non-input layer.
    """
    # Turn the flat input into a column vector of shape (len(x), 1).
    current = np.reshape(x, (len(x), 1))
    weighted_inputs = []  # the input layer has no weighted input
    layer_outputs = [current]  # the input itself is the first "activation"
    for bias, weight in zip(network['biases'], network['weights']):
        weighted = np.dot(weight, current) + bias
        current = activite_transfer(weighted)
        weighted_inputs.append(weighted)
        layer_outputs.append(current)
    return weighted_inputs, layer_outputs

Backward propagation:

--------------------------------

$$ 
\begin{align*} 
\delta^L &= (a^L-y) \odot \sigma'(z^L) \\
\delta^l &= ((w^{l+1})^T \delta^{l+1}) \odot \sigma'(z^l) \\
\end{align*}
$$

--------------------------------

In [None]:
def backward_propagate(network, y, zs, activations):
    """Compute the gradients of the quadratic cost for a single sample.

    Args:
        network: Dict with 'nlayers', 'biases' and 'weights'.
        y: Target for the sample. Either an array compatible with the output
            activation, or an integer class index; an integer index is
            expanded to a one-hot column vector when the output layer has
            more than one unit.
        zs: Weighted inputs of every non-input layer (from forward_propagate).
        activations: Activations of every layer, input layer first.

    Returns:
        A tuple ``(nabla_b, nabla_w)`` of lists shaped like
        ``network['biases']`` and ``network['weights']``.
    """
    # Gradient placeholders for every layer except the input layer.
    nabla_b = [np.zeros(b.shape) for b in network['biases']]
    nabla_w = [np.zeros(w.shape) for w in network['weights']]

    nlayers = network['nlayers']
    # Negative indices walk the layers backwards: l == 1 is the output layer,
    # l == 2 the layer before it, and so on.
    for l in range(1, nlayers):
        if l == 1:
            output = activations[-1]
            target = y
            if isinstance(y, (int, np.integer)) and np.shape(output)[0] > 1:
                # A bare class index would be broadcast against every output
                # unit; the error term needs a one-hot target instead.
                target = np.zeros(np.shape(output))
                target[int(y)] = 1.0
            delta = (output - target) * activite_derivative(zs[-1])
        else:
            # Pull the error back through the weights of the following layer.
            delta = np.dot(network['weights'][-l + 1].transpose(), delta) * activite_derivative(zs[-l])
        nabla_b[-l] = delta
        nabla_w[-l] = np.dot(delta, np.transpose(activations[-l - 1]))
    return (nabla_b, nabla_w)


--------------------------------

$$ 
\begin{align*} 
\dfrac{\partial C}{\partial w^l} &= \delta^l (a^{l-1})^T \\
w^l &= w^l-\frac{\eta}{m} \sum_x \delta^{x,l} (a^{x,l-1})^T \\
\dfrac{\partial C}{\partial b^l_j} &= \delta^l_j \\
b^l &= b^l-\frac{\eta}{m} \sum_x \delta^{x,l}
\end{align*}
$$


In [None]:
def update_weights_biases(network, total, eta, nabla_b, nabla_w):
    """Take one gradient-descent step on the network parameters.

    Each parameter array is replaced by ``param - (eta / total) * gradient``;
    the new lists are stored back under the 'biases' and 'weights' keys of
    ``network``. Nothing is returned.
    """
    def descend(params, gradients):
        # Summed gradients are averaged over the `total` samples of the batch.
        return [param - (eta/total)*grad for param, grad in zip(params, gradients)]

    network["biases"] = descend(network["biases"], nabla_b)
    network["weights"] = descend(network["weights"], nabla_w)

In [None]:
class NeuralNetwork(object):
    """Fully connected feed-forward network trained with mini-batch SGD.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, dataset, sizes):
        """Store the training data and create randomly initialised parameters.

        Args:
            dataset: Sequence of ``(features, class_index)`` samples. It is
                kept by reference and shuffled in place by ``train``.
            sizes: Number of units per layer, input layer first.
        """
        # Training samples (not copied).
        self.dataset = dataset
        # Layer sizes, input layer first.
        self.sizes = sizes
        # Dict holding 'nlayers', 'biases' and 'weights'.
        self.network = self.initialize_network(sizes)

    @staticmethod
    def initialize_network(sizes):
        """Build the parameter dict for a network with the given layer sizes.

        The input layer has neither biases nor weights, so both lists hold
        ``len(sizes) - 1`` arrays drawn from a standard normal distribution:
        biases of shape ``(n, 1)`` and weights of shape ``(n, n_previous)``.
        """
        return {
            'nlayers': len(sizes),
            "biases": [np.random.randn(y, 1) for y in sizes[1:]],
            "weights": [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])],
        }

    def train(self, batch_size, eta, epochs):
        """Train with stochastic gradient descent over mini-batches.

        Args:
            batch_size: Number of samples per mini-batch.
            eta: Learning rate.
            epochs: Number of passes over the whole dataset.

        Note:
            ``self.dataset`` is shuffled in place at the start of every epoch.
        """
        # Use the instance's own data, not a module-level variable that merely
        # happens to share the name.
        N = len(self.dataset)
        for _ in range(epochs):
            np.random.shuffle(self.dataset)
            batches = [self.dataset[k:k+batch_size] for k in range(0, N, batch_size)]
            for batch in batches:
                # Accumulate the per-sample gradients of the batch.
                nabla_b = [np.zeros(b.shape) for b in self.network['biases']]
                nabla_w = [np.zeros(w.shape) for w in self.network['weights']]
                for x, y in batch:
                    # 1. feed forward
                    zs, activations = forward_propagate(self.network, x)
                    # 2. back propagation
                    delta_nabla_b, delta_nabla_w = backward_propagate(self.network, y, zs, activations)
                    nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
                    nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]

                # 3. update weights and biases
                update_weights_biases(self.network, len(batch), eta, nabla_b, nabla_w)

    def evaluate(self, test_data):
        """Print and return the fraction of samples classified correctly.

        A sample counts as correct when the index of the largest output
        activation equals its class index. Empty ``test_data`` yields 0.0.
        """
        total = len(test_data)
        count = 0
        for x, y in test_data:
            _, results = forward_propagate(self.network, x)
            if np.argmax(results[-1]) == y:
                count += 1
        # Guard against dividing by zero when there is nothing to evaluate.
        accuracy = count / total if total else 0.0
        print(accuracy)
        return accuracy

In [None]:
# Load the seeds data and scale every feature column to [0, 1].
dataset, class_indexs = load_dataset_from_file(os.path.join(dataset_dir, 'seeds_dataset.txt'))
dataset = normalize_dataset(dataset)

# Layer sizes: one input unit per feature, a single hidden layer of 5 units,
# and one output unit per class.
ninput = len(dataset[0, 0])
nhiden = 5
noutput = len(class_indexs)
sizes=(ninput, nhiden, noutput)

# Train with batch_size=50, eta=0.2, epochs=500.
nw = NeuralNetwork(dataset, sizes)
nw.train(50, 0.2, 500)
# NOTE(review): train() shuffles this same array in place and trains on all of
# it, so the rows 80..139 evaluated here are a subset of the training samples,
# not a held-out test set.
nw.evaluate(dataset[80:140])