# <center> Apply backpropagation algorithm to wheet seeds datasets</center>

In [1]:
import random
import math
import os

In [2]:
dataset_dir = '/home/lidong/Datasets/ML'

In [3]:
def load_dataset_from_file(path):
    dataset = []
    with open(path) as f:
        for line in f:
            row = line.rstrip('\n').split()
            dataset.append(list(map(lambda x: float(x.strip()), row[:-1])))
            dataset[-1].append(int(row[-1].strip()))
    # classify by raw values
    class_values = set([row[-1] for row in dataset])
    class_indexs = dict()
    for i, value in enumerate(class_values):
        class_indexs[value] = i
    for row in dataset:
        row[-1] = class_indexs[row[-1]]
    return dataset, class_indexs

In [4]:
# test
dataset, _ = load_dataset_from_file(os.path.join(dataset_dir, 'seeds_dataset.txt'))
dataset[-3:]

[[13.2, 13.66, 0.8883, 5.236, 3.232, 8.315, 5.056, 2],
 [11.84, 13.21, 0.8521, 5.175, 2.836, 3.598, 5.044, 2],
 [12.3, 13.34, 0.8684, 5.243, 2.974, 5.637, 5.063, 2]]

In [5]:
def normalize_dataset(dataset):
    minmax = [[min(column), max(column)] for column in zip(*dataset)]
    for row in dataset:
        for i in range(len(row) - 1):
            row[i] = round((row[i] - minmax[i][0]) / (minmax[i][1] - minmax[i][0]), 4)
    return dataset

In [6]:
# test
# dataset = normalize_dataset(dataset)
# print(dataset[:3])

In [7]:
# targets = set([row[-1] for row in dataset])
# targets

In [8]:
class NeuralNetwork(object):
    def __init__(self, ninput, nhiden, noutput):
        self.ni = ninput
        self.nh = nhiden
        self.no = noutput
        self.network = self.initialize_network(self.ni, self.nh, self.no)
        
    def train(self, dataset, lr, epochs):
        for epoch in range(epochs):
            for row in dataset:
                expected = [0 for i in range(self.no)]
                expected[row[-1]] = 1
                inputs = row[:-1]
                self.forward_propagate(self.network, inputs, self.activite_transfer)
                self.backward_propagate(self.network, expected, self.activite_derivative)
                self.update_weights(self.network, inputs, lr)
        # print(self.network)
        
    def predict(self, inputs):
        results = list() 
        for row in inputs:
            outputs = self.forward_propagate(self.network, row, self.activite_transfer)
            results.append(outputs.index(max(outputs)))
        # print(results)
        return results
    
    @staticmethod
    def initialize_network(ni, nh, no):
        # print(ni, nh, no)
        network = list()
        # hiden layer
        network.append([{'weights': [random.random() for i in range(ni + 1)]} for j in range(nh)])
        # output layer
        network.append([{'weights': [random.random() for i in range(nh + 1)]} for k in range(no)])
        # print(network)
        return network
    
    @staticmethod
    def activite_transfer(weights, inputs):
        output = weights[-1] # bias
        for i in range(len(weights) - 1):
            output += weights[i] * inputs[i]
        return 1.0 / (1.0 + math.exp(-output)) # activete function sigmiod
    
    @staticmethod
    def activite_derivative(output):
        return output * (1 - output)
    
    @staticmethod
    def forward_propagate(network, inputs, activite_transfer_cb):
        inputs_ = inputs
        for layer in network:
            output = []
            for neuron in layer:
                neuron['output'] = activite_transfer_cb(neuron['weights'], inputs_)
                output.append(neuron['output'])
            inputs_ = output
        # print(inputs_)
        return inputs_
    
    @staticmethod
    def backward_propagate(network, expected, activite_derivative_cb):
        # last layer: output layer
        for k, neuron in enumerate(network[-1]):
            # middle signnal: delta
            error = -1 * (neuron['output'] - expected[k])
            neuron['delta'] = error * activite_derivative_cb(neuron['output'])
            
        # other layers, full connect layers  
        for l in reversed(range(len(network)-1)):
            for j, neuron in enumerate(network[l]):
                error = 0.0
                for nr in network[l+1]:
                    error += nr['delta'] * nr['weights'][j]
                neuron['delta'] = error * activite_derivative_cb(neuron['output'])    
                
    @staticmethod
    def update_weights(network, inputs, l_rate):
        for i in range(len(network)):
            if i == 0:
                # remove the last column: target classification value
                inputs_ = inputs
            else:
                # pre layer output as inputs
                inputs_ = [neuron['output'] for neuron in network[i - 1]]
                
            for neuron in network[i]:
                for i in range(len(inputs_)):
                    # weights
                    neuron['weights'][i] += l_rate * neuron['delta'] * inputs_[i]
                # biases
                neuron['weights'][-1] += l_rate * neuron['delta']

In [9]:
def accuracy_metric(actual, predicted):          
    correct = 0                                  
    for i in range(len(actual)):
        if actual[i] == predicted[i]:
            correct += 1
    return correct / float(len(actual)) * 100.0
    
# Evaluate an algorithm using a cross validation split
def evaluate_algorithm(dataset, nfolds, ninput, nhiden, noutput, lr, epochs):
    random.seed(0)
    fold_list = list()
    fold_size = int(len(dataset) / nfolds)
    indices = [i for i in range(len(dataset))]
    # random.shuffle(indices)
    for i in range(nfolds):
        fold_list.append([dataset[indices.pop()] for j in range(fold_size)])
        
    scores = list()
    # each fold can use as test data
    for test_data in fold_list:
        train_data = list(fold_list)
        train_data.remove(test_data)
        train_data = sum(train_data, [])
        nn = NeuralNetwork(ninput, nhiden, noutput)
        nn.train(train_data, lr, epochs)
        predicted = nn.predict(test_data)
        actual = [row[-1] for row in test_data]
        scores.append(accuracy_metric(actual, predicted))
    return scores
 

In [12]:
dataset, class_indices = load_dataset_from_file(os.path.join(dataset_dir, 'seeds_dataset.txt'))
dataset = normalize_dataset(dataset)
nfolds = 5
ninput = len(dataset[0])-1
nhiden = 5
noutput = len(class_indices)
dataset[0:5]

[[0.441, 0.5021, 0.5708, 0.4865, 0.4861, 0.1893, 0.3452, 0],
 [0.4051, 0.4463, 0.6624, 0.3688, 0.5011, 0.0329, 0.2152, 0],
 [0.3494, 0.3471, 0.8793, 0.2207, 0.5039, 0.2515, 0.1507, 0],
 [0.3069, 0.3161, 0.7931, 0.2393, 0.5339, 0.1942, 0.1408, 0],
 [0.5241, 0.5331, 0.8648, 0.4274, 0.6643, 0.0767, 0.323, 0]]

In [10]:
scores = evaluate_algorithm(dataset, nfolds, ninput, nhiden, noutput, lr=0.1, epochs=500)
print('Score: %s' % scores)
print('Mean Score: %.3f%%' % (sum(scores)/float(len(scores))))

Score: [83.33333333333334, 88.09523809523809, 92.85714285714286, 90.47619047619048, 85.71428571428571]
Mean Score: 88.095%
