Prepare the functions necessary for building the neural network

In [3]:
# Initialize a network
# n_inputs: the number of inputs
# n_hidden_1: the number of neurons in 1st hidden layer, each neuron has n_inputs+1 weights
# n_hidden_2: the number of neurons in 2nd hidden layer, each neuron has n_hidden_1+1 weights

from random import seed
from random import random
from random import randrange

# n_outputs: the number of output neurons, each has n_hidden_2+1 weights
def initialize_network(n_inputs, n_hidden_1, n_hidden_2, n_outputs):
    """Create a network with two hidden layers and random initial weights.

    The network is a list of three layers (hidden 1, hidden 2, output).
    Each layer is a list of neurons, and each neuron is a dict whose
    ``'weights'`` entry holds one weight per incoming value plus one
    trailing bias weight, all drawn from ``random()``.

    Args:
        n_inputs: number of input features.
        n_hidden_1: number of neurons in the first hidden layer.
        n_hidden_2: number of neurons in the second hidden layer.
        n_outputs: number of neurons in the output layer.

    Returns:
        ``[hidden_layer_1, hidden_layer_2, output_layer]``.
    """
    def build_layer(n_neurons, n_incoming):
        # "+ 1" reserves the last slot of every weight list for the bias.
        return [
            {'weights': [random() for _ in range(n_incoming + 1)]}
            for _ in range(n_neurons)
        ]

    # Layers are built front to back so the random draws happen in the
    # order: hidden 1, hidden 2, output.
    return [
        build_layer(n_hidden_1, n_inputs),
        build_layer(n_hidden_2, n_hidden_1),
        build_layer(n_outputs, n_hidden_2),
    ]

from math import exp
# Weighted input of a single neuron (the value fed to the sigmoid)
def activate(weights, inputs):
    """Return the bias plus the weighted sum of *inputs* for one neuron.

    The last element of *weights* is the bias weight (its input is the
    constant 1). The remaining weights are paired positionally with
    *inputs*; elements of *inputs* beyond ``len(weights) - 1`` are not read,
    so a data row that still carries its trailing label can be passed as is.
    """
    total = weights[-1] * 1
    for position, weight in enumerate(weights[:-1]):
        total += weight * inputs[position]
    return total

# Transfer neuron activation
# This is the sigmoid function
def transfer(activation):
    """Map an activation to the range 0-1 with the logistic sigmoid.

    Args:
        activation: weighted input of a neuron.

    Returns:
        ``1 / (1 + e^-activation)``. For very negative activations, where
        ``exp(-activation)`` is too large for a float, the result saturates
        to ``0.0`` instead of raising ``OverflowError``.
    """
    try:
        return 1.0 / (1.0 + exp(-activation))
    except OverflowError:
        # exp() overflows for activations below roughly -709; the true
        # sigmoid value there is indistinguishable from zero.
        return 0.0

# Forward propagate one input row through the network
def forward_propagate(network, row):
    """Feed *row* through every layer and return the last layer's outputs.

    Each neuron's sigmoid output is also stored in its dict under the key
    ``'output'`` so the backward pass can read it later.

    Args:
        network: list of layers, each a list of neuron dicts with 'weights'.
        row: list of input values for the first layer.

    Returns:
        List with one output value per neuron of the final layer (or *row*
        itself when *network* has no layers).
    """
    signal = row
    for layer in network:
        layer_outputs = []
        for neuron in layer:
            # Weighted input first, then squash it with the sigmoid.
            neuron['output'] = transfer(activate(neuron['weights'], signal))
            layer_outputs.append(neuron['output'])
        # The outputs of this layer are the inputs of the next one.
        signal = layer_outputs
    return signal

def transfer_derivative(output):
    """Return the sigmoid slope expressed through the neuron's *output*.

    For ``output = sigmoid(z)`` the derivative with respect to ``z`` is
    ``output * (1 - output)``.
    """
    complement = 1.0 - output
    return output * complement

# Backpropagate the error and store a 'delta' in every neuron
def backward_propagate_error(network, expected):
    """Compute the error signal of every neuron, from the output layer back.

    Requires that a forward pass has already stored ``'output'`` in each
    neuron. The result is written to each neuron dict under ``'delta'``.

    Args:
        network: list of layers of neuron dicts.
        expected: target values, one per output neuron.
    """
    last = len(network) - 1
    for depth in range(last, -1, -1):
        layer = network[depth]
        if depth == last:
            # Output layer: error is target minus actual output.
            errors = [expected[k] - layer[k]['output']
                      for k in range(len(layer))]
        else:
            # Hidden layer: error of neuron k is the delta of every neuron
            # in the following layer, weighted by its k-th weight.
            downstream = network[depth + 1]
            errors = []
            for k in range(len(layer)):
                acc = 0.0
                for nxt in downstream:
                    acc += nxt['weights'][k] * nxt['delta']
                errors.append(acc)

        # delta = error scaled by the sigmoid slope at the neuron's output.
        for neuron, err in zip(layer, errors):
            neuron['delta'] = err * transfer_derivative(neuron['output'])

# Apply one gradient step to every weight in the network
def update_weights(network, row, l_rate):
    """Adjust all weights in place using the stored deltas.

    Requires ``'delta'`` on every neuron and ``'output'`` on every neuron
    of all but the last layer, as stored by the forward and backward passes.

    Args:
        network: list of layers of neuron dicts.
        row: training row; its last element is excluded from the inputs.
        l_rate: learning rate multiplying every update.
    """
    for depth, layer in enumerate(network):
        if depth == 0:
            # The first layer is fed by the row without its last element.
            feed = row[:-1]
        else:
            # Later layers are fed by the previous layer's outputs.
            feed = [prev['output'] for prev in network[depth - 1]]
        for neuron in layer:
            weights = neuron['weights']
            for k, value in enumerate(feed):
                weights[k] += l_rate * neuron['delta'] * value
            # The bias weight sits last; its input is the constant 1.
            weights[-1] += l_rate * neuron['delta']
# Training repeats three steps for every row:
# 1. forward propagate, 2. backward propagate to get all deltas,
# 3. update the weights
def train_network(network, train, l_rate, n_epoch, n_outputs):
    """Train *network* in place, updating weights after every row.

    Prints the epoch number, learning rate and summed squared error once
    per epoch.

    Args:
        network: list of layers of neuron dicts.
        train: list of rows; the last element of each row is the class
            label, converted with ``int()`` and used as an index.
        l_rate: learning rate.
        n_epoch: number of passes over *train*.
        n_outputs: number of output neurons (length of the one-hot target).
    """
    for epoch in range(n_epoch):
        epoch_error = 0
        for row in train:
            predicted = forward_propagate(network, row)
            # One-hot target: 1 at the index given by the row's label.
            target = [0] * n_outputs
            target[int(row[-1])] = 1
            epoch_error += sum(
                [(target[k] - predicted[k]) ** 2 for k in range(len(target))]
            )
            backward_propagate_error(network, target)
            update_weights(network, row, l_rate)
        print('>epoch=%d, lrate=%.3f, error=%.3f' % (epoch, l_rate, epoch_error))

# Make a prediction with a network
def predict(network, row):
    """Return the index of the output neuron with the largest output.

    Runs a forward pass on *row*; on ties the lowest index wins.
    """
    scores = forward_propagate(network, row)
    best = max(scores)
    return scores.index(best)

The following part performs feature scaling.

In [4]:
# Feature scaling
# Find the min and max values for each column
def dataset_minmax(dataset):
    """Return ``[min, max]`` for every column of *dataset*.

    Args:
        dataset: list of rows (each row a list of comparable values).

    Returns:
        List with one ``[min, max]`` pair per column, in column order.
        An empty dataset yields an empty list.
    """
    # zip(*dataset) transposes the rows into columns.
    return [[min(column), max(column)] for column in zip(*dataset)]

# Rescale dataset columns to the range 0-1
def normalize_dataset(dataset, minmax):
    """Min-max scale every column except the last one, in place.

    Args:
        dataset: list of rows; each row is modified in place. The last
            element of every row is left untouched.
        minmax: one ``[min, max]`` pair per column, indexed like the rows.

    A column whose min equals its max carries no information and would
    otherwise divide by zero; its values are set to ``0.0``.
    """
    for row in dataset:
        for i in range(len(row) - 1):
            low = minmax[i][0]
            span = minmax[i][1] - low
            row[i] = (row[i] - low) / span if span else 0.0
            

The following part prepares the k-fold cross-validation.

In [5]:
# Split a dataset into k folds
def cross_validation_split(dataset, n_folds=5):
    """Randomly partition *dataset* into *n_folds* folds of equal size.

    Every fold gets ``int(len(dataset) / n_folds)`` rows drawn without
    replacement via ``randrange``; rows left over after the last fold is
    filled are not placed in any fold. *dataset* itself is not modified.

    Returns:
        List of *n_folds* lists of rows.
    """
    pool = list(dataset)
    rows_per_fold = int(len(dataset) / n_folds)
    folds = []
    for _ in range(n_folds):
        # Popping from the shrinking pool guarantees no row is used twice.
        folds.append(
            [pool.pop(randrange(len(pool))) for _ in range(rows_per_fold)]
        )
    return folds

# Calculate accuracy percentage
def accuracy_metric(actual, predicted):
    """Return the percentage of positions where *predicted* equals *actual*.

    Args:
        actual: list of true labels.
        predicted: list of predicted labels, at least as long as *actual*.

    Returns:
        A float between 0.0 and 100.0.
    """
    total = len(actual)
    hits = sum(1 for k in range(total) if actual[k] == predicted[k])
    return hits / float(total) * 100.0

After all the preparation, we can make use of all the functions and train the neural network.

In [6]:
# Evaluate an algorithm using a cross validation split
def evaluate_algorithm(dataset, algorithm, n_folds, *args):
    """Score *algorithm* with k-fold cross-validation.

    For each fold, the remaining folds are merged into the training set and
    the held-out fold, with its label replaced by ``None``, becomes the
    test set.

    Args:
        dataset: list of rows whose last element is the class label.
        algorithm: callable ``algorithm(train_set, test_set, *args)``
            returning ``(predictions, network)``.
        n_folds: number of folds.
        *args: extra arguments forwarded to *algorithm*.

    Returns:
        ``(scores, network)``: the accuracy percentage of every fold, and
        the network returned for the last fold only.
    """
    folds = cross_validation_split(dataset, n_folds)
    scores = []
    for held_out in folds:
        # Training data: every fold except the held-out one, flattened.
        others = list(folds)
        others.remove(held_out)
        train_set = [row for part in others for row in part]

        # Test data: copies of the held-out rows with the label hidden.
        test_set = []
        for row in held_out:
            masked = list(row)
            masked[-1] = None
            test_set.append(masked)

        predicted, network = algorithm(train_set, test_set, *args)
        actual = [row[-1] for row in held_out]
        scores.append(accuracy_metric(actual, predicted))
    return (scores, network)

# Backpropagation Algorithm With Stochastic Gradient Descent
def back_propagation(train, test, l_rate, n_epoch, n_hidden_1, n_nidden_2):
    """Train a two-hidden-layer network on *train* and predict *test*.

    Args:
        train: list of rows ``[feature, ..., label]``.
        test: list of rows laid out like *train*; the last element of each
            row is not read by the prediction.
        l_rate: learning rate.
        n_epoch: number of training epochs.
        n_hidden_1: number of neurons in the first hidden layer.
        n_nidden_2: number of neurons in the second hidden layer. The
            misspelled name is kept so existing keyword callers keep working.

    Returns:
        ``(predictions, network)``: one predicted class index per test row,
        and the trained network.
    """
    # Use the argument that was actually passed in. Previously the body read
    # a name ``n_hidden_2`` that is not a parameter, so the argument was
    # ignored in favour of whatever module-level variable had that name.
    n_hidden_2 = n_nidden_2

    # Every column but the last is an input feature.
    n_inputs = len(train[0]) - 1
    # One output neuron per distinct label found in the training rows.
    n_outputs = len({row[-1] for row in train})

    network = initialize_network(n_inputs, n_hidden_1, n_hidden_2, n_outputs)
    train_network(network, train, l_rate, n_epoch, n_outputs)
    predictions = [predict(network, row) for row in test]
    print(n_inputs)
    print(n_outputs)
    return (predictions, network)

In [7]:
# Train and evaluate the network with k-fold cross-validation on the CSV data.
# Fixing the seed makes the random weight initialisation and the random fold
# assignment repeatable between runs.
seed(1)

# Load the CSV and turn it into a plain list of row lists
import pandas as pd

df = pd.read_csv("./support-vector-machine-example-1.csv")
# NOTE(review): the result of head() is not used or displayed here.
df.head(10)
dataset = df.values.tolist()

# Rescale every column except the last one to the range 0-1 (in place)
minmax = dataset_minmax(dataset)
normalize_dataset(dataset, minmax)


# Hyperparameters: number of folds, learning rate, epochs per fold,
# and the sizes of the two hidden layers
n_folds = 5
l_rate = 0.1
n_epoch = 100
n_hidden_1 = 3
n_hidden_2 = 5 
# scores holds one accuracy percentage per fold; trained_network is the
# network returned for the last fold only.
scores, trained_network = evaluate_algorithm(dataset, back_propagation, n_folds, l_rate, n_epoch, n_hidden_1,n_hidden_2)

print('Scores: %s' % scores)
print('Mean Accuracy: %.3f%%' % (sum(scores)/float(len(scores))))

>epoch=0, lrate=0.100, error=301.319
>epoch=1, lrate=0.100, error=281.014
>epoch=2, lrate=0.100, error=280.962
>epoch=3, lrate=0.100, error=280.909
>epoch=4, lrate=0.100, error=280.855
>epoch=5, lrate=0.100, error=280.800
>epoch=6, lrate=0.100, error=280.744
>epoch=7, lrate=0.100, error=280.686
>epoch=8, lrate=0.100, error=280.627
>epoch=9, lrate=0.100, error=280.565
>epoch=10, lrate=0.100, error=280.502
>epoch=11, lrate=0.100, error=280.436
>epoch=12, lrate=0.100, error=280.368
>epoch=13, lrate=0.100, error=280.297
>epoch=14, lrate=0.100, error=280.224
>epoch=15, lrate=0.100, error=280.147
>epoch=16, lrate=0.100, error=280.067
>epoch=17, lrate=0.100, error=279.984
>epoch=18, lrate=0.100, error=279.896
>epoch=19, lrate=0.100, error=279.804
>epoch=20, lrate=0.100, error=279.707
>epoch=21, lrate=0.100, error=279.605
>epoch=22, lrate=0.100, error=279.497
>epoch=23, lrate=0.100, error=279.382
>epoch=24, lrate=0.100, error=279.259
>epoch=25, lrate=0.100, error=279.128
>epoch=26, lrate=0.100

After training, we can make use of the model to predict whether a patient gets diabetes (i.e. the model outputs a class label of 0 or 1, where 1 means getting diabetes and 0 means not getting diabetes).

Here I randomly generate some feature values and test whether such a person would be predicted to get diabetes.

(The last entry of the test row is the outcome, but it does not mean the actual outcome is that value. **It is only a placeholder required by the program design.**)

In [9]:
# Build one synthetic row and classify it with the trained network.
# row has a structure like the following
# [Pregnancies, glucose, blood pressure, skin thickness, Insulin, BMI, DiabetesPedigreeFunction, Age, (Outcome)]
# Import the helpers by name rather than with `import random`: rebinding the
# name `random` to the module would shadow the random() function that
# initialize_network calls and break any later re-run of the training.
from random import choice, randint, uniform

random_test_row = [randint(0,30),
                   randint(0,150),
                   randint(0,96),
                   randint(0,40),
                   randint(0,100),
                   uniform(0,35.3),
                   uniform(0,2.5),
                   randint(0,60),
                   choice([0,1])]

# The network was trained on rows rescaled with `minmax`, so the new row has
# to be rescaled the same way before prediction. A copy is scaled so the raw
# values can still be printed.
# NOTE(review): assumes the CSV columns are in the order listed above - confirm.
scaled_test_row = list(random_test_row)
normalize_dataset([scaled_test_row], minmax)

# predict() returns the index of the strongest output neuron, i.e. a class
# label, not a probability; the variable name is kept for compatibility.
predicted_prob = predict(trained_network, scaled_test_row)
print(random_test_row)
print("The result of random_test_row is " + str(predicted_prob))

[16, 80, 69, 36, 70, 9.97249206121743, 1.028591683917503, 60, 1]
The result of random_test_row is 1
