## Backpropagation Example Implementation

This example has been adapted form the below blog post:

https://mattmazur.com/2015/03/17/a-step-by-step-backpropagation-example/

For the mathematical explaination read my blog post and neural network fundamentals series here:

https://samzee.net/2019/02/20/neural-networks-learning-the-basics-backpropagation/

The standard process for building the neural network is as follows:
1. Initialize Network
2. Forward Propagate
3. Back Propagate Error
4. Train Network

### Reproduce example in blog post

In [46]:
import numpy as np
import random
import io
import pandas as pd
import plotly.express as px
import numpy
from sklearn.model_selection import train_test_split

In [7]:
def initialze_network(n_input, n_hidden, n_output):
    network = []
    hidden_layer = {'weights': [np.random.rand(n_input,n_hidden)]}
    network.append(hidden_layer)
    hidden_layer = {'weights': [np.random.rand(n_input,n_hidden)]}
    network.append(hidden_layer)
    output_layer = {'weights': [np.random.rand(n_hidden,n_output)]}
    network.append(output_layer)
    return network
#defines sigmoid function
def sigmoid(x):
    s = 1/(1 + np.exp(-x))
    return s
#derivative of the loss function delta rule
def dloss(target, output):
    error = -(target - output)
    return error
#forward pass
def forward_pass(weights, inputs):
    a = np.dot(inputs, weights)
    return a

In [8]:
#create the network
random.seed(1)
network = initialze_network(2, 2, 1)
weights = [neuron['weights'] for neuron in network]

In [9]:
#testing example
x = np.array([[0.01,0.2],[0.15,0.08]])
y = np.array([[0.01,0.5],[0.7,0.1]])
z = np.array([[0.24,0.55]])

inputs = np.array([0.5, 0.1])
target = np.array([0.1])
inputs = inputs.reshape(1, 2)

In [10]:
#update randomised inputs to fixed inputs
weights[0][0] = x
weights[1][0] = y
weights[2][0] = z

In [11]:
h1 = forward_pass(weights[0][0], inputs)
h2 = forward_pass(weights[1][0], h1)
#output
outputs = forward_pass(weights[2][0].T, h2)

In [12]:
outputs

array([[0.029632]])

In [13]:
outputs

array([[0.029632]])

In [14]:
#the backward pass outputs
def out_gradient(target, output, inputs):
    gradient = (-(target - output)*output*(1- output))*inputs
    return gradient

In [15]:
h2

array([[0.0758, 0.0208]])

In [16]:
target

array([0.1])

In [17]:
grad = [out_gradient(target, outputs, item) for item in h2]
#output gradients
gradients = np.vstack(grad).T
gradients

array([[-1.53370504e-04],
       [-4.20858375e-05]])

In [75]:
def hid_gradient(target, output, inputs, weight, neurons):
    neuron = []
    for i in range(len(output)):
        s_o = (-(target[i] - output[i]))*(output[i]*(1- output[i]))
        k = s_o*weight[:, neurons][i]
        neuron.append(k)
    out = np.sum(neuron)*inputs
    return out

In [22]:
n_hidden_two = 2
w = weights[2][0]
mygrads = []
for neuron in range(n_hidden_two):
    grad = np.array([hid_gradient(target, outputs, myinputs, w , neuron) for myinputs in h1])
    mygrads.append(grad)
mygrads = np.vstack(mygrads)
mygrads

array([[-9.71211635e-06, -5.24454283e-05],
       [-2.22569333e-05, -1.20187440e-04]])

In [23]:
def hid_two_layer(target, outputs, inputs, w1, neuron, w2):
    mygrads = []
    for i in range(len(w1)):
        grad = hid_gradient(target, outputs, w1[:,neuron][i], w2,i)
        mygrads.append(grad)
    xsum = np.sum(mygrads)*inputs
    return xsum

In [24]:
n_hidden_two = 2
h2_mygrads = []
w1 = weights[1][0]
w2 = weights[2][0]
for neuron in range(n_hidden_two):
    grad = np.array([hid_two_layer(target, outputs, myinputs, w1, neuron, w2) for myinputs in inputs])
    h2_mygrads.append(grad)
h2_mygrads = np.vstack(h2_mygrads)
h2_mygrads

array([[-3.91924362e-04, -7.83848723e-05],
       [-1.77043788e-04, -3.54087575e-05]])

In [25]:
#set the learning rate
#update weights
l = 0.5
weights[0][0] = weights[0][0] - l*h2_mygrads
weights[1][0] = weights[1][0] - l*mygrads
weights[2][0] = weights[2][0].T - l*gradients


In [26]:
weights[2][0] = weights[2][0].T

In [27]:
weights[2][0]

array([[0.24007669, 0.55002104]])

In [28]:
weights[2][0]

array([[0.24007669, 0.55002104]])

## Put Everything Together

In [304]:
def initialze_network(n_input, n_hidden, n_output):
    network = []
    hidden_layer = {'weights': [np.random.rand(n_input,n_hidden)]}
    network.append(hidden_layer)
    hidden_layer = {'weights': [np.random.rand(n_input,n_hidden)]}
    network.append(hidden_layer)
    output_layer = {'weights': [np.random.rand(n_hidden,n_output)]}
    network.append(output_layer)
    return network
#forward pass
#defines sigmoid function
def sigmoid(x):
    s = 1/(1 + np.exp(-x))
    return s
#derivative of the loss function delta rule
def dloss(target, output):
    error = -(target - output)
    return error
#forward pass
def forward_pass(weights, inputs):
    a = np.dot(weights, inputs)
    return a
def out_gradient(target, output, inputs):
    gradient = (-(target - output)*output*(1- output))*inputs
    return gradient

#onehidden layers
def hid_gradient(target, output, inputs, weight, neurons):
    neuron = []
    for i in range(len(output)):
        s_o = -(target[i] - output[i])*(output[i]*(1- output[i]))
        k = s_o*weight[:, neurons][i]
        neuron.append(k)
    out = np.sum(neuron)*inputs
    return out
def hid_two_layer(target, outputs, inputs, w1, neuron, w2):
    mygrads = []
    for i in range(len(w1)):
        grad = hid_gradient(target, outputs, w1[:,neuron][i], w2,i)
        mygrads.append(grad)
    xsum = np.sum(mygrads)*inputs
    return xsum

#loss function
def lossfunction(target,output):
    loss = (1/2)*(target - output)**2
    return loss

In [305]:
#inputs = np.array([[0.5, 0.1], [0.5, 0.1], [0.5, 0.1]])
#target = np.array([0.1])

Dataset from:

https://machinelearningmastery.com/implement-backpropagation-algorithm-scratch-python/

An alternative implementation can also be accessed via this link

In [326]:
def train_network(network, inputs, target, l, n_epoch):
    for epoch in range(n_epoch):
        #mygrads = []
        h2_mygrads = []
        myloss = []
        mygrads = []
        for i in range(len(inputs)):
            h1 = forward_pass(weights[0][0].T, inputs[i])
            h2 = forward_pass(weights[1][0], h1)
            outputs = forward_pass(weights[2][0], h2)
            outputs = sigmoid(outputs)
            loss = lossfunction(target[i], outputs)
            t_loss = np.sum(loss)
            myloss.append(t_loss)
            grad = [out_gradient(target[i], outputs, item) for item in h2]
            #print("this is output gradient {} for iteration {}".format(grad, i))
            gradients = np.vstack(grad).T     #output gradients
            n_hidden_two = 2
            w = weights[2][0]
            #hidden layer two
            for neuron in range(n_hidden_two):
                grad = np.array([hid_gradient(target, outputs, myinputs, weights[2][0] , neuron) for myinputs in h1])
                mygrads.append(grad)
            mygrads = np.vstack(mygrads)
            #hidden layer one
            n_hidden_two = 2
            w1 = weights[1][0]
            w2 = weights[2][0]
            for neuron in range(n_hidden_two):
                grad = np.array([hid_two_layer(target, outputs, myinputs, w1, neuron, w2) for myinputs in inputs[i]])
                h2_mygrads.append(grad)
            h2_mygrads = np.vstack(h2_mygrads)
            weights[0][0] = weights[0][0].T - l*h2_mygrads
            weights[1][0] = weights[1][0] - l*mygrads
            weights[2][0] = weights[2][0] - l*gradients
            weights[0][0] = weights[0][0].T
            #weights[2][0] = weights[2][0].T
            final_loss = np.sum(myloss)
            #print(weights[0][0])
            #print(final_loss)
            #print('>epoch={}, error={}'.format(n_epoch, final_loss))
            return final_loss

In [327]:
#inputs = np.array(inputs)
#training samples
inputs = np.array([[2.7810836, 2.550537003], [1.465489372, 2.362125076], [1.396561688, 4.400293529], [1.38807019, 1.850220317],
                  [3.06407232, 3.005305973],[7.627531214, 2.759262235],[5.332441248, 2.088626775],[6.922596716, 1.77106367],
                  [8.675418651, 0.242068655], [7.673756466, 3.508563011]])
target = np.array([[0], [0], [0], [0], [0], [1], [1], [1], [1], [1]])

In [328]:
network = initialze_network(inputs.shape[1], 2, target.shape[1])
weights = [neuron['weights'] for neuron in network]
x = np.array([[0.01,0.2],[0.15,0.08]])
y = np.array([[0.01,0.5],[0.7,0.1]])
z = np.array([[0.24,0.55]])
weights[0][0] = x
weights[1][0] = y
weights[2][0] = z
n_epoch = [x for x in range(1, 20)]
loss = []
for n in n_epoch:
    myloss = train_network(network, inputs, target, 0.5, n)
    loss.append(myloss)

Our neural network works as we see the decrease in the loss function with each epoch

In [329]:
#from matplotlib import pyplot
#pyplot.plot(n_epoch[:], loss[:])
#pyplot.plot(series_in, relu_out)
#pyplot.show()

In [330]:
import plotly.graph_objects as go
fig = go.Figure()
# Add traces
fig.add_trace(go.Scatter(x=n_epoch[:], y=loss[:]))
fig.show()

## Loan data

In [362]:
file = "C:/Users/samantha.vandermerwe/Documents/NeuralNetworks/loan.csv"
df = pd.read_csv(file)
df.shape

(163987, 15)

In [363]:
df.head()

Unnamed: 0,loan_amnt,term,int_rate,emp_length,home_ownership,annual_inc,purpose,addr_state,dti,delinq_2yrs,revol_util,total_acc,longest_credit_length,bad_loan,longest_credit_length.1
0,5000,36,10.65,10.0,RENT,24000.0,credit_card,AZ,27.65,0.0,83.7,9.0,26.0,0,26.0
1,2500,60,15.27,0.0,RENT,30000.0,car,GA,1.0,0.0,9.4,4.0,12.0,1,12.0
2,2400,36,15.96,10.0,RENT,12252.0,small_business,IL,8.72,0.0,98.5,10.0,10.0,0,10.0
3,10000,36,13.49,10.0,RENT,49200.0,other,CA,20.0,0.0,21.0,37.0,15.0,0,15.0
4,5000,36,7.9,3.0,RENT,36000.0,wedding,AZ,11.2,0.0,28.3,12.0,7.0,0,7.0


In [364]:
#apply one hot encoding
home_ownership = pd.get_dummies(df['home_ownership'])
purpose = pd.get_dummies(df['purpose'])
addr_state = pd.get_dummies(df['addr_state'])

In [365]:
df = df.drop(['home_ownership', 'purpose', 'addr_state'],axis = 1)
# Join the encoded df
df = df.join(home_ownership)
df = df.join(purpose)
df = df.join(addr_state)

In [366]:
df.shape

(163987, 82)

In [380]:
df_x = df.drop(['bad_loan'], axis = 1)
df_y = df['bad_loan']
df_norm = (df_x - df_x.mean()) / (df_x.max() - df_x.min())
X = df_norm.values
y = df_y.values
y= y.reshape(y.shape[0], 1)
X.shape, y.shape
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [381]:
network = initialze_network(X_train.shape[1], 2, y_train.shape[1])
weights = [neuron['weights'] for neuron in network]

In [382]:
network = initialze_network(X_train.shape[1], 2, y_train.shape[1])
weights = [neuron['weights'] for neuron in network]
x = weights[0][0]
y = np.array([[0.01,0.5],[0.7,0.1]])
z = np.array([[0.24,0.55]])
weights[0][0] = x
weights[1][0] = y
weights[2][0] = z

In [396]:
n_epoch = [x for x in range(1, 200)]
loss = []
for n in n_epoch:
    myloss = train_network(network, X_train, y_train, 0.5, n)
    loss.append(myloss)

In [397]:
import plotly.graph_objects as go
fig = go.Figure()
# Add traces
fig.add_trace(go.Scatter(x=n_epoch[:], y=loss[:]))
fig.show()

In [385]:
#predict neural network
preds = []
inputs = np.array(X_test)
for i in range(len(inputs)):
    h1 = forward_pass(weights[0][0].T, inputs[i])
    h2 = forward_pass(weights[1][0], h1)
    outputs = forward_pass(weights[2][0], h2)
    outputs = sigmoid(outputs)
    preds.append(outputs)
preds = np.vstack(preds)

In [386]:
preds

array([[0.00134844],
       [0.13906487],
       [0.89967534],
       ...,
       [0.14995518],
       [0.90980156],
       [0.02864767]])

In [387]:
mypreds = []
for prediction in preds:
    if prediction > 0.5:
        prediction = 1
    else:
        prediction = 0
    mypreds.append(prediction)


invalid value encountered in greater



In [388]:
correct = 0
cor = []
for i in range(len(y_test)):
    if y_test[i] == mypreds[i]:
        correct += 1
    cor.append(correct)

In [389]:
correct/len(y_test)*100

52.56418074272822

In [390]:
from sklearn.metrics import f1_score
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import precision_score
from sklearn.metrics import confusion_matrix

In [391]:
f1 = f1_score(y_test, mypreds)*100
acc = accuracy_score(y_test, mypreds)*100
auc = roc_auc_score(y_test, mypreds)*100
precision = precision_score(y_test, mypreds)*100
matrix = confusion_matrix(y_test, mypreds)

In [392]:
f1, acc, auc, precision

(28.587166069953184, 52.56418074272822, 51.98614514736213, 19.849566547679757)

In [393]:
matrix

array([[14126, 12574],
       [ 2984,  3114]], dtype=int64)