# Backpropagation (Non-GD)

Backpropagation is short for "Backwards Propagation of errors" and refers to a specific (rather calculus intensive) algorithm for how weights in a neural network are updated in reverse order at the end of each training epoch.

---

In [9]:
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np

In [10]:
df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data', header=None)

In [11]:
y = df.iloc[0:100, 4].values
y = np.where(y == 'Iris-setosa', -1, 1).reshape(100, 1)

In [12]:
x = df.iloc[0:100, [0, 2]].values

In [13]:
scaler = StandardScaler()
x = scaler.fit_transform(x, y)

In [14]:
x.shape, y.shape

((100, 2), (100, 1))

## Breakdown

In [31]:
inputs = x.shape[1]
hiddenNodes = 1000
outputNodes = 1
niter = 5

weights1 = np.random.randn(inputs, hiddenNodes)
weights2 = np.random.rand(hiddenNodes, outputNodes)

weights1.shape, weights2.shape

((2, 1000), (1000, 1))

In [32]:
def sigmoid(s):
    return 1 / (1+np.exp(-s))

def sigmoidPrime(s):
    return s * (1 - s)

In [33]:
for i in range(niter):
    print("Iteration:", i, '\n')
    print("Initial Values")
    print("Weights1:", weights1.shape)
    print("Weights2:", weights2.shape)
    print('-----------------------', '\n')
    
    print('Feed forward', '\n')
    # Weighted sum of inputs and hidden layer
    hidden_sum = np.dot(x, weights1)
    print("Hidden sum: Dot product of weights1 and our input matrix", hidden_sum.shape, '\n')
    
    # Acivations of weighted sum
    activated_hidden = sigmoid(hidden_sum)
    print("Activations of hidden sum: Values of our hidden sum that have been passed \
           \n  through our activation function", activated_hidden.shape, '\n')
    
    # Weight sum between hidden and output
    output_sum = np.dot(activated_hidden, weights2)
    print("Output sum: Dot product of our activations of hidden sum and weights2", output_sum.shape, '\n')
    
    # Final activation of output
    activated_output = sigmoid(output_sum)
    print("Activated output: Value of our output sum after pasing it though the activation function", activated_output.shape)
    print('-----------------------', '\n\n')
    
    #backprop
    print('Backpropagation of errors', '\n')
    o_error = np.subtract(y, activated_output) #error
    print("Output error: Difference between our expected and actual outputs", o_error.shape, '\n')
    o_delta = o_error * sigmoidPrime(activated_output)
    print("Output change: Product of our output error and the actual output after it has been passed through the \
           derivative of our activation function", o_delta.shape, '\n')
    
    z2_error = o_delta.dot(weights2.T) 
    print("Z2 error: Dot product of our output change and the transpose of weights2", z2_error.shape, '\n')
    z2_delta = z2_error*sigmoidPrime(activated_hidden)
    print("Z2 change: Product of our Z2 error and the activations of hidden sum after they have been passed\
           through the derivative of our activation function", z2_delta.shape, '\n')


    weights1 += x.T.dot(z2_delta) 
    print("New weights1: Dot product of the transpose of our input matrix and Z2 change", weights1.shape, '\n')
    weights2 += activated_hidden.T.dot(o_delta)    
    print("New weights 2: Dot product between the transpose of our activations of hidden sum and the output\
           change", weights2.shape)
    print('-----------------------')
    print('\n\n\n')
    
    
print("MSE:", str(np.mean(np.square(y - activated_output))))

Iteration: 0 

Initial Values
Weights1: (2, 1000)
Weights2: (1000, 1)
----------------------- 

Feed forward 

Hidden sum: Dot product of weights1 and our input matrix (100, 1000) 

Activations of hidden sum: Values of our hidden sum that have been passed            
  through our activation function (100, 1000) 

Output sum: Dot product of our activations of hidden sum and weights2 (100, 1) 

Activated output: Value of our output sum after pasing it though the activation function (100, 1)
----------------------- 


Backpropagation of errors 

Output error: Difference between our expected and actual outputs (100, 1) 

Output change: Product of our output error and the actual output after it has been passed through the            derivative of our activation function (100, 1) 

Z2 error: Dot product of our output change and the transpose of weights2 (100, 1000) 

Z2 change: Product of our Z2 error and the activations of hidden sum after they have been passed           through the deriva

In [34]:
class NeuralNetwork: 
    def __init__(self, inputs, hidden_nodes, output_nodes):
        # Set up Archietecture 
        self.inputs = inputs
        self.hiddenNodes = hidden_nodes
        self.outputNodes = output_nodes
        
        # Initial weights
        self.weights1 = np.random.randn(self.inputs, self.hiddenNodes)
        self.weights2 = np.random.rand(self.hiddenNodes, self.outputNodes)
    
    
    def sigmoid(self, s):
        return 1 / (1+np.exp(-s))
    
    
    def sigmoidPrime(self, s):
        return s * (1 - s)
    
    
    def feed_forward(self, X):
        # Weighted sum of inputs and hidden layer
        self.hidden_sum = np.dot(X, self.weights1)
        
        # Acivations of weighted sum
        self.activated_hidden = self.sigmoid(self.hidden_sum)
        
        # Weight sum between hidden and output
        self.output_sum = np.dot(self.activated_hidden, self.weights2)
        
        # Final activation of output
        self.activated_output = self.sigmoid(self.output_sum)
        
        return self.activated_output
    
    
    def backward(self, X, y, o):
        # np.subtract to keep shape consistant
        self.o_error = np.subtract(y, o) #error
        self.o_delta = self.o_error * self.sigmoidPrime(o)
        
        self.z2_error = self.o_delta.dot(self.weights2.T) 
        self.z2_delta = self.z2_error*self.sigmoidPrime(self.activated_hidden)
        
        self.weights1 += X.T.dot(self.z2_delta) 
        self.weights2 += self.activated_hidden.T.dot(self.o_delta) 
        
        
    def train(self, X, y, epochs = 10000):
        for i in range(epochs):
            o = self.feed_forward(X)
            self.backward(X, y, o)
        predictions = np.where(activated_output >= 0.0, 1, -1)
        print("That shizz is trained. \n Loss =", str(np.mean(np.square(y - predictions))))

In [35]:
model = NeuralNetwork(inputs = x.shape[1], hidden_nodes = 300, output_nodes = 1)

model.train(x, y)

That shizz is trained. 
 Loss = 2.0
