# Neural Network from Scratch

### Key Points:

Stage 2:
* 3-layer neural network
* Feedforward with backpropagation
* Framework: Numpy
* Training data: Wine Classification based on 13 features
* Loss function: Squared Error (sum of squared differences between target and output)
* Activation Function: Tanh (hidden layers), Softmax (output layer)
* Nodes in input layer = 13
* Nodes in first layer (Hidden layer) = 8
* Nodes in second layer (Hidden layer) = 5
* Nodes in third layer (output layer) = 3

In [1]:
import numpy as np
import pandas as pd

In [None]:
# CONSTANTS
# Layer widths, listed in order: input -> hidden 1 -> hidden 2 -> output.
INPUT_SHAPE, DIM_H1, DIM_H2, OUTPUT_SHAPE = 13, 8, 5, 3

In [None]:
class NeuralNetwork:
    def __init__(self, x, y):
        """Store the training data and create the network parameters.

        Args:
            x: Input samples. They are multiplied by ``weights1`` in
                ``feedforward``, so they need ``INPUT_SHAPE`` columns.
            y: Training targets. ``output`` is pre-allocated with the same
                shape.
        """
        self.input = x
        self.y = y

        # Weights are drawn uniformly from [0, 1); biases start at zero.
        # Every parameter is sized from the layer-width constants so the
        # shapes chain: INPUT_SHAPE -> DIM_H1 -> DIM_H2 -> OUTPUT_SHAPE.
        self.weights1 = np.random.rand(INPUT_SHAPE, DIM_H1)
        self.bias1 = np.zeros((1, DIM_H1))
        self.weights2 = np.random.rand(DIM_H1, DIM_H2)
        # The second hidden layer is DIM_H2 wide, so its bias must be too
        # (it was previously allocated with DIM_H1 columns).
        self.bias2 = np.zeros((1, DIM_H2))
        # One output column per class (previously a hard-coded (5, 1)).
        self.weights3 = np.random.rand(DIM_H2, OUTPUT_SHAPE)
        self.bias3 = np.zeros((1, OUTPUT_SHAPE))

        # Placeholder for predictions; overwritten by feedforward().
        self.output = np.zeros(self.y.shape)
        
    def feedforward(self):
        """Run one forward pass and cache every layer's activation.

        Stores the two hidden activations in ``self.layer1`` and
        ``self.layer2`` and the prediction in ``self.output``. Only the
        weight matrices take part; the bias attributes are not added here.
        """
        # Hidden layer 1: weighted sum of the inputs, passed through tanh.
        hidden1_in = np.dot(self.input, self.weights1)
        self.layer1 = tanh(hidden1_in)

        # Hidden layer 2: same pattern, fed by layer 1.
        hidden2_in = np.dot(self.layer1, self.weights2)
        self.layer2 = tanh(hidden2_in)

        # Output layer: weighted sum of layer 2, passed through softmax.
        logits = np.dot(self.layer2, self.weights3)
        self.output = softmax(logits)
        
    def backprop(self):
        # application of the chain rule to find derivative of the loss function with respect to weights2 and weights1
        d_weights2 = np.dot(self.layer1.T, (2*(self.y - self.output) * sigmoid_derivative(self.output)))
        d_weights1 = np.dot(self.input.T,  (np.dot(2*(self.y - self.output) * sigmoid_derivative(self.output), self.weights2.T) * sigmoid_derivative(self.layer1)))

        # update the weights with the derivative (slope) of the loss function
        self.weights1 += d_weights1
        self.weights2 += d_weights2