# Intro to NNets

Reference: https://victorzhou.com/blog/intro-to-neural-networks/

## Instructions

1. Create Virtual Environment: `python3 -m venv datascience-venv`
2. Set Virtual Environment: `source datascience-venv/bin/activate`
3. Install JupyterLab in your Virtual Env using pip: `pip3 install jupyterlab`
4. Install dependencies (`numpy`, `pandas`, `scikit-learn`) into the virtual environment
   * `pip3 install pandas`, `pip3 install scikit-learn`
5. Add your Virtual Environment as a kernel to Jupyterlab: `python3 -m ipykernel install --user --name=datascience-venv`
6. Start JupyterLab from the virtual environment: `jupyter-lab --notebook-dir <location of your notebooks>`
7. Make sure your set your Virtual Env's kernel in the notebook that you're using

## Neural Net being built

![Neural Net being built](pngs/neural_net_intro_network.svg "Neural Net being built")

In [3]:
import numpy as np
import pandas as pd

In [67]:
# Play with types
if False:
    l = np.array([1,2,3,4,5,6])
    l[4:6]
    
    [1,2,3,4,5,6][0:2]

In [85]:
# Play with types
if True:
    testdataabcd = np.array([
      [-2, -1],  # Alice
      [25, 6],   # Bob
      [17, 4],   # Charlie
      [-15, -6], # Diana
    ])
testdataabcd[1]
# testdataabcd.shape[1]

array([25,  6])

## Formulaic Explanation: Backpropagation

Refer to the reference URL to follow along for the derivative.

![Derivatives Intuition](pngs/derivatives_eq.png "Derivatives Intuition")

```
# Loss Function - RMSE
L = (1/n) * np.sum(((yactual - ypred) ** 2))

# Partial derivative of Loss Function / Ypred
dL_by_dypred = -2 * (1 - ypred) # When we take 1 sample at a time

# Derivative of sigmoid function
sigmoid'(a) = sigmoid(a) * (1 - sigmoid(a))

# Partial derivative of dypred_by_dh1
dypred_by_dh1 = w5 * sigmoid'(w5*h1 + w6*h2 + b3)
# dypred_by_dh1 = w5 * sigmoid(w5*h1 + w6*h2 + b3) * (1 - sigmoid(w5*h1 + w6*h2 + b3))

# Partial derivative of h1 over w1
dh1_by_dw1 = w1 * sigmoid'(w1*x1 + w2*x2 + b1)

# For SGD, you'll need to compute the partial derivatives for all the weights & biases and then iterate
## The above is just for weight w1
```

## Intuition Explanation: Stochastic Gradient Descent to train the NNet


![SGD](pngs/stochastic_grad_descent_intro.png "SGD Intro")

In [102]:
# sigmoid activation function
def sigmoid(logit: float) -> float:
    return 1 / (1 + np.exp(-1 * logit))

def derivative_of_sigmoid(logit: float) -> float:
    return sigmoid(logit) * (1 - sigmoid(logit))

# RMSE - aka Root Mean Square Error
def rmse_impl(ypred: np.array, yactual: np.array):
    return ((ypred - yactual) ** 2).mean()

class Neuron:
    def __init__(self, weights, bias, activation_function):
        self.weights: np.array = weights
        self.bias: int = bias
        self.activation_function = activation_function
    def feed_forward(self, input_vector: np.array) -> float:
        # note: Dot Product of matrixes returns a scalar value
        return self.activation_function(np.dot(input_vector, self.weights) + self.bias)
    def feed_forward_pre_activation(self, input_vector: np.array) -> float:
        return (np.dot(input_vector, self.weights) + self.bias)

class NeuralNet:
    def __init__(self, weights: list, biases: list):
        self.weights_w1_w2: np.array = np.array(weights[0:2])
        self.weights_w3_w4: np.array = np.array(weights[2:4])
        self.weights_w5_w6: np.array = np.array(weights[4:6])
        self.bias_h1 = np.array(biases[0])
        self.bias_h2 = np.array(biases[1])
        self.bias_h3 = np.array(biases[2])

        # hidden layer
        self.h1: Neuron = Neuron(self.weights_w1_w2, self.bias_h1, sigmoid)
        self.h2: Neuron = Neuron(self.weights_w3_w4, self.bias_h2, sigmoid)

        # output layer
        self.o1: Neuron = Neuron(self.weights_w5_w6, self.bias_h3, sigmoid)

    def feed_forward(self, input_vector: np.array) -> float:
        # Feed the activated return values from h1 and h2 into o1
        return self.o1.feed_forward(
            np.array([
                self.h1.feed_forward(input_vector),
                self.h2.feed_forward(input_vector)
            ])
        )

    def train(self, in_train_X: np.array, in_train_Y: np.array, learn_rate: float, epochs: int):

        result = ()

        _num_samples = in_train_X.shape[0]
        
        for _epoch in range(epochs):
            for _iter in range(_num_samples):
                
                train_X: np.array = in_train_X[_iter]
                y_actual = in_train_Y[_iter]
                
                ypred = self.feed_forward(train_X)
    
                # Calculate partial derivatives
                dL_by_dypred = -2 * (y_actual - ypred)
    
                # neurons
                h1_pre_activation = self.h1.feed_forward_pre_activation(train_X)
                h2_pre_activation = self.h2.feed_forward_pre_activation(train_X)
                o1_pre_activation = self.o1.feed_forward_pre_activation(
                    np.array([
                        self.h1.feed_forward(train_X),
                        self.h2.feed_forward(train_X)
                    ])
                )
    
                # dypred_by_dh1 = w5 * sigmoid'(w5*h1 + w6*h2 + b3)
                dypred_by_dh1 = self.weights_w5_w6[0] * derivative_of_sigmoid(o1_pre_activation)
                dypred_by_dh2 = self.weights_w5_w6[1] * derivative_of_sigmoid(o1_pre_activation)
    
                # dypred_by_w5 = h1 * sigmoid'(w5*h1 + w6*h2 + b3)
                dypred_by_w5 = self.h1.feed_forward(train_X) * derivative_of_sigmoid(o1_pre_activation)
                dypred_by_w6 = self.h2.feed_forward(train_X) * derivative_of_sigmoid(o1_pre_activation)
                dypred_by_b3 = 1 * derivative_of_sigmoid(o1_pre_activation)
    
                # dh1_by_dw1 = w1 * sigmoid'(w1*x1 + w2*x2 + b1)
                dh1_by_dw1 = self.weights_w1_w2[0] * derivative_of_sigmoid(h1_pre_activation)
                dh1_by_dw2 = self.weights_w1_w2[1] * derivative_of_sigmoid(h1_pre_activation)
                dh1_by_db1 = 1 * derivative_of_sigmoid(h1_pre_activation)
    
                dh2_by_dw3 = self.weights_w3_w4[0] * derivative_of_sigmoid(h2_pre_activation)
                dh2_by_dw4 = self.weights_w3_w4[1] * derivative_of_sigmoid(h2_pre_activation)
                dh2_by_db2 = 1 * derivative_of_sigmoid(h2_pre_activation)
    
                do1_by_dw5 = self.weights_w5_w6[0] * derivative_of_sigmoid(o1_pre_activation)
                do1_by_dw6 = self.weights_w5_w6[1] * derivative_of_sigmoid(o1_pre_activation)
                do1_by_db3 = 1 * derivative_of_sigmoid(o1_pre_activation)
                
    
                # Update the weights to be used in the next gradient descent cycle
                # w1, w2, b1
                self.weights_w1_w2[0] -= learn_rate * dL_by_dypred * dypred_by_dh1 * dh1_by_dw1
                self.weights_w1_w2[1] -= learn_rate * dL_by_dypred * dypred_by_dh1 * dh1_by_dw2
                self.bias_h1 -= learn_rate * dL_by_dypred * dypred_by_dh1 * dh1_by_db1
    
                # w3, w4, b2
                self.weights_w3_w4[0] -= learn_rate * dL_by_dypred * dypred_by_dh2 * dh2_by_dw3
                self.weights_w3_w4[1] -= learn_rate * dL_by_dypred * dypred_by_dh2 * dh2_by_dw4
                self.bias_h2 -= learn_rate * dL_by_dypred * dypred_by_dh2 * dh2_by_db2
    
                # w5, w6, b3
                self.weights_w5_w6[0] -= learn_rate * dL_by_dypred * dypred_by_w5
                self.weights_w5_w6[1] -= learn_rate * dL_by_dypred * dypred_by_w6
                self.bias_h3 -= learn_rate * dL_by_dypred * dypred_by_b3
                
            # --- Calculate total loss at the end of each epoch
            if _epoch % 10 == 0:
              y_preds = np.apply_along_axis(self.feed_forward, 1, in_train_X)
              loss = rmse_impl(in_train_Y, y_preds)
              print("Epoch %d loss: %.3f" % (_epoch, loss))

In [100]:
# tests 

# neuron methods test
weights = np.array([0, 1]) # w1, w2 in the diagram above
bias = 4 # b1 in the diagram above
X = np.array([2, 3]) # input vector - weight, height in the diagram above

n = Neuron(weights=weights, bias=bias, activation_function=sigmoid)
h1 = n.feed_forward(X)

assert isinstance(h1, float), "Feed forward type is not float"
h1 # Expected value is meant to look like - 0.9990889488055994

# neural net methods test
nnet = NeuralNet([0, 1, 0, 1, 0, 1], [0, 0, 0])
o1 = nnet.feed_forward(X)

assert o1 == 0.7216325609518421 # Got the value from the reference link for assert comparison


# rmse function test
y_true = np.array([1, 0, 0, 1])
y_pred = np.array([0, 0, 0, 0])

rmse = rmse_impl(y_true, y_pred)
rmse

# test that the feed_forward_pre_activation method works correctly
h1_prior_to_activation = n.feed_forward_pre_activation(X)
assert(
    np.dot(weights, X) + bias == h1_prior_to_activation
)

In [104]:
# Labelled training dataset
data = np.array([
  [-2, -1],  # Alice
  [25, 6],   # Bob
  [17, 4],   # Charlie
  [-15, -6], # Diana
])
all_y_trues = np.array([
  1, # Alice
  0, # Bob
  0, # Charlie
  1, # Diana
])

# Train our neural network!
network = NeuralNet(
    np.random.normal(size=(1,6)).tolist()[0], # weights
    np.random.normal(size=(1,3)).tolist()[0] # biases
)
network.train(data, all_y_trues, 0.1, 1000)

Epoch 0 loss: 0.287
Epoch 10 loss: 0.198
Epoch 20 loss: 0.155
Epoch 30 loss: 0.126
Epoch 40 loss: 0.105
Epoch 50 loss: 0.088
Epoch 60 loss: 0.075
Epoch 70 loss: 0.064
Epoch 80 loss: 0.056
Epoch 90 loss: 0.049
Epoch 100 loss: 0.043
Epoch 110 loss: 0.039
Epoch 120 loss: 0.035
Epoch 130 loss: 0.031
Epoch 140 loss: 0.029
Epoch 150 loss: 0.026
Epoch 160 loss: 0.024
Epoch 170 loss: 0.022
Epoch 180 loss: 0.021
Epoch 190 loss: 0.019
Epoch 200 loss: 0.018
Epoch 210 loss: 0.017
Epoch 220 loss: 0.016
Epoch 230 loss: 0.015
Epoch 240 loss: 0.014
Epoch 250 loss: 0.013
Epoch 260 loss: 0.013
Epoch 270 loss: 0.012
Epoch 280 loss: 0.012
Epoch 290 loss: 0.011
Epoch 300 loss: 0.011
Epoch 310 loss: 0.010
Epoch 320 loss: 0.010
Epoch 330 loss: 0.009
Epoch 340 loss: 0.009
Epoch 350 loss: 0.009
Epoch 360 loss: 0.008
Epoch 370 loss: 0.008
Epoch 380 loss: 0.008
Epoch 390 loss: 0.008
Epoch 400 loss: 0.007
Epoch 410 loss: 0.007
Epoch 420 loss: 0.007
Epoch 430 loss: 0.007
Epoch 440 loss: 0.007
Epoch 450 loss: 0.006

In [113]:
# Make some predictions - based on the model trained in the previous cell

emily = np.array([-7, -3]) # 128 pounds, 63 inches - expected prediction is: 0.951 - F
frank = np.array([20, 2])  # 155 pounds, 68 inches - expected prediction is: 0.039 - M
frank2 = np.array([20, 4])
print("Emily: %.3f" % network.feed_forward(emily))
print("Frank: %.3f" % network.feed_forward(frank))
print("Frank2: %.3f" % network.feed_forward(frank2))

print("Training data: Alice: %.3f" % network.feed_forward(np.array([-2, -1])))
print("Training data: Bob: %.3f" % network.feed_forward(np.array([25, 6])))

Emily: 0.972
Frank: 0.638
Frank2: 0.072
Training data: Alice: 0.950
Training data: Bob: 0.056


In [None]:
## END RESULT
The model doesn