In [1]:
%matplotlib inline

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Simple Neural Network

## Description ...

In [3]:
def sigmoid(t):
    return 1/(1+np.exp(-t))

def sigmoid_derivative(t):
    return sigmoid(t) * (1 - sigmoid(t))

def mse(y_hat, y):
    return np.mean(np.square(y_hat - y))
    
def mse_derivative(y_hat, y):
    return 2*(y_hat - y)

class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        
        self.W1 = np.random.rand(input_size, hidden_size) 
        self.W2 = np.random.rand(hidden_size, output_size)
        
        self.b1 = np.zeros((hidden_size, 1))
        self.b2 = np.zeros((output_size, 1))
        
    def feedforward(self, X, sigma):
        A0 = X
        
        self.Z1 = A0.dot(self.W1) + self.b1
        self.A1 = sigma(self.Z1)

        self.Z2 = self.A1.dot(self.W2) + self.b2
        self.A2 = sigma(self.Z2)
        
        return self.A2
        
    def backprop(self, X, y, d_sigma, d_loss, alpha=1):
        A0 = X
        m = X.shape[0]
        
        d_Z2 = d_loss(self.A2, y) * d_sigma(self.Z2)
        
        d_W2 = (1/m) * self.A1.T.dot(d_Z2)
        d_b2 = (1/m) * np.sum(d_Z2, axis=0, keepdims=True)
        
        d_Z1 = d_Z2.dot(self.W2.T) * d_sigma(self.A1)
        
        d_W1 = (1/m) * A0.T.dot(d_Z1)
        d_b1 = (1/m) * np.sum(d_Z1, axis=1, keepdims=True)
    
        self.W1 -= alpha * d_W1
        self.W2 -= alpha * d_W2
        
        self.b1 -= alpha * d_b1
        self.b2 -= alpha * d_b2
      
    def train(self, X, y, sigma, d_sigma, d_loss):
        self.feedforward(X, sigma)
        self.backprop(X, y, d_sigma, d_loss)
        

In [4]:
X=np.array(([0,0,1],[0,1,1],[1,0,1],[1,1,1]), dtype=float)
y=np.array(([0],[1],[1],[0]), dtype=float)

input_size = X.shape[1]
hidden_size = 4
output_size = 1
NN = NeuralNetwork(input_size, hidden_size, output_size)
for i in range(1000): # trains the NN 1,000 times
    if i % 100 ==0: 
        print ("for iteration # " + str(i) + "\n")
        forward = NN.feedforward(X, sigmoid)
        print ("Predicted Output: \n" + str(forward))
        print ("Loss: \n" + str(mse(forward, y)))
        print ("\n")
  
    NN.train(X, y, sigmoid, sigmoid_derivative, mse_derivative)

for iteration # 0

Predicted Output: 
[[0.80612118]
 [0.83351175]
 [0.83535902]
 [0.85561083]]
Loss: 
0.3591815602416715


for iteration # 100

Predicted Output: 
[[0.16652085]
 [0.87263352]
 [0.87340161]
 [0.17707881]]
Loss: 
0.022833867799705906


for iteration # 200

Predicted Output: 
[[0.08193853]
 [0.94196288]
 [0.94204705]
 [0.08169497]]
Loss: 
0.005028710960775753


for iteration # 300

Predicted Output: 
[[0.06087484]
 [0.95780062]
 [0.95781764]
 [0.06007493]]
Loss: 
0.0027187201663977645


for iteration # 400

Predicted Output: 
[[0.05063248]
 [0.96525897]
 [0.96525867]
 [0.04983238]]
Loss: 
0.0018652032000555827


for iteration # 500

Predicted Output: 
[[0.04433341]
 [0.96975841]
 [0.96975199]
 [0.04360819]]
Loss: 
0.0014241552034286657


for iteration # 600

Predicted Output: 
[[0.0399658 ]
 [0.97283689]
 [0.97282801]
 [0.03932048]]
Loss: 
0.0011548790958556116


for iteration # 700

Predicted Output: 
[[0.0367087 ]
 [0.97511008]
 [0.97510022]
 [0.03613502]]
Loss: 
0.00097

Bibliography:
  
1. Softmax
    * https://deepnotes.io/softmax-crossentropy
    * https://aimatters.wordpress.com/2019/06/17/the-softmax-function-derivative/
    * https://aimatters.wordpress.com/2020/06/14/derivative-of-softmax-layer/
    
2. Entropy/ Cross-Entropy
    * https://www.youtube.com/watch?v=ErfnhcEV1O8
    * https://machinelearningmastery.com/cross-entropy-for-machine-learning/
    * https://datascience.stackexchange.com/questions/20296/cross-entropy-loss-explanation

3. Neural Network:
    * https://cs231n.github.io/neural-networks-case-study/
    * https://cs231n.github.io/
    * https://github.com/tyz910
    * http://cs231n.stanford.edu/handouts/linear-backprop.pdf
    
4. Numpy axis:
    * https://i.stack.imgur.com/Z29Nn.jpg