<a href="https://colab.research.google.com/github/mbhaskar1/ML-Coursera/blob/master/neural_network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from keras.utils import to_categorical

Using TensorFlow backend.


Load the MNIST dataset and one-hot encode the labels:

In [2]:
# Fetch the MNIST train/test splits through tf.keras.
mnist = tf.keras.datasets.mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Replace every integer class label with its one-hot row vector.
y_train, y_test = to_categorical(y_train), to_categorical(y_test)

# Summarise what was loaded.
print(f'Image Shape: {X_train[0].shape}')
print(f'Training Set Size: {X_train.shape[0]}')
print(f'Testing Set Size: {X_test.shape[0]}')
print(f'y Element Shape: {y_train[0].shape}')

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Image Shape: (28, 28)
Training Set Size: 60000
Testing Set Size: 10000
y Element Shape: (10,)


Define some general-purpose helper functions:

In [0]:
def sigmoid(z):
  """Element-wise logistic function 1 / (1 + exp(-z)) for real-valued input.

  The textbook formula overflows in exp() when z is a large negative number.
  Only exp(-|z|) is evaluated here, which always lies in (0, 1], so the
  result is computed without overflow for any finite z.

  Args:
    z: Scalar or array-like of real numbers. Non-floating input (e.g. integer
      arrays) is converted to float64 first.

  Returns:
    Values in [0, 1] with the same shape as z (a NumPy scalar for scalar z).
  """
  z = np.asarray(z)
  if not np.issubdtype(z.dtype, np.floating):
    # Integer input must be promoted before negation (unsigned ints would wrap).
    z = z.astype(np.float64)
  e = np.exp(-np.abs(z))
  # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- the same value.
  out = np.where(z >= 0, 1/(1+e), e/(1+e))
  return out[()]  # unwrap 0-d results so scalar input yields a scalar

def cross_entropy(y, y_hat):
  """Binary cross-entropy between targets y and predictions y_hat, summed over units.

  Computes -(y . log(y_hat) + (1 - y) . log(1 - y_hat)) using matrix products.
  Both y_hat and 1 - y_hat are floored at 0.0001 before the logarithm so the
  result stays finite when a prediction is exactly 0 or 1.
  """
  floor = 0.0001
  log_pred = np.log(np.maximum(y_hat, floor))
  log_one_minus_pred = np.log(np.maximum(1-y_hat, floor))
  target_term = y.T @ log_pred
  complement_term = (1-y).T @ log_one_minus_pred
  return -(target_term + complement_term)

Create the neural network class:

In [0]:
class NeuralNetwork:
  """Fully connected network with one sigmoid hidden layer and a sigmoid output layer.

  Each weight matrix carries one extra column; it multiplies the constant
  bias unit of 1 that feed_forward prepends to that layer's input.
  """

  def __init__(self, input_nodes, hidden_nodes, output_nodes, seed=None):
    """Store the layer sizes and draw the initial weights uniformly from [-1, 1).

    Args:
      input_nodes: Number of input features (bias unit excluded).
      hidden_nodes: Number of hidden units (bias unit excluded).
      output_nodes: Number of output units.
      seed: Optional seed for reproducible initial weights. When None (the
        default) the weights are drawn from NumPy's global random state.
    """
    self.input_nodes = input_nodes
    self.hidden_nodes = hidden_nodes
    self.output_nodes = output_nodes

    # np.random (the module) and RandomState both expose the same uniform().
    rng = np.random if seed is None else np.random.RandomState(seed)
    self.theta_1 = rng.uniform(low=-1, high=1, size=(hidden_nodes, input_nodes + 1))
    self.theta_2 = rng.uniform(low=-1, high=1, size=(output_nodes, hidden_nodes + 1))

  def feed_forward(self, image, return_layers=False):
    """Propagate a single sample through the network.

    Args:
      image: Array holding exactly input_nodes values (e.g. a 28x28 image for
        a 784-input network); it is reshaped into a column vector.
      return_layers: When True, also return the intermediate activations.

    Returns:
      The (output_nodes, 1) output activations, or the tuple (x, h, o) of
      bias-augmented input, bias-augmented hidden activations and output
      activations when return_layers is True.
    """
    x = image.reshape((self.input_nodes, 1))
    x = np.append([[1]], x, axis=0)  # Add bias

    h = sigmoid(np.matmul(self.theta_1, x))
    h = np.append([[1]], h, axis=0)  # Add bias

    o = sigmoid(np.matmul(self.theta_2, h))
    return o if not return_layers else (x, h, o)

  def cost(self, X, y):  # J(theta)
    """Mean cross-entropy of the network's predictions over the samples in X.

    Args:
      X: Samples indexed along the first axis.
      y: Targets, y[i] belonging to X[i].

    Returns:
      The summed per-sample cross_entropy divided by the number of samples.
      With 1-D target rows this is a length-1 array rather than a scalar.

    Raises:
      ZeroDivisionError: If X contains no samples.
    """
    J = 0
    m = X.shape[0]
    for i in range(m):
      J += cross_entropy(y[i], self.feed_forward(X[i]))
    return J/m

  def back_prop(self, X, y, lr=0.01):
    """Apply one gradient-descent step using the batch-averaged gradient.

    Args:
      X: Batch of samples indexed along the first axis.
      y: Targets, y[i] holding output_nodes values for X[i].
      lr: Learning rate that scales the averaged gradient.

    An empty batch leaves the weights untouched; averaging over zero samples
    would otherwise divide 0 by 0 and overwrite every weight with NaN.
    """
    m = X.shape[0]
    if m == 0:
      return

    Delta_1 = np.zeros(shape=self.theta_1.shape)
    Delta_2 = np.zeros(shape=self.theta_2.shape)
    for i in range(m):
      a_1, a_2, a_3 = self.feed_forward(X[i], return_layers=True)
      # Output error term: prediction minus target.
      delta_3 = a_3 - y[i].reshape(self.output_nodes, 1)
      # Push the error back through theta_2 and the hidden sigmoid's derivative.
      delta_2 = np.matmul(self.theta_2.T, delta_3) * a_2 * (1-a_2)

      Delta_2 += np.matmul(delta_3, a_2.T)
      # Row 0 of delta_2 belongs to the hidden bias unit, which has no incoming weights.
      Delta_1 += np.matmul(delta_2[1:], a_1.T)
    Delta_1 /= m
    Delta_2 /= m
    self.theta_1 -= lr * Delta_1
    self.theta_2 -= lr * Delta_2

  def accuracy(self, X, y):
    """Fraction of samples whose strongest output unit matches the target's argmax.

    Args:
      X: Samples indexed along the first axis.
      y: Targets, y[i] belonging to X[i] (e.g. one-hot rows).

    Raises:
      ZeroDivisionError: If X contains no samples.
    """
    correct = 0
    m = X.shape[0]
    for i in range(m):
      if np.argmax(self.feed_forward(X[i])) == np.argmax(y[i]):
        correct += 1
    return correct/m

Set up the neural network (784-node (+1 bias) input layer, 200-node (+1 bias) hidden layer, and 10-node output layer) and start the learning process:

In [47]:
SEED = 0
NUM_ITERATIONS = 200
PRINT_EVERY = 10
BATCH_SIZE = 1000
LEARNING_RATE = 0.1

# Seed NumPy's global generator before the network draws its random initial
# weights, so a fresh run of this cell reproduces the same accuracies.
np.random.seed(SEED)
network = NeuralNetwork(784, 200, 10)

for i in range(1, NUM_ITERATIONS + 1):
  # Step through the training set in consecutive mini-batches, wrapping back
  # to the start once the end of the data is reached.
  batch_index = ((i-1) * BATCH_SIZE) % X_train.shape[0]
  batch = slice(batch_index, batch_index + BATCH_SIZE)
  network.back_prop(X_train[batch], y_train[batch], lr=LEARNING_RATE)
  if i % PRINT_EVERY == 0:
    print(f'Iteration {i}: Training Accuracy = {network.accuracy(X_train, y_train)}, Testing Accuracy = {network.accuracy(X_test, y_test)}')


  


Iteration 10: Training Accuracy = 0.15886666666666666, Testing Accuracy = 0.1622
Iteration 20: Training Accuracy = 0.24608333333333332, Testing Accuracy = 0.2456
Iteration 30: Training Accuracy = 0.3245, Testing Accuracy = 0.3236
Iteration 40: Training Accuracy = 0.38826666666666665, Testing Accuracy = 0.3852
Iteration 50: Training Accuracy = 0.4421, Testing Accuracy = 0.4429
Iteration 60: Training Accuracy = 0.4794333333333333, Testing Accuracy = 0.4837
Iteration 70: Training Accuracy = 0.51595, Testing Accuracy = 0.5223
Iteration 80: Training Accuracy = 0.5425166666666666, Testing Accuracy = 0.5446
Iteration 90: Training Accuracy = 0.5651166666666667, Testing Accuracy = 0.57
Iteration 100: Training Accuracy = 0.58685, Testing Accuracy = 0.589
Iteration 110: Training Accuracy = 0.60315, Testing Accuracy = 0.6021
Iteration 120: Training Accuracy = 0.6209166666666667, Testing Accuracy = 0.6219
Iteration 130: Training Accuracy = 0.6336833333333334, Testing Accuracy = 0.6412
Iteration 140