# Neural Network From Scratch

Simply implementing a Feed-Forward Neural Network From Scratch

In [9]:
# Load some libraries
import numpy as np
from matplotlib import pyplot as plt

from keras.datasets import mnist # cheating a little, loading easy mnist dataset from keras library
from keras.utils import np_utils
from sklearn.metrics import log_loss
from scipy.special import expit # more robust sigmoid

## Load and Preprocess our images

In [10]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()

X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

# Normalize Pixel Values
X_train /= 255
X_test /= 255

# Convert array of ints (digit values) to one-hot encoded categorical
y_train = np_utils.to_categorical(y_train, 10)
y_test = np_utils.to_categorical(y_test, 10)

# convert images from 28x28 to 1x784
X_train = np.reshape(X_train, (60000, 784))
X_test = np.reshape(X_test, (10000, 784))

print("Input:", X_train.shape[1])
print("Output:", y_train.shape[1])

Input: 784
Output: 10


In [11]:
X_train[0]

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.     

In [12]:
y_train[0]

array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.])

In [13]:
# Define some functions

# Activation Function
def activation(x, derivative=False):
    return sigmoid(x, derivative)

def tanh(x, derivative=False):
    return 1 - np.power(x, 2) if derivative else np.tanh(x)

def sigmoid(x, derivative=False):
    return x * (1 - x) if derivative else expit(x)

def relu(x, derivative=False):
    return (x>0).astype(x.dtype) if derivative else np.maximum(x, 0, x)

def softmax(x):
    return np.apply_along_axis(_softmax, 1, x)

def _softmax(x):
    exps = np.exp(x - np.max(x))
    return exps / np.sum(exps)    


def calculate_loss(model): 
    W1, b1, W2, b2= model['W1'], model['b1'], model['W2'], model['b2']
    # Forward propagation to calculate our predictions 
    l1 = activation(X_train.dot(W1) + b1) # Input -> Hidden 1 || activation(x.t * W + bias) 
    output = softmax(l1.dot(W2) + b2) # Hidden 1 -> Output || Softmax Probabilites
    # Calculating the loss
    return log_loss(y_train, output)

In [14]:
# Define some parameters

# Layer Parameters
num_examples = X_train.shape[0] # training set size (60000)
nn_input_dim = X_train.shape[1] # input layer dimensionality (784)
nn_hdim_1 = 15
nn_output_dim = y_train.shape[1] # output layer dimensionality (10)

epochs = 20000 # How many times be forward and back propigate the network

# Gradient descent parameters
epsilon = .001 # learning rate for gradient descent 
reg_lambda = 0 # regularization strength

In [None]:
# Input -> Hidden 1
W1 = np.random.randn(nn_input_dim, nn_hdim_1).astype(np.float32)
b1 = np.zeros((1, nn_hdim_1))

# Hidden 1 -> Output
W2 = np.random.randn(nn_hdim_1, nn_output_dim).astype(np.float32)
b2 = np.zeros((1, nn_output_dim))

W1.shape # for each node in input, there is a weight that corresponds with a node in hidden layer 1 (23520 total weights)

(784, 15)

In [None]:
# Now our Network

model = {}

# Gradient descent... 
for i in range(0, epochs):
 
    # Forward propagation
    l1 = activation(X_train.dot(W1) + b1) # Input -> Hidden 1 || activation(x.t * W + bias)
    output = softmax(l1.dot(W2) + b2) # Hidden 1 -> Output || Softmax Probabilites

    # Backpropagation   
    output_error = output - y_train # technically, you'd need a derived softmax activation, but that equals 1, so we don't add it
    l1_error = output_error.dot(W2.T) * activation(l1, True)
    
    dW2 = np.dot(l1.T, output_error)
    db2 = np.average(output_error, axis=0)
    dW1 = np.dot(X_train.T, l1_error)
    db1 = np.average(l1_error, axis=0)
    
    # add regularization terms to weights
    dW2 += reg_lambda * W2 
    dW1 += reg_lambda * W1
    
    # Update weights with respect to learning rate
    W1 += -epsilon * dW1
    b1 += -epsilon * db1 
    W2 += -epsilon * dW2 
    b2 += -epsilon * db2
    
    model = { 'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2} 
    
    # Optionally print the loss. 
    # This is expensive because it uses the whole dataset, so we don't want to do it too often. 
    if i % 2000 == 0 or i == epochs-1:
        print("Loss after iteration %i: %f" %(i, calculate_loss(model))) 

Loss after iteration 0: 11.747941
Loss after iteration 2000: 2.157593
Loss after iteration 4000: 2.051320
Loss after iteration 6000: 1.983196
Loss after iteration 8000: 2.033731
Loss after iteration 10000: 1.844826


In [None]:
# let's see how we did

# Forward Propigate to get outputs on train data
l1_train = activation(X_train.dot(W1) + b1) # Input -> Hidden 1 || activation(x * W + bias)
output_train = softmax(l1_train.dot(W2) + b2)

# Forward Propigate to get outputs on test data
l1_test = activation(X_test.dot(W1) + b1) # Input -> Hidden 1 || activation(x * W + bias)
output_test = softmax(l1_test.dot(W2) + b2) # Hidden 1 -> Output || Softmax Probabilites

correct_train = 0
for i in range(0, output_train.shape[0]):
    if np.argmax(output_train[i]) == np.argmax(y_train[i]):
        correct_train += 1

correct_test = 0
for i in range(0, output_test.shape[0]):
    if np.argmax(output_test[i]) == np.argmax(y_test[i]):
        correct_test += 1

train_accuracy = correct_train / y_train.shape[0]
test_accuracy = correct_test / y_test.shape[0]

print("Train Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)