# Neural Network 
## with MNIST Dataset
Tutorial Used : https://www.python-course.eu/neural_network_mnist.php
                https://jonathanweisberg.org/post/A%20Neural%20Network%20from%20Scratch%20-%20Part%201/

In [1]:
import numpy as np
import keras
%matplotlib inline
import matplotlib.pyplot as plt



Using TensorFlow backend.


In [2]:
# Load Data
(train_x_orig, train_y_orig), (test_x_orig, test_y_orig) = keras.datasets.mnist.load_data()

In [3]:
# Data Values

m_train = train_x_orig.shape[0]
num_px = train_x_orig.shape[1]
m_test = test_x_orig.shape[0]

print ("Number of training examples: " + str(m_train))
print ("Number of testing examples: " + str(m_test))
print ("Each image is of size: (" + str(num_px) + ", " + str(num_px) + ", 3)")
print ("train_x_orig shape: " + str(train_x_orig.shape))
print ("train_y shape: " + str(train_y_orig.shape))
print ("test_x_orig shape: " + str(test_x_orig.shape))
print ("test_y shape: " + str(test_y_orig.shape))

Number of training examples: 60000
Number of testing examples: 10000
Each image is of size: (28, 28, 3)
train_x_orig shape: (60000, 28, 28)
train_y shape: (60000,)
test_x_orig shape: (10000, 28, 28)
test_y shape: (10000,)


In [4]:
# Prepare Training Labels

digits = 10
examples = train_y_orig.shape[0]

train_y_orig = train_y_orig.reshape(1, examples)

train_y = np.eye(digits)[train_y_orig.astype('int32')]
train_y = train_y.T.reshape(digits, examples)
#print(train_y)

In [5]:
# Prepare Test Labels

m_test = test_y_orig.shape[0]
test_y_orig = test_y_orig.reshape(1, m_test)

test_y = np.eye(digits)[test_y_orig.astype('int32')]
test_y = test_y.T.reshape(digits, m_test)
#print(test_y)

In [6]:
#Model data

# Convert into shape of (784,60000)
train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], -1).T  
# Convert into shape of (784,10000)
test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0], -1).T

train_x = train_x_flatten / 255.
test_x = test_x_flatten / 255.

print ("train_x's shape: " + str(train_x.shape))
print ("test_x's shape: " + str(test_x.shape))

train_y = train_y
test_y = test_y
print ("train_y's shape: " + str(train_y.shape))
print ("test_y's shape: " + str(test_y.shape))


# train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], -1).T  
# test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0], -1).T

# train_x = train_x_flatten / 255.
# test_x = test_x_flatten / 255.

# print ("train_x's shape: " + str(train_x.shape))
# print ("test_x's shape: " + str(test_x.shape))

# train_y = train_y
# test_y = test_y
# print ("train_y's shape: " + str(train_y.shape))
# print ("test_y's shape: " + str(test_y.shape))




train_x's shape: (784, 60000)
test_x's shape: (784, 10000)
train_y's shape: (10, 60000)
test_y's shape: (10, 10000)


In [7]:
#Define all the activation functions
def sigmoid(value):
    return 1.0 / ( 1.0+np.exp(-value))

def sigmoid_prime(value):
    return sigmoid(value) * (1- sigmoid(value))

def softmax(value):
    return np.exp(value) / (np.sum(np.exp(value), axis = 0))

def sofmax_prime(value):
    return softmax(value) * (1- softmax(value))

In [8]:
# Cost Function

def compute_cost(Y, Y_hat):
    cost_sum = np.sum(np.multiply(Y, np.log(Y_hat)))
    m = Y.shape[1]
    cost = -(1/m) * cost_sum
    return cost

In [9]:
# Initializing Hyper Parameters

n_x = train_x.shape[0]
n_h = 50
learning_rate = 1
digits = 10
m = train_x.shape[1]
epochs = 5000

In [10]:
print('Input Values : {}'.format(n_x))
print('Hidden Values : {}'.format(n_h))
print(m)
print(train_x.shape)

Input Values : 784
Hidden Values : 50
60000
(784, 60000)


In [11]:
# Initializing Weights and Biasis

W1 = np.random.randn(n_h, n_x)
b1 = np.zeros((n_h, 1))
W2 = np.random.randn(digits, n_h)
b2 = np.zeros((digits, 1))
costs = []

In [12]:
W1.shape

(50, 784)

In [13]:
# Running the Neural Network

for i in range(epochs):
    Z1 = np.matmul(W1,train_x) + b1
    A1 = sigmoid(Z1)     # Activation Sigmoid
    Z2 = np.matmul(W2,A1) + b2
    A2 = softmax(Z2)     # Activation Softmax
    cost = compute_cost(train_y, A2)
    
    dZ2 = A2 - train_y
    dW2 = (1./m) * np.matmul(dZ2, A1.T)
    db2 = (1./m) * np.sum(dZ2, axis=1, keepdims=True)
    
    dA1 = np.matmul(W2.T,dZ2)
    dZ1 = dA1 * sigmoid_prime(Z1)
    dW1 = (1./m) * np.matmul(dZ1, train_x.T)
    db1 = (1./m) * np.sum(dZ1, axis = 1, keepdims=True)
    
    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2
    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1
    
    if (i % 100 == 0):
        print("Epoch", i, "cost: ", cost)

print("Final cost:", cost)
    

Epoch 0 cost:  8.469171166420832
Epoch 100 cost:  0.7453787192903603
Epoch 200 cost:  0.5741690493852551
Epoch 300 cost:  0.49876598671588207
Epoch 400 cost:  0.4527038860318677
Epoch 500 cost:  0.4201930331787324
Epoch 600 cost:  0.3952274714531878
Epoch 700 cost:  0.37510283173097736
Epoch 800 cost:  0.3583698316502337
Epoch 900 cost:  0.3441332784074583
Epoch 1000 cost:  0.3318052974131262
Epoch 1100 cost:  0.32096635629279757
Epoch 1200 cost:  0.3113031593927571
Epoch 1300 cost:  0.3025831511490964
Epoch 1400 cost:  0.2946357104335482
Epoch 1500 cost:  0.2873350719943786
Epoch 1600 cost:  0.2805871515612894
Epoch 1700 cost:  0.27431976182527035
Epoch 1800 cost:  0.26847548659418574
Epoch 1900 cost:  0.2630066514697559
Epoch 2000 cost:  0.25787170649872293
Epoch 2100 cost:  0.2530332601559977
Epoch 2200 cost:  0.24845753950987745
Epoch 2300 cost:  0.24411452199187086
Epoch 2400 cost:  0.23997843516899642
Epoch 2500 cost:  0.2360283461966839
Epoch 2600 cost:  0.23224821640759835
Epoc

In [14]:
# Save the Biasis and weights because it took ages to calculate on Mac xP
np.savetxt('W1.csv', W1, delimiter=',')
np.savetxt('W2.csv', W2, delimiter=',')
np.savetxt('b1.csv', b1, delimiter=',')
np.savetxt('b2.csv', b2, delimiter=',')

In [18]:
# Test Accuracy 

Z1 = np.matmul(W1, test_x) + b1
A1 = sigmoid(Z1)
Z2 = np.matmul(W2, A1) + b2
A2 = softmax(Z2)
predictions = np.argmax(A2, axis=0)
labels = np.argmax(test_y, axis=0)

correct = 0 
for i in range(labels.size):
    if predictions[i] == labels[i]:
        correct +=1
print('Test Accuracy : {}'.format(100*correct/labels.size))


Test Accuracy : 93.53
