In [1]:
import numpy as np
from keras.datasets import mnist
from keras.utils import np_utils

In [12]:
# Activation Function

def tanh(x):
    return np.tanh(x);

# Derivative of Activation Function

def tanh_prime(x):
    return 1-np.tanh(x)**2;

# Loss function, instead of ReLU and Sigmoid

def mse(y_true, y_pred):
    return np.mean(np.power(y_true-y_pred, 2));

# Derivative of Loss Function

def mse_prime(y_true, y_pred):
    return 2*(y_pred-y_true)/y_true.size;

# declare weights and biases
w1 = np.random.rand(28*28*1, 100) - 0.5
w2 = np.random.rand(100, 50) - 0.5
w3 = np.random.rand(50, 10) - 0.5

b1 = np.random.rand(1, 100)
b2 = np.random.rand(1, 50)
b3 = np.random.rand(1, 10)

# load MNIST from server
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x = 5
# training data : 60000 samples
# reshape and normalize input data
x_train = x_train.reshape(x_train.shape[0], 1, 28*28)
x_train = x_train.astype('float32')
x_train /= 255
# encode output which is a number in range [0,9] into a vector of size 10
# e.g. number 3 will become [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
y_train = np_utils.to_categorical(y_train)

# same for test data : 20000 samples
x_test = x_test.reshape(x_test.shape[0], 1, 28*28)
x_test = x_test.astype('float32')
x_test /= 255
y_test = np_utils.to_categorical(y_test)
samples = len(x_train[:2000])

for x in range(70):
    err = 0
    for j in range(samples):
        
    # forward prop
    # uses input as activation for z1, and propogates
    # the output forwards for each activation
        
        input = x_train[j]
        z1 = np.dot(input, w1) + b1
        h1 = tanh(z1)
        z2 = np.dot(h1, w2) + b2
        h2 = tanh(z2)
        z3 = np.dot(h2, w3) + b3
        output = tanh(z3)
        
    # Calculate error of this sample
        err += mse(y_train[j], output)
        
    # back propogation
        """calculate derivative of cost/w3, cost/w2, and cost/w1
        to get errors for each weight and how much to tune them by.
        Finally, subtract the error by each respective weight to make
        the network more accurate."""
        error = mse_prime(y_train[j],output)
        error_times_tanh_prime_z3 = error * tanh_prime(z3)
        output_error = np.dot(h2.T, error_times_tanh_prime_z3)
        j = np.dot(error_times_tanh_prime_z3, w3.T)
        tanh_prime_z2_times_j = tanh_prime(z2) * j
        h2_error = np.dot(h1.T,tanh_prime_z2_times_j)
        k = np.dot(tanh_prime_z2_times_j, w2.T)
        tanh_prime_z1_times_k = tanh_prime(z1) * k
        h1_error = np.dot(input.T, tanh_prime_z1_times_k)
        
        w3 -= .15 * output_error
        b3 -= .15 * error_times_tanh_prime_z3
        w2 -= .15 * h2_error
        b2 -= .15 * tanh_prime_z2_times_j
        w1 -= .15 * h1_error
        b1 -= .15 * tanh_prime_z1_times_k
        
    # Average out the error of this sample over all samples
    # print it out for each epoch
    err/=samples
    print("epoch number ", x + 1, " has error of ", err)
        
        

epoch number  1  has error of  0.1003919834265447
epoch number  2  has error of  0.0560855360473059
epoch number  3  has error of  0.04349345299223934
epoch number  4  has error of  0.035102529303888824
epoch number  5  has error of  0.029758929591297133
epoch number  6  has error of  0.025320149060809833
epoch number  7  has error of  0.02188188122431867
epoch number  8  has error of  0.018974026016403526
epoch number  9  has error of  0.016391486611742737
epoch number  10  has error of  0.014257467574594711
epoch number  11  has error of  0.012825755350200262
epoch number  12  has error of  0.011537717682379281
epoch number  13  has error of  0.01048421174482677
epoch number  14  has error of  0.00962191166986355
epoch number  15  has error of  0.009008987715978132
epoch number  16  has error of  0.008441120323745843
epoch number  17  has error of  0.007995692441003363
epoch number  18  has error of  0.007499111624429259
epoch number  19  has error of  0.007217985752638521
epoch numb

In [13]:
"""predict the output of input images through forward propogation
and appending the result to a list. print out the output
of neural network and true values."""
samples = len(x_train[0:3])
result = []
for x in range(samples):
    input = x_train[x]
    z1 = np.dot(input, w1) + b1
    h1 = tanh(z1)
    z2 = np.dot(h1, w2) + b2
    h2 = tanh(z2)
    z3 = np.dot(h2, w3) + b3
    output = tanh(z3)
    result.append(output)
    
print(result)
print("\n\n\n")
print('actual results: ')
print(y_test[0:3])

[array([[ 0.00301005,  0.00403768, -0.00292536,  0.03586263, -0.01092165,
         0.94970436, -0.01698962,  0.00683722, -0.01984634,  0.0134784 ]]), array([[ 9.90915692e-01,  9.23880808e-04, -2.26909574e-03,
        -7.42488822e-04, -1.83033496e-03, -1.37660724e-03,
        -1.14982696e-02,  1.13752209e-04, -9.32582668e-03,
        -4.02667394e-04]]), array([[ 0.007277  ,  0.02123113, -0.0065323 , -0.01891555,  0.99073283,
        -0.00849289, -0.01505545, -0.00123396, -0.00875344, -0.00126416]])]




actual results: 
[[0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]]


<h2>Next Steps</h2>

- Increase number of layers to compute the most accurate result
- Experimenet with appropriate learning rate
- Train on more data to get more accurate results
- Use classes for the network and layers to more cleanly propogate forwards, backwards, and predict
- Use different activation and loss functions to see how accuracy of output changes
- Increase number of epochs
- tweak starting biases and weights to get a smaller error quicker