In [0]:
import numpy as np, sys
# Seed NumPy's global RNG so that the np.random draws made later in this
# script (weight initialisation, dropout masks) are repeatable between runs.
np.random.seed(1)

from keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Training data: the first 10000 images, each flattened to a 28*28 vector and
# divided by 255, with their digit labels expanded to one-hot rows of width 10.
images = x_train[:10000].reshape(10000, 28 * 28) / 255
labels = y_train[:10000]
one_hot_labels = np.zeros((len(labels), 10))
one_hot_labels[np.arange(len(labels)), labels] = 1
labels = one_hot_labels

# Test data: every test image, flattened/scaled and one-hot encoded the same way.
test_images = x_test.reshape(len(x_test), 28 * 28) / 255
test_labels = np.zeros((len(y_test), 10))
test_labels[np.arange(len(y_test)), y_test] = 1

def tanh(x):
    """Hyperbolic-tangent activation, applied element-wise via ``np.tanh``."""
    activated = np.tanh(x)
    return activated
def tanh2deriv(output):
    """Derivative of tanh expressed through its own output.

    ``output`` is a value (or array of values) already produced by tanh;
    the result is ``1 - output**2``, computed element-wise.
    """
    squared = output ** 2
    return 1 - squared
def softmax(x):
    """Row-wise softmax of a 2-D array of logits.

    Each row of ``x`` is turned into a probability distribution (values sum
    to 1 along axis 1). The result has the same shape as ``x``.

    The row maximum is subtracted before exponentiating. This leaves the
    result mathematically unchanged (the factor cancels in the ratio) but
    keeps ``np.exp`` from overflowing to ``inf`` for large logits, which
    would otherwise produce ``inf / inf = nan`` rows.
    """
    shifted = x - np.max(x, axis=1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=1, keepdims=True)

def relu(x):
    """Rectified linear unit: keep non-negative entries, zero out the rest.

    Implemented as a multiplicative mask, so the result is ``x`` multiplied
    element-wise by the boolean ``x >= 0``.
    """
    keep = x >= 0
    return keep * x

def relu2deriv(output):
    """Derivative of ReLU expressed through its own output.

    ``output`` is a value (or array) already produced by ``relu``. The
    result is a boolean mask that is True where the unit was active.

    The comparison is strict: a ReLU output is never negative, so testing
    ``output >= 0`` would be True everywhere and would let gradient flow
    through units whose output is zero (inactive or dropped out).
    """
    return output > 0

# Training hyperparameters and layer widths.
alpha = 2
iterations = 300
hidden_size_1 = 100
hidden_size_2 = 200
pixels_per_image = 784
num_labels = 10
batch_size = 1000

# Weight matrices, drawn uniformly: +/-0.1 for the input layer and +/-0.01
# for the two later layers. The draw order matters for reproducibility under
# a fixed seed, so it follows the layer order.
weights_0_1 = np.random.random((pixels_per_image, hidden_size_1)) * 0.2 - 0.1
weights_1_2 = np.random.random((hidden_size_1, hidden_size_2)) * 0.02 - 0.01
weights_2_3 = np.random.random((hidden_size_2, num_labels)) * 0.02 - 0.01


# Mini-batch training of the 784 -> hidden_size_1 (ReLU) -> hidden_size_2 (tanh)
# -> num_labels (softmax) network, with 50% dropout on both hidden layers.
# Every 10th iteration the test-set accuracy is computed and reported together
# with the training accuracy accumulated over that iteration's batches.
for j in range(iterations):
    correct_cnt = 0
    for i in range(len(images) // batch_size):
        batch_start, batch_end = i * batch_size, (i + 1) * batch_size
        batch_labels = labels[batch_start:batch_end]

        # ---- forward pass ----
        # The pre-dropout activations are kept so that the backward pass can
        # take the activation derivative at the right point.
        layer_0 = images[batch_start:batch_end]
        layer_1_act = relu(np.dot(layer_0, weights_0_1))
        dropout_mask_1 = np.random.randint(2, size=layer_1_act.shape)
        # Dropped units become 0; kept units are doubled to compensate for
        # roughly half of the units being removed.
        dropout_scale_1 = dropout_mask_1 * 2
        layer_1 = layer_1_act * dropout_scale_1

        layer_2_act = tanh(np.dot(layer_1, weights_1_2))
        dropout_mask_2 = np.random.randint(2, size=layer_2_act.shape)
        dropout_scale_2 = dropout_mask_2 * 2
        layer_2 = layer_2_act * dropout_scale_2

        layer_3 = softmax(np.dot(layer_2, weights_2_3))

        # Count correct predictions for the whole batch at once.
        correct_cnt += int(np.sum(np.argmax(layer_3, axis=1)
                                  == np.argmax(batch_labels, axis=1)))

        # ---- backward pass ----
        # NOTE(review): the divisor is effectively batch_size squared
        # (layer_3.shape[0] is the batch length); kept unchanged because
        # alpha is tuned against this scaling.
        layer_3_delta = (batch_labels - layer_3) \
            / (batch_size * layer_3.shape[0])

        # Each hidden delta is the upstream delta times the activation
        # derivative (taken on the pre-dropout activation) times the same
        # scaled dropout mask used in the forward pass. The mask is applied
        # BEFORE propagating further down, so dropped units pass no gradient.
        layer_2_delta = layer_3_delta.dot(weights_2_3.T) \
            * tanh2deriv(layer_2_act) * dropout_scale_2
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) \
            * relu2deriv(layer_1_act) * dropout_scale_1

        # Deltas are (target - prediction), hence "+=" for the update.
        weights_2_3 += alpha * layer_2.T.dot(layer_3_delta)
        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)

    if j % 10 == 0:
        # Evaluate only when the result is reported, and do it as a single
        # batched forward pass (no dropout at test time).
        test_layer_1 = relu(np.dot(test_images, weights_0_1))
        test_layer_2 = tanh(np.dot(test_layer_1, weights_1_2))
        test_layer_3 = softmax(np.dot(test_layer_2, weights_2_3))
        test_correct_cnt = int(np.sum(np.argmax(test_layer_3, axis=1)
                                      == np.argmax(test_labels, axis=1)))

        sys.stdout.write("\n"+ "I:" + str(j) + \
         " Test-Acc:"+str(test_correct_cnt/float(len(test_images)))+\
         " Train-Acc:" + str(correct_cnt/float(len(images))))