In [1]:
import numpy as np

In [2]:
import sys
from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

images, labels = (x_train[0:1000].reshape(1000, 28*28) / 255, y_train[0:1000])

one_hot_labels = np.zeros((len(labels), 10))

for i,l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

test_images = x_test.reshape(len(x_test), 28*28) / 255
test_labels = np.zeros((len(y_test), 10))
for i, l in enumerate(y_test):
    test_labels[i][l] = 1

Using TensorFlow backend.


In [3]:
np.random.seed(1)

def relu(x):
    return (x >= 0) * x
def relu2deriv(x):
    return x >= 0

batch_size = 100
alpha, iterations = (0.001, 300)
pixels_per_image, num_labels, hidden_size = (784, 10, 100)

weights_0_1 = np.random.random((pixels_per_image, hidden_size)) * 0.2 - 0.1
weights_1_2 = np.random.random((hidden_size, num_labels)) * 0.2 - 0.1

In [4]:
for j in range(iterations):
    error, correct_cnt = (0.0, 0)
    for i in range(int(len(images) / batch_size)):
        batch_start, batch_end = ((i * batch_size), ((i+1) * batch_size))
        layer_0 = images[batch_start:batch_end]
        layer_1 = relu(np.dot(layer_0, weights_0_1))
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = np.dot(layer_1, weights_1_2)
        
        error += np.sum((labels[batch_start:batch_end] - layer_2) ** 2)
        
        for k in range(batch_size):
            correct_cnt += int(np.argmax(labels[batch_start+k:batch_start+k+1])) == np.argmax(layer_2[k:k+1])
            
#             layer_2_delta = labels[batch_start+k:batch_start+k+1] - layer_2[k:k+1]
            layer_2_delta = (labels[batch_start:batch_end] - layer_2) / batch_size
            layer_1_delta = np.dot(layer_2_delta, weights_1_2.T) * relu2deriv(layer_1)
            layer_1_delta *= dropout_mask
            
            weights_1_2 += alpha * np.dot(layer_1.T, layer_2_delta)
            weights_0_1 += alpha * np.dot(layer_0.T, layer_1_delta)
        
    if (j % 10 == 0):
        test_error = 0.0
        test_correct_cnt = 0

        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0, weights_0_1))
            layer_2 = np.dot(layer_1, weights_1_2)

            test_error += np.sum((test_labels[i:i+1] - layer_2) ** 2)
            test_correct_cnt += int(np.argmax(test_labels[i:i+1])) == np.argmax(layer_2)

        sys.stdout.write("\n I: {} Test-Err: {} Test-Acc: {} Train-Err: {} Train-Acc: {}".format(j, str(test_error / float(len(test_images)))[0:5], str(test_correct_cnt / float(len(test_images))), str(error / float(len(images)))[0:5],str(correct_cnt / float(len(images)))))
                             


 I: 0 Test-Err: 0.815 Test-Acc: 0.3832 Train-Err: 1.284 Train-Acc: 0.165
 I: 10 Test-Err: 0.568 Test-Acc: 0.7173 Train-Err: 0.591 Train-Acc: 0.672
 I: 20 Test-Err: 0.510 Test-Acc: 0.7571 Train-Err: 0.532 Train-Acc: 0.729
 I: 30 Test-Err: 0.485 Test-Acc: 0.7793 Train-Err: 0.498 Train-Acc: 0.754
 I: 40 Test-Err: 0.468 Test-Acc: 0.7877 Train-Err: 0.489 Train-Acc: 0.749
 I: 50 Test-Err: 0.458 Test-Acc: 0.793 Train-Err: 0.468 Train-Acc: 0.775
 I: 60 Test-Err: 0.452 Test-Acc: 0.7995 Train-Err: 0.452 Train-Acc: 0.799
 I: 70 Test-Err: 0.446 Test-Acc: 0.803 Train-Err: 0.453 Train-Acc: 0.792
 I: 80 Test-Err: 0.451 Test-Acc: 0.7968 Train-Err: 0.457 Train-Acc: 0.786
 I: 90 Test-Err: 0.447 Test-Acc: 0.795 Train-Err: 0.454 Train-Acc: 0.799
 I: 100 Test-Err: 0.448 Test-Acc: 0.793 Train-Err: 0.447 Train-Acc: 0.796
 I: 110 Test-Err: 0.441 Test-Acc: 0.7943 Train-Err: 0.426 Train-Acc: 0.816
 I: 120 Test-Err: 0.442 Test-Acc: 0.7966 Train-Err: 0.431 Train-Acc: 0.813
 I: 130 Test-Err: 0.441 Test-Acc: 0.790