In [47]:
import sys
import numpy as np
from keras.datasets import mnist

In [48]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [49]:
images, labels = (x_train[0:1000].reshape(1000, 28*28) / 255, y_train[0:1000])

In [50]:
one_hot_labels = np.zeros((len(labels), 10))

In [51]:
for i,l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

In [52]:
test_images = x_test.reshape(len(x_test), 28*28) / 255
test_labels = np.zeros((len(y_test), 10))
for i, l in enumerate(y_test):
    test_labels[i][l] = 1

In [53]:
np.random.seed(1)

In [54]:
relu = lambda x:(x>=0) * x
relu2deriv = lambda x: x>=0

In [55]:
alpha, iterations, hidden_size, pixels_per_image, num_labels = (0.005, 350, 40, 784, 10)

In [56]:
weights_0_1 = 0.2*np.random.random((pixels_per_image, hidden_size)) - 0.1
weights_1_2 = 0.2*np.random.random((hidden_size, num_labels)) - 0.1

In [57]:
def train(weights_0_1, weights_1_2, alpha):
    for j in range(iterations):
        error, correct_cnt = (0.0, 0)
        for i in range(len(images)):
            layer_0 = images[i:i+1]
            layer_1 = relu(np.dot(layer_0, weights_0_1))
            layer_2 = np.dot(layer_1, weights_1_2)
            error += np.sum((labels[i:i+1] - layer_2) ** 2)
            correct_cnt += int(np.argmax(layer_2)) == np.argmax(labels[i:i+1])

            layer_2_delta = labels[i:i+1] - layer_2
            layer_1_delta = np.dot(layer_2_delta, weights_1_2.T) * relu2deriv(layer_1)

            weights_1_2 += alpha * np.dot(layer_1.T, layer_2_delta)
            weights_0_1 += alpha * np.dot(layer_0.T, layer_1_delta)
        sys.stdout.write("\r I: {} Error: {} Correct: {}".format(j, str(error/float(len(images)))[0:5], correct_cnt/float(len(images))))

In [58]:
def train_dropout(weights_0_1, weights_1_2, alpha):
    for j in range(iterations):
        error, correct_cnt = (0.0, 0)
        for i in range(len(images)):
            layer_0 = images[i:i+1]
            layer_1 = relu(np.dot(layer_0, weights_0_1))
            
            # Dropout
            dropout_mask = np.random.randint(2, size=layer_1.shape)
            layer_1 *= dropout_mask * 2
            
            layer_2 = np.dot(layer_1, weights_1_2)
            error += np.sum((labels[i:i+1] - layer_2) ** 2)
            correct_cnt += int(np.argmax(layer_2)) == np.argmax(labels[i:i+1])

            layer_2_delta = labels[i:i+1] - layer_2
            layer_1_delta = np.dot(layer_2_delta, weights_1_2.T) * relu2deriv(layer_1)
            
            # Dropout
            layer_1_delta *= dropout_mask

            weights_1_2 += alpha * np.dot(layer_1.T, layer_2_delta)
            weights_0_1 += alpha * np.dot(layer_0.T, layer_1_delta)
        sys.stdout.write("\r I: {} Error: {} Correct: {}".format(j, str(error/float(len(images)))[0:5], correct_cnt/float(len(images))))

In [59]:
def classify():
    error, correct_cnt = (0.0, 0)
    for i in range(len(test_images)):
        layer_0 = test_images[i:i+1]
        layer_1 = relu(np.dot(layer_0, weights_0_1))
        layer_2 = np.dot(layer_1, weights_1_2)
        
        error += np.sum((test_labels[i:i+1] - layer_2) ** 2)
        correct_cnt += int(np.argmax(layer_2)) == np.argmax(test_labels[i:i+1])
    sys.stdout.write("Error: {} Correct: {}".format(j, str(error/float(len(test_images)))[0:5], correct_cnt/float(len(test_images))))
    print()

In [60]:
train_dropout(weights_0_1, weights_1_2, alpha)

 I: 349 Error: 0.418 Correct: 0.802

In [62]:
classify()

Error: 349 Correct: 0.421
