In [None]:
'''
mnist deep nn implementation with dropout
'''

In [1]:
import tensorflow as tf
import sys, numpy as np
from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

images, labels = (x_train[0:1000].reshape(1000,28*28) / 255, y_train[0:1000])

one_hot_labels = np.zeros((len(labels),10))
for i,l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

test_images = x_test.reshape(len(x_test),28*28) / 255
test_labels = np.zeros((len(y_test),10))
for i,l in enumerate(y_test):
    test_labels[i][l] = 1
    
np.random.seed(1)

def relu(x):
    return (x >= 0) * x

def relu2deriv(x):
    return x >= 0

alpha = 0.005
neurons = 100
pixels = 784
num_labels = 10
iterations = 300

weights_0_1 = 0.2 * np.random.random((pixels,neurons)) - 0.1
weights_1_2 = 0.2 * np.random.random((neurons,num_labels)) - 0.1

for iteration in range(iterations):
    
    error = 0.0
    correct_count = 0
    
    for i in range(len(images)):
        
        layer_0 = images[i:i+1]
        layer_1 = relu(np.dot(layer_0,weights_0_1))
        
        dropout_mask = np.random.randint(2,size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        
        layer_2 = np.dot(layer_1, weights_1_2)
        
        error += np.sum( (labels[i:i+1] - layer_2) ** 2 ) 
        
        correct_count += int( np.argmax(layer_2) == np.argmax(labels[i:i+1]) )
        
        layer_2_delta = labels[i:i+1] - layer_2
        
        layer_1_delta = np.dot(layer_2_delta, weights_1_2.T) * relu2deriv(layer_1)
    
        layer_1_delta *= dropout_mask
        
        weights_1_2 += alpha * np.dot(layer_1.T,layer_2_delta)
        weights_0_1 += alpha * np.dot(layer_0.T, layer_1_delta)
        
    if (iteration % 10 == 0):
        
        test_error = 0.0
        test_correct = 0
        
        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0,weights_0_1))
            layer_2 = np.dot(layer_1, weights_1_2)
            
            test_error += (np.sum(test_labels[i:i+1] - layer_2) ** 2)
            test_correct += int( np.argmax(layer_2) == np.argmax(test_labels[i:i+1]) )
            
        sys.stdout.write("\n" + \
            "I: " + str(iteration) + \
            " Test Error: " + str(test_error / float(len(test_images))) [0:5] + \
            " Test Accuracy: " + str(test_correct / float(len(test_images))) + \
            " Train-Err:" + str(error/ float(len(images)))[0:5] +\
            " Train-Acc:" + str(correct_count/ float(len(images))))          

Using TensorFlow backend.



I: 0 Test Error: 0.069 Test Accuracy: 0.6333 Train-Err:0.891 Train-Acc:0.413
I: 10 Test Error: 0.085 Test Accuracy: 0.787 Train-Err:0.472 Train-Acc:0.764
I: 20 Test Error: 0.091 Test Accuracy: 0.8133 Train-Err:0.430 Train-Acc:0.809
I: 30 Test Error: 0.102 Test Accuracy: 0.8114 Train-Err:0.415 Train-Acc:0.811
I: 40 Test Error: 0.122 Test Accuracy: 0.8112 Train-Err:0.413 Train-Acc:0.827
I: 50 Test Error: 0.118 Test Accuracy: 0.8133 Train-Err:0.392 Train-Acc:0.836
I: 60 Test Error: 0.130 Test Accuracy: 0.8236 Train-Err:0.402 Train-Acc:0.836
I: 70 Test Error: 0.132 Test Accuracy: 0.8033 Train-Err:0.383 Train-Acc:0.857
I: 80 Test Error: 0.135 Test Accuracy: 0.8054 Train-Err:0.386 Train-Acc:0.854
I: 90 Test Error: 0.146 Test Accuracy: 0.8144 Train-Err:0.376 Train-Acc:0.868
I: 100 Test Error: 0.139 Test Accuracy: 0.7903 Train-Err:0.369 Train-Acc:0.864
I: 110 Test Error: 0.141 Test Accuracy: 0.8003 Train-Err:0.371 Train-Acc:0.868
I: 120 Test Error: 0.148 Test Accuracy: 0.8046 Train-Err:0.353 