In [1]:
import sys, numpy as np
from keras.datasets import mnist

Using TensorFlow backend.


#### load the mnist data

In [2]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

#### flat the image

In [3]:
images, labels = (x_train[0:1000].reshape(1000,28*28)/255, y_train[0:1000])

#### create one hot label

In [4]:
one_hot_labels = np.zeros((len(labels),10))
for i,l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

#### Repeat the same for test image

In [5]:
test_images = x_test.reshape(len(x_test),28*28)/255
test_labels = np.zeros((len(y_test),10))
for i,l in enumerate(y_test):
    test_labels[i][l] = 1

In [6]:
np.random.seed(1)
relu = lambda x:(x>=0) * x
relu2deriv = lambda x: x>=0
alpha, iterations, hidden_size, pixels_per_image, num_labels = (0.005, 350, 40, 784, 10)

In [7]:
weights_0_1 = 0.2*np.random.random((pixels_per_image,hidden_size)) - 0.1
weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1

In [10]:
for j in range(iterations):
    error, correct_cnt = (0.0, 0)
    for i in range(len(images)):
        layer_0 = images[i:i+1]
        layer_1 = relu(np.dot(layer_0,weights_0_1))
        layer_2 = np.dot(layer_1,weights_1_2)
        error += np.sum((labels[i:i+1] - layer_2) ** 2)
        correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))
        layer_2_delta = (labels[i:i+1] - layer_2)
        layer_1_delta = layer_2_delta.dot(weights_1_2.T)* relu2deriv(layer_1)
        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)
    if(j % 10 == 0 or j == iterations-1):
        error, correct_cnt = (0.0, 0)
        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0,weights_0_1))
            layer_2 = np.dot(layer_1,weights_1_2)
            error += np.sum((test_labels[i:i+1] - layer_2) ** 2)
            correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
        sys.stdout.write(" Test-Err:" + str(error/float(len(test_images)))[0:5] +" Test-Acc:" + str(correct_cnt/float(len(test_images))))
        print()
    sys.stdout.write("\r"+ " I:"+str(j)+ " Error:" + str(error/float(len(images)))[0:5] +" Correct:" + str(correct_cnt/float(len(images))))

 Test-Err:0.654 Test-Acc:0.7073
 I:9 Error:0.108 Correct:1.0 Test-Err:0.662 Test-Acc:0.704
 I:19 Error:0.107 Correct:0.999 Test-Err:0.671 Test-Acc:0.702
 I:29 Error:0.107 Correct:0.999 Test-Err:0.679 Test-Acc:0.7007
 I:39 Error:0.106 Correct:0.998 Test-Err:0.685 Test-Acc:0.6985
 I:49 Error:0.106 Correct:0.998 Test-Err:0.691 Test-Acc:0.6973
 I:59 Error:0.105 Correct:0.998 Test-Err:0.697 Test-Acc:0.6963
 I:69 Error:0.105 Correct:0.998 Test-Err:0.702 Test-Acc:0.6946
 I:79 Error:0.105 Correct:0.998 Test-Err:0.706 Test-Acc:0.6938
 I:89 Error:0.105 Correct:0.998 Test-Err:0.710 Test-Acc:0.6937
 I:99 Error:0.105 Correct:0.998 Test-Err:0.712 Test-Acc:0.6932
 I:109 Error:0.104 Correct:0.998 Test-Err:0.715 Test-Acc:0.6914
 I:119 Error:0.104 Correct:0.999 Test-Err:0.718 Test-Acc:0.6916
 I:129 Error:0.104 Correct:0.999 Test-Err:0.721 Test-Acc:0.6904
 I:139 Error:0.104 Correct:0.999 Test-Err:0.725 Test-Acc:0.6895
 I:149 Error:0.104 Correct:0.999 Test-Err:0.729 Test-Acc:0.6882
 I:159 Error:0.104 Corr

In [11]:
for j in range(iterations):
    error, correct_cnt = (0.0,0)
    for i in range(len(images)):
        layer_0 = images[i:i+1]
        layer_1 = relu(np.dot(layer_0,weights_0_1))
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = np.dot(layer_1,weights_1_2)
        error += np.sum((labels[i:i+1] - layer_2) ** 2)
        correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))
        layer_2_delta = (labels[i:i+1] - layer_2)
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)
        layer_1_delta *= dropout_mask
        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)
    if(j%10 == 0):
        test_error = 0.0
        test_correct_cnt = 0
        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0,weights_0_1))
            layer_2 = np.dot(layer_1, weights_1_2)
            test_error += np.sum((test_labels[i:i+1] - layer_2) ** 2)
            test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
        sys.stdout.write("\n" + "I:" + str(j) + " Test-Err:" + str(test_error/ float(len(test_images)))[0:5] +\
 " Test-Acc:" + str(test_correct_cnt/ float(len(test_images)))+\
 " Train-Err:" + str(error/ float(len(images)))[0:5] +\
 " Train-Acc:" + str(correct_cnt/ float(len(images))))


I:0 Test-Err:0.601 Test-Acc:0.6307 Train-Err:0.589 Train-Acc:0.646
I:10 Test-Err:0.568 Test-Acc:0.7035 Train-Err:0.538 Train-Acc:0.716
I:20 Test-Err:0.494 Test-Acc:0.7423 Train-Err:0.484 Train-Acc:0.724
I:30 Test-Err:0.484 Test-Acc:0.7493 Train-Err:0.476 Train-Acc:0.759
I:40 Test-Err:0.454 Test-Acc:0.7848 Train-Err:0.454 Train-Acc:0.765
I:50 Test-Err:0.457 Test-Acc:0.7749 Train-Err:0.431 Train-Acc:0.784
I:60 Test-Err:0.456 Test-Acc:0.7786 Train-Err:0.445 Train-Acc:0.794
I:70 Test-Err:0.461 Test-Acc:0.7672 Train-Err:0.451 Train-Acc:0.773
I:80 Test-Err:0.449 Test-Acc:0.7868 Train-Err:0.425 Train-Acc:0.805
I:90 Test-Err:0.458 Test-Acc:0.7665 Train-Err:0.427 Train-Acc:0.795
I:100 Test-Err:0.451 Test-Acc:0.7817 Train-Err:0.416 Train-Acc:0.799
I:110 Test-Err:0.449 Test-Acc:0.7915 Train-Err:0.412 Train-Acc:0.808
I:120 Test-Err:0.438 Test-Acc:0.7846 Train-Err:0.408 Train-Acc:0.821
I:130 Test-Err:0.441 Test-Acc:0.7801 Train-Err:0.416 Train-Acc:0.8
I:140 Test-Err:0.446 Test-Acc:0.7843 Train-Err