In [65]:
import numpy as np
# import sys
from keras.datasets import mnist

In [66]:
# --- Data dimensions -------------------------------------------------------
INPUT_SIZE = 28 * 28            # values per flattened 28x28 image
PIXELS_PER_IMAGE = INPUT_SIZE   # alias used when reshaping images into rows
OUTPUT_SIZE = 10                # width of the one-hot target / network output
TRAIN_DATA_SIZE = 1000          # number of training samples kept from x_train

# --- Network / optimisation settings ---------------------------------------
LAYERS_TOTAL = 3                # length of the per-pass `layers` / `layer_deltas` lists
HIDDEN_SIZE = 100               # number of units in the hidden layer
BATCH_SIZE = 100                # samples consumed per weight update
ALPHA = 0.005                   # step size multiplied into every weight update

In [67]:
# Load the MNIST train/test splits through Keras.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Keep only the first TRAIN_DATA_SIZE samples for training.
# Dividing by 255 rescales the pixel values (presumably 8-bit, i.e. into
# the [0, 1] range -- confirm against the dataset dtype).
images = x_train[:TRAIN_DATA_SIZE] / 255
labels = y_train[:TRAIN_DATA_SIZE]

In [68]:
# Flatten every training image into a single row of PIXELS_PER_IMAGE values.
data_to_learn = images.reshape(TRAIN_DATA_SIZE, PIXELS_PER_IMAGE)

# One-hot encode the training labels: row i receives a 1.0 in column labels[i],
# every other entry stays 0.
outputs_to_learn = np.zeros((TRAIN_DATA_SIZE, OUTPUT_SIZE), dtype=np.float32)
outputs_to_learn[np.arange(len(labels)), labels] = 1.0

In [69]:
# Flatten the test images into rows and apply the same /255 rescaling that
# the training images receive.
test_data = x_test.reshape(len(x_test), PIXELS_PER_IMAGE) / 255

# One-hot encode the test labels: row i receives a 1.0 in column y_test[i].
test_outputs = np.zeros((len(y_test), OUTPUT_SIZE))
test_outputs[np.arange(len(y_test)), y_test] = 1.0

In [70]:
# Fix the global NumPy seed so the initial weights (and every later
# np.random draw, e.g. the dropout masks) are reproducible.
np.random.seed(1)

# The draw order matters for reproducibility: input->hidden first, then
# hidden->output.
weights = []
# Input -> hidden: uniform values in [-0.01, 0.01).
weights.append(0.02 * np.random.rand(INPUT_SIZE, HIDDEN_SIZE) - 0.01)
# Hidden -> output: uniform values in [-0.1, 0.1).
weights.append(0.2 * np.random.rand(HIDDEN_SIZE, OUTPUT_SIZE) - 0.1)

In [71]:
def relu(x):
    """Rectified linear unit: keep positive entries of `x`, zero out the rest.

    Works element-wise on arrays and also on plain scalars, because it is
    implemented as a multiplication by the boolean mask `x > 0`.
    """
    positive_mask = x > 0
    return positive_mask * x

def relu_deriv(x):
    """Derivative mask of ReLU: True where `x` is strictly positive, else False.

    The boolean result acts as 1/0 when multiplied into a gradient.
    """
    is_positive = x > 0
    return is_positive

In [72]:
def nn(data, weights, dropout=False):
    """Forward pass through the two-weight-matrix network.

    Args:
        data: input rows fed to the first layer (stored as ``layers[0]``).
        weights: sequence holding the input->hidden matrix at index 0 and
            the hidden->output matrix at index 1.
        dropout: when truthy, a random 0/1 mask is applied to the hidden
            activations and the surviving activations are doubled.

    Returns:
        ``layers`` when `dropout` is falsy, otherwise the tuple
        ``(layers, dropout_mask)``. ``layers`` is a list of length
        LAYERS_TOTAL holding input, hidden activations and raw output.
    """
    layers = [None] * LAYERS_TOTAL
    layers[0] = data

    hidden = relu(np.dot(data, weights[0]))

    mask = None
    if dropout:
        # Each hidden unit is kept (1) or dropped (0); multiplying the kept
        # ones by 2 compensates for the units that were zeroed.
        mask = np.random.randint(2, size=hidden.shape)
        hidden *= mask * 2

    layers[1] = hidden
    layers[2] = np.dot(hidden, weights[1])

    if dropout:
        return layers, mask
    return layers
    

In [73]:
def _evaluate(inputs, targets, weights):
    """Score `weights` on a whole data set with one dropout-free forward pass.

    Args:
        inputs: 2-D array with one flattened image per row.
        targets: 2-D one-hot array with one row per row of `inputs`.
        weights: the list of weight matrices consumed by `nn`.

    Returns:
        A ``(squared_error, correct)`` pair: the squared error summed over
        every output unit of every row, and the number of rows whose arg-max
        prediction equals the arg-max of the target row.
    """
    predictions = nn(data=inputs, weights=weights)[2]
    squared_error = np.sum((targets - predictions) ** 2)
    correct = int(
        np.sum(np.argmax(targets, axis=1) == np.argmax(predictions, axis=1))
    )
    return squared_error, correct


# Mini-batch gradient descent with dropout on the hidden layer.
# Every 10th epoch the training metrics are printed, followed by the
# metrics on the full test set.
for i in range(1, 301):
    error = 0.0
    correct_counter = 0

    for j in range(TRAIN_DATA_SIZE // BATCH_SIZE):
        batch_start, batch_end = j * BATCH_SIZE, (j + 1) * BATCH_SIZE

        # Forward pass (dropout enabled while training).
        data = data_to_learn[batch_start:batch_end]
        expected_output = outputs_to_learn[batch_start:batch_end]
        layers, dropout_mask = nn(data=data, weights=weights, dropout=True)

        # Running training metrics for this epoch. The row-wise argmax
        # comparison counts the same hits as a per-sample loop would.
        error += np.sum((expected_output - layers[2]) ** 2)
        correct_counter += np.sum(
            np.argmax(expected_output, axis=1) == np.argmax(layers[2], axis=1)
        )

        # Backward pass: the output delta is averaged over the batch, then
        # propagated through the hidden layer, gated by the ReLU derivative
        # and by the dropout mask used in the forward pass.
        layer_deltas = [None] * LAYERS_TOTAL
        layer_deltas[2] = (expected_output - layers[2]) / BATCH_SIZE
        layer_deltas[1] = np.dot(layer_deltas[2], weights[1].T) * relu_deriv(layers[1])
        layer_deltas[1] *= dropout_mask

        # In-place weight updates scaled by the learning rate.
        weights[1] += np.dot(layers[1].T, layer_deltas[2]) * ALPHA
        weights[0] += np.dot(layers[0].T, layer_deltas[1]) * ALPHA

    if i % 10 == 0:
        print(
            f"Iteration: {i}",
            f"Error: {error / TRAIN_DATA_SIZE}",
            f"Correct: {correct_counter / TRAIN_DATA_SIZE}"
        )

        # Evaluate the whole test set in a single batched forward pass instead
        # of one Python-level pass per image. Summation order differs from a
        # per-sample loop, so the printed error may differ in its last
        # floating-point digits. Test metrics get their own names so the
        # training accumulators are not clobbered.
        test_error, test_correct = _evaluate(test_data, test_outputs, weights)

        print(
            f"\tTest-Error: {test_error / len(test_data)}",
            f"\tTest-Acc: {test_correct / len(test_data)}"
        )


Iteration: 10 Error: 0.87867843981461 Correct: 0.346
	Test-Error: 0.8671974340846704 	Test-Acc: 0.5051
Iteration: 20 Error: 0.8066977801104327 Correct: 0.503
	Test-Error: 0.7942635485074726 	Test-Acc: 0.6468
Iteration: 30 Error: 0.7579967602924296 Correct: 0.594
	Test-Error: 0.738831406489273 	Test-Acc: 0.6894
Iteration: 40 Error: 0.7088269688667319 Correct: 0.655
	Test-Error: 0.6956985241774137 	Test-Acc: 0.7123
Iteration: 50 Error: 0.6801481136439175 Correct: 0.669
	Test-Error: 0.6616547910913744 	Test-Acc: 0.7256
Iteration: 60 Error: 0.6479291840912572 Correct: 0.705
	Test-Error: 0.634123657800836 	Test-Acc: 0.7332
Iteration: 70 Error: 0.6265475105297316 Correct: 0.737
	Test-Error: 0.611188900588929 	Test-Acc: 0.7421
Iteration: 80 Error: 0.6095449332684234 Correct: 0.748
	Test-Error: 0.5920241913797585 	Test-Acc: 0.749
Iteration: 90 Error: 0.5884739387075518 Correct: 0.749
	Test-Error: 0.575035093067266 	Test-Acc: 0.7566
Iteration: 100 Error: 0.5689132133856755 Correct: 0.774
	Test-