In [42]:
import numpy as np
# import sys
from keras.datasets import mnist

In [43]:
ALPHA = 0.001
HIDDEN_SIZE = 40
PIXELS_PER_IMAGE = INPUT_SIZE = 28*28
OUTPUT_SIZE = 10
TRAIN_DATA_SIZE = 1000
LAYERS_TOTAL = 3
BATCH_SIZE = 100

In [44]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

images, labels = x_train[0:TRAIN_DATA_SIZE] / 255, y_train[0:TRAIN_DATA_SIZE]

In [45]:
data_to_learn = images.reshape(TRAIN_DATA_SIZE, PIXELS_PER_IMAGE)
outputs_to_learn = np.zeros((TRAIN_DATA_SIZE, OUTPUT_SIZE), dtype=np.float32)

for ind, label in enumerate(labels):
    outputs_to_learn[ind][label] = 1.0

In [46]:
test_data = x_test.reshape(len(x_test), PIXELS_PER_IMAGE) / 255
test_outputs = np.zeros((len(y_test), OUTPUT_SIZE))

for ind, label in enumerate(y_test):
    test_outputs[ind][label] = 1.0

In [47]:
np.random.seed(1)
def relu(x): return (x > 0) * x
def relu_deriv(x): return x > 0

weights = [
    0.2 * np.random.rand(INPUT_SIZE, HIDDEN_SIZE) - 0.1,
    0.2 * np.random.rand(HIDDEN_SIZE, OUTPUT_SIZE) - 0.1,
]

In [48]:
def nn(data, weights, dropout=False):
    layers = [None] * LAYERS_TOTAL

    layers[0] = data
    layers[1] = relu(np.dot(layers[0], weights[0]))

    if(dropout):
        dropout_mask = np.random.randint(2, size=layers[1].shape)
        layers[1] *= dropout_mask * 2
        layers[2] = np.dot(layers[1], weights[1])
        return layers, dropout_mask
        
    layers[2] = np.dot(layers[1], weights[1])
    return layers
    

In [50]:
for i in range(1, 301):
    error = 0.0
    correct_counter = 0

    for j in range(TRAIN_DATA_SIZE // BATCH_SIZE):
        batch_start, batch_end = j * BATCH_SIZE, (j + 1) * BATCH_SIZE
        
        # Output
        data = data_to_learn[batch_start:batch_end]#.reshape(BATCH_SIZE, 1, INPUT_SIZE)
        layers, dropout_mask = nn(data=data, weights=weights, dropout=True)
        
        # Learning
        layer_deltas = [None] * LAYERS_TOTAL
        expected_output = outputs_to_learn[batch_start:batch_end]#.reshape(BATCH_SIZE, 1, OUTPUT_SIZE)
        
        error += np.sum((expected_output - layers[2]) ** 2)

        for k in range(BATCH_SIZE):
            correct_counter += np.argmax(expected_output[k]) == np.argmax(layers[2][k])

        layer_deltas[2] = (expected_output - layers[2]) / BATCH_SIZE
        layer_deltas[1] = np.dot(layer_deltas[2], weights[1].T) * relu_deriv(layers[1])
        layer_deltas[1] *= dropout_mask

        weights[1] += np.dot(layers[1].T, layer_deltas[2]) * ALPHA
        weights[0] += np.dot(layers[0].T, layer_deltas[1]) * ALPHA
    

    if(i % 10 == 0):
        print(
            f"Iteration: {i}",
            f"Error: {error / TRAIN_DATA_SIZE}",
            f"Correct: {correct_counter / TRAIN_DATA_SIZE}"
        )

        error = 0
        correct_counter = 0

        for j in range(len(test_data)):
            data = test_data[j].reshape(1, INPUT_SIZE)
            layers = nn(data=data, weights=weights)

            expected_output = test_outputs[j].reshape(1, OUTPUT_SIZE)

            error += np.sum((expected_output - layers[2]) ** 2)
            correct_counter += int(np.argmax(expected_output) == np.argmax(layers[2]))

        print(
            f"\tTest-Error: {error / len(test_data)}",
            f"\tTest-Acc: {correct_counter / len(test_data)}"
        )


Iteration: 10 Error: 0.3366291726804473 Correct: 0.842
	Test-Error: 0.3235399234923692 	Test-Acc: 0.8566
Iteration: 20 Error: 0.3471193025846126 Correct: 0.836
	Test-Error: 0.32364472486345913 	Test-Acc: 0.8565
Iteration: 30 Error: 0.34830378243421456 Correct: 0.828
	Test-Error: 0.324124669239041 	Test-Acc: 0.8566
Iteration: 40 Error: 0.34822065104032435 Correct: 0.845
	Test-Error: 0.32448059110265015 	Test-Acc: 0.8567
Iteration: 50 Error: 0.3311886159250758 Correct: 0.844
	Test-Error: 0.32455538273650497 	Test-Acc: 0.8571
Iteration: 60 Error: 0.3497377531713737 Correct: 0.826
	Test-Error: 0.3244993357128973 	Test-Acc: 0.8571
Iteration: 70 Error: 0.35446405316542556 Correct: 0.826
	Test-Error: 0.32433549850353 	Test-Acc: 0.8569
Iteration: 80 Error: 0.3441347974294454 Correct: 0.836
	Test-Error: 0.32421454621763274 	Test-Acc: 0.8572
Iteration: 90 Error: 0.33698100868360453 Correct: 0.845
	Test-Error: 0.32423972577692806 	Test-Acc: 0.857
Iteration: 100 Error: 0.35241587199805463 Correct: