In [84]:
import numpy as np
from keras.datasets import mnist

In [96]:
# --- Network dimensions -----------------------------------------------------
INPUT_SIZE = 28 * 28         # length of one flattened 28x28 image (784)
HIDDEN_SIZE = 100            # width of the single hidden layer
OUTPUT_SIZE = 10             # number of output classes / one-hot width
TOTAL_LAYERS_AMOUNT = 3      # input, hidden and output activations kept per pass

# --- Training set-up --------------------------------------------------------
LEARN_DATA_SIZE = 1000       # how many leading training samples are used
BATCH_SIZE = 100             # samples per weight update
ALPHA = 2                    # multiplier applied to every weight update

In [86]:
def relu(x):
    """Rectified linear unit: keep positive entries of ``x``, zero the rest.

    Implemented as a product with the boolean mask ``x > 0``, so it works
    element-wise on NumPy arrays as well as on plain numbers.
    """
    positive_mask = x > 0
    return positive_mask * x

def relu_deriv(x):
    """Slope of ReLU expressed as a boolean mask: True where ``x > 0``.

    The result is boolean (not 0.0/1.0); it acts as 0/1 when multiplied
    with numeric arrays.
    """
    is_positive = x > 0
    return is_positive

def tanh(x):
    """Element-wise hyperbolic tangent; a thin wrapper around ``np.tanh``."""
    activated = np.tanh(x)
    return activated

def tanh_deriv(x):
    """Return ``1 - x**2``, the tanh derivative written in terms of its output.

    ``x`` must already be a tanh *output* (``x = tanh(z)``), not the
    pre-activation ``z``; the training loop in this file calls it with the
    hidden-layer activations.
    """
    squared = x ** 2
    return 1 - squared

def softmax(x):
    """Row-wise softmax of a 2-D array of logits.

    Args:
        x: array-like of shape (rows, classes); normalisation runs along
            axis 1, so every row of the result sums to 1.

    Returns:
        Array of the same shape holding the per-row probabilities.

    The row maximum is subtracted before exponentiating. Softmax is
    invariant to adding a constant to a row, so the result is unchanged
    mathematically, but ``np.exp`` can no longer overflow to ``inf`` (which
    previously turned the whole row into NaN for large logits).
    """
    shifted = x - np.max(x, axis=1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=1, keepdims=True)

In [87]:
# Load the MNIST train/test splits through keras.
(images_train, labels_train), (images_test, labels_test) = mnist.load_data()

# Only the first LEARN_DATA_SIZE training samples are used.
# Pixels are divided by 255 (presumably mapping 0..255 intensities to [0, 1]);
# the labels are kept as raw class indices at this point.
learn_data = images_train[:LEARN_DATA_SIZE] / 255
learn_outputs = labels_train[:LEARN_DATA_SIZE]

In [88]:
# Shapes before flattening: images and their labels.
print(learn_data.shape, learn_outputs.shape)

# Flatten every image into a single row of INPUT_SIZE values.
learn_data = np.reshape(learn_data, (LEARN_DATA_SIZE, INPUT_SIZE))
print(learn_data.shape)

(1000, 28, 28) (1000,)
(1000, 784)


In [89]:
# One-hot encode the training labels.
#
# The encoding is derived from `labels_train` rather than from the current
# value of `learn_outputs`, so this cell can be re-run safely: previously it
# read and overwrote `learn_outputs`, and a second execution iterated over
# rows of the already-encoded matrix and failed.
train_labels = labels_train[:LEARN_DATA_SIZE]
learn_outputs = np.zeros(shape=(LEARN_DATA_SIZE, OUTPUT_SIZE), dtype=np.float32)
# Row r gets a 1.0 in the column given by its class index.
learn_outputs[np.arange(len(train_labels)), train_labels] = 1.0
print(learn_outputs.shape)

(1000, 10)


In [90]:
# Scale the test images the same way as the training images (divide by 255).
# They stay in their original per-image shape here; the evaluation code
# reshapes them to INPUT_SIZE-wide rows when it needs them.
test_data = images_test / 255

# One-hot encode the test labels (OUTPUT_SIZE columns, matching the training
# targets instead of a hard-coded 10).
test_outputs = np.zeros(shape=(labels_test.shape[0], OUTPUT_SIZE), dtype=np.float32)
test_outputs[np.arange(labels_test.shape[0]), labels_test] = 1.0

# The former `test_ouputs = softmax(test_outputs)` line was removed: it wrote
# to a misspelled name that nothing reads, and applying softmax to one-hot
# targets is not needed for the argmax comparison done at evaluation time.
print(test_outputs.shape)

(10000, 10)


In [97]:
# Seed NumPy's global generator so the initial weights (and the dropout
# masks drawn later from the same generator) are repeatable.
np.random.seed(1)

# weights[0]: input  -> hidden, uniform in [-0.01, 0.01)
# weights[1]: hidden -> output, uniform in [-0.1, 0.1)
# The two draws must stay in this order to reproduce the same values.
weights = [
    np.random.rand(INPUT_SIZE, HIDDEN_SIZE) * 0.02 - 0.01,
    np.random.rand(HIDDEN_SIZE, OUTPUT_SIZE) * 0.2 - 0.1,
]

In [98]:
# Training schedule: number of passes over the training subset and how often
# (in epochs) train/test accuracy is reported.
EPOCHS = 300
REPORT_EVERY = 10

# Evaluation inputs do not change between epochs, so prepare them once:
# every test image flattened to one row, and the true class index per image.
test_inputs = test_data.reshape(len(test_data), INPUT_SIZE)
test_labels = np.argmax(test_outputs, axis=1)

for i in range(1, EPOCHS + 1):
    train_correct = 0
    for j in range(LEARN_DATA_SIZE // BATCH_SIZE):
        batch_start, batch_end = j * BATCH_SIZE, (j + 1) * BATCH_SIZE

        # ---- forward pass ------------------------------------------------
        layers = [None] * TOTAL_LAYERS_AMOUNT
        layers[0] = learn_data[batch_start:batch_end]
        layers[1] = tanh(np.dot(layers[0], weights[0]))
        # Dropout: randint(2) yields 0 or 1, so each hidden activation is
        # kept or zeroed with equal probability.
        # NOTE(review): kept activations are not rescaled (no "* 2" here and
        # no "* 0.5" at evaluation). The output layer has no bias and
        # predictions are taken with argmax, so a uniform scale of the hidden
        # layer does not change the predicted class - confirm this is intended.
        mask = np.random.randint(2, size=layers[1].shape)
        layers[1] *= mask
        layers[2] = softmax(np.dot(layers[1], weights[1]))

        expected_outputs = learn_outputs[batch_start:batch_end]

        # Count correct predictions for the whole batch in one comparison
        # instead of looping over individual samples.
        train_correct += int(np.sum(
            np.argmax(layers[2], axis=1) == np.argmax(expected_outputs, axis=1)
        ))

        # ---- backward pass -----------------------------------------------
        layer_deltas = [None] * TOTAL_LAYERS_AMOUNT
        # Output delta is (target - prediction), averaged over the batch;
        # because of this sign the updates below are *added* to the weights.
        layer_deltas[2] = (expected_outputs - layers[2]) / (BATCH_SIZE)
        # tanh_deriv takes the (masked) tanh output; dropped units evaluate
        # to 1 there but are zeroed again by the mask on the next line.
        layer_deltas[1] = np.dot(layer_deltas[2], weights[1].T) * tanh_deriv(layers[1])
        layer_deltas[1] *= mask

        weights[1] += np.dot(layers[1].T, layer_deltas[2]) * ALPHA
        weights[0] += np.dot(layers[0].T, layer_deltas[1]) * ALPHA

    if i % REPORT_EVERY == 0:
        print(
            f"Iter: {i}",
            f"Learn-Acc: {train_correct / LEARN_DATA_SIZE:.2f}",
            end=' '
        )

        # Evaluate on the full test set in a single batched forward pass
        # (no dropout). Softmax is skipped because it does not change which
        # output is largest.
        layers = [None] * TOTAL_LAYERS_AMOUNT
        layers[0] = test_inputs
        layers[1] = tanh(np.dot(layers[0], weights[0]))
        layers[2] = np.dot(layers[1], weights[1])

        test_correct = int(np.sum(np.argmax(layers[2], axis=1) == test_labels))

        print(f"Test-Acc: {test_correct / len(test_data):.2f}")


Iter: 10 Learn-Acc: 0.94 Test-Acc: 0.85
Iter: 20 Learn-Acc: 0.98 Test-Acc: 0.86
Iter: 30 Learn-Acc: 0.99 Test-Acc: 0.87
Iter: 40 Learn-Acc: 1.00 Test-Acc: 0.88
Iter: 50 Learn-Acc: 0.99 Test-Acc: 0.88
Iter: 60 Learn-Acc: 0.99 Test-Acc: 0.87
Iter: 70 Learn-Acc: 1.00 Test-Acc: 0.88
Iter: 80 Learn-Acc: 1.00 Test-Acc: 0.88
Iter: 90 Learn-Acc: 1.00 Test-Acc: 0.88
Iter: 100 Learn-Acc: 1.00 Test-Acc: 0.87
Iter: 110 Learn-Acc: 1.00 Test-Acc: 0.88
Iter: 120 Learn-Acc: 1.00 Test-Acc: 0.88
Iter: 130 Learn-Acc: 1.00 Test-Acc: 0.88
Iter: 140 Learn-Acc: 1.00 Test-Acc: 0.87
Iter: 150 Learn-Acc: 1.00 Test-Acc: 0.88
Iter: 160 Learn-Acc: 1.00 Test-Acc: 0.87
Iter: 170 Learn-Acc: 1.00 Test-Acc: 0.88
Iter: 180 Learn-Acc: 1.00 Test-Acc: 0.88
Iter: 190 Learn-Acc: 1.00 Test-Acc: 0.88
Iter: 200 Learn-Acc: 1.00 Test-Acc: 0.88
Iter: 210 Learn-Acc: 1.00 Test-Acc: 0.87
Iter: 220 Learn-Acc: 1.00 Test-Acc: 0.88
Iter: 230 Learn-Acc: 1.00 Test-Acc: 0.88
Iter: 240 Learn-Acc: 1.00 Test-Acc: 0.88
Iter: 250 Learn-Acc: 1.00

KeyboardInterrupt: 