In [2]:
import numpy as np, sys
from keras.datasets import mnist
np.random.seed(1)

In [3]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [11]:
images, labels = x_train[0:1000].reshape(1000, 28*28)/255, y_train[0:1000]

In [12]:
one_hot_labels = np.zeros((len(labels), 10))
for i,l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

In [15]:
test_images = x_test.reshape(len(x_test), 28*28) / 255
test_labels = np.zeros((len(y_test), 10))
for i,l in enumerate(y_test):
    test_labels[i][l] = 1

In [16]:
def tanh(x):
    return np.tanh(x)

def tanh2deriv(x):
    return 1 - (x**2)

def softmax(x):
    temp = np.exp(x)
    return temp / np.sum(temp, axis=1, keepdims=True)

In [21]:
alpha, iterations = (2, 300)
pixels_per_img, hidden_size, num_labels = (784, 100, 10)
batch_size = 100

weights_0_1 = 0.02 * np.random.random((pixels_per_img, hidden_size)) - 0.01
weights_1_2 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

In [22]:
for iter in range(iterations):
    correct_cnt = 0
    for i in range(int(len(images) / batch_size)): # 10 итераций
        batch_start, batch_end = (i* batch_size, (i + 1) * batch_size) # каждый раз сдвигает на 100
        
        layer_0 = images[batch_start:batch_end]
        layer_1 = tanh(np.dot(layer_0, weights_0_1))
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = softmax(np.dot(layer_1, weights_1_2))
        
        for k in range(batch_size):
            correct_cnt += int( np.argmax(layer_2[k : k + 1]) == np.argmax(labels[batch_start + k: batch_start + k + 1]) )
        layer_2_delta = (labels[batch_start : batch_end] - layer_2) / (batch_size * layer_2.shape[0])
        
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * tanh2deriv(layer_1)
        layer_1_delta *= dropout_mask
        
        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)
        
    test_correct_cnt = 0
    for i in range(len(test_images)):
        
        layer_0 = test_images[i:i+1]
        layer_1 = tanh(np.dot(layer_0, weights_0_1))
        layer_2 = np.dot(layer_1, weights_1_2)
        
        test_correct_cnt += int( np.argmax(layer_2) == np.argmax(test_labels[i:i+1]) )
    
    if iter % 10 == 0 or iter == iterations - 1:
        sys.stdout.write("\n"+ \
                         "I:" + str(iter) + \
                         " Test-Acc:"+str(test_correct_cnt/float(len(test_images)))+ \
                         " Train-Acc:" + str(correct_cnt/float(len(images))))


I:0 Test-Acc:0.4501 Train-Acc:0.198
I:10 Test-Acc:0.6857 Train-Acc:0.711
I:20 Test-Acc:0.7016 Train-Acc:0.725
I:30 Test-Acc:0.7324 Train-Acc:0.763
I:40 Test-Acc:0.7633 Train-Acc:0.79
I:50 Test-Acc:0.788 Train-Acc:0.817
I:60 Test-Acc:0.8061 Train-Acc:0.853
I:70 Test-Acc:0.8169 Train-Acc:0.864
I:80 Test-Acc:0.8238 Train-Acc:0.875
I:90 Test-Acc:0.8304 Train-Acc:0.882
I:100 Test-Acc:0.8349 Train-Acc:0.894
I:110 Test-Acc:0.8392 Train-Acc:0.889
I:120 Test-Acc:0.8437 Train-Acc:0.899
I:130 Test-Acc:0.8467 Train-Acc:0.902
I:140 Test-Acc:0.8491 Train-Acc:0.906
I:150 Test-Acc:0.8503 Train-Acc:0.913
I:160 Test-Acc:0.8539 Train-Acc:0.92
I:170 Test-Acc:0.8561 Train-Acc:0.922
I:180 Test-Acc:0.8567 Train-Acc:0.917
I:190 Test-Acc:0.8584 Train-Acc:0.926
I:200 Test-Acc:0.8597 Train-Acc:0.929
I:210 Test-Acc:0.8612 Train-Acc:0.931
I:220 Test-Acc:0.863 Train-Acc:0.93
I:230 Test-Acc:0.8633 Train-Acc:0.937
I:240 Test-Acc:0.8637 Train-Acc:0.935
I:250 Test-Acc:0.8661 Train-Acc:0.934
I:260 Test-Acc:0.8657 Train