In [2]:
import numpy as np
import sys
from keras.datasets import mnist

np.random.seed(1)


(x_train, y_train), (x_test, y_test) = mnist.load_data() # загружаем данные для обучения
images, labels = ((x_train[0:1000]).reshape(1000, 28*28)/255, y_train[0:1000]) # Преобразуем тренировочные данные

new_labels = np.zeros((len(labels),10)) # создаем вектор меток, где индекс будет являтся цифрой прогноза

for i, j in enumerate(labels): # Инициализаия тренировочных меток
    new_labels[i][j] = 1
labels = new_labels

test_images = x_test.reshape(len(x_test), 28*28)/255
test_labels = np.zeros((len(y_test), 10))

for i, j in enumerate(y_test): # Инициализаия тестовых меток
    test_labels[i][j] = 1


# Функии активации для слоев

def tanh(x): # Для скрытого слоя
    return np.tanh(x)

def tanh2deriv(output): # Для градиентного спуска
    return 1 - (output**2)

def softmax(x): # Для выходного слоя
    temp = np.exp(x)
    return temp/np.sum(temp, axis=1, keepdims=True)

im_batch_size = 100  # Размер пакета изображений
alpha = 2 
iterations = 300 # Число итераций
hidden_size = 100  # Размер скрытого слоя
pixels_per_images, num_labels = (784, 10) # Кол-во пикселов на изображении, кол-во в векторе меток

weight_0_1 = 0.02*np.random.random((pixels_per_images, hidden_size)) - 0.01
weight_1_2 = 0.2*np.random.random((hidden_size, num_labels)) - 0.1

In [3]:
# Обучение и проверка
for j in range(iterations):
    error, correct_cnt = (0.0, 0)

    for i in range(int(len(images)/im_batch_size)):

        batch_start, batch_end = ((i*im_batch_size), ((i+1)*im_batch_size))
        layer_0 = images[batch_start:batch_end]

        layer_1 = tanh(np.dot(layer_0, weight_0_1))

        dropout_mask = np.random.randint(2, size = layer_1.shape)

        layer_1 *= dropout_mask*2

        layer_2 = softmax(np.dot(layer_1, weight_1_2))
        
        error += np.sum((labels[batch_start:batch_end] - layer_2)**2)
        for k in range(im_batch_size):
            correct_cnt += int(np.argmax(layer_2[k:k+1]) == np.argmax(labels[batch_start+k:batch_start+k+1]))

        layer_2_delta = (labels[batch_start:batch_end] - layer_2)/(im_batch_size*layer_2.shape[0])
        layer_1_delta = layer_2_delta.dot(weight_1_2.T)*tanh2deriv(layer_1) 

        layer_1_delta *=dropout_mask

        weight_1_2 += alpha*layer_1.T.dot(layer_2_delta)
        weight_0_1 += alpha*layer_0.T.dot(layer_1_delta)
    test_correct_cnt = 0

    for i in range(len(test_images)):
        layer_0 = test_images[i:i+1]
        layer_1 = tanh(np.dot(layer_0,weight_0_1))
        layer_2 = np.dot(layer_1,weight_1_2)
        test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
    if(j % 10 == 0):
        sys.stdout.write("\n"+ "I:" + str(j) + \
        " Test-Acc:"+str(test_correct_cnt/float(len(test_images)))+\
        " Train-Acc:" + str(correct_cnt/float(len(images))))


I:0 Test-Acc:0.394 Train-Acc:0.156
I:10 Test-Acc:0.6867 Train-Acc:0.723
I:20 Test-Acc:0.7025 Train-Acc:0.732
I:30 Test-Acc:0.734 Train-Acc:0.763
I:40 Test-Acc:0.7663 Train-Acc:0.794
I:50 Test-Acc:0.7913 Train-Acc:0.819
I:60 Test-Acc:0.8102 Train-Acc:0.849
I:70 Test-Acc:0.8228 Train-Acc:0.864
I:80 Test-Acc:0.831 Train-Acc:0.867
I:90 Test-Acc:0.8364 Train-Acc:0.885
I:100 Test-Acc:0.8407 Train-Acc:0.883
I:110 Test-Acc:0.845 Train-Acc:0.891
I:120 Test-Acc:0.8481 Train-Acc:0.901
I:130 Test-Acc:0.8505 Train-Acc:0.901
I:140 Test-Acc:0.8526 Train-Acc:0.905
I:150 Test-Acc:0.8555 Train-Acc:0.914
I:160 Test-Acc:0.8577 Train-Acc:0.925
I:170 Test-Acc:0.8596 Train-Acc:0.918
I:180 Test-Acc:0.8619 Train-Acc:0.933
I:190 Test-Acc:0.863 Train-Acc:0.933
I:200 Test-Acc:0.8642 Train-Acc:0.926
I:210 Test-Acc:0.8653 Train-Acc:0.931
I:220 Test-Acc:0.8668 Train-Acc:0.93
I:230 Test-Acc:0.8672 Train-Acc:0.937
I:240 Test-Acc:0.8681 Train-Acc:0.938
I:250 Test-Acc:0.8687 Train-Acc:0.937
I:260 Test-Acc:0.8684 Train-

In [15]:
# Посмотреть результат обучения

import cv2

random_index = np.random.randint(0, len(x_test))

random_image = x_test[random_index]
random_label = y_test[random_index]

random_image2 = cv2.resize(random_image, (28, 28))
random_image = random_image2.reshape(1, 784)
random_image = random_image/255.0

layer_f = random_image
layer_f1 = tanh(np.dot(layer_f, weight_0_1))
layer_f2 = softmax(np.dot(layer_f1, weight_1_2))


prediction = np.argmax(layer_f2)

print('I think this is number: ', prediction)
print('this is the number: ', random_label)

cv2.imshow("test_image", random_image2)
cv2.waitKey(0)
cv2.destroyAllWindows()


I think this is number:  6
this is the number:  6
