In [1]:
import sys
import numpy as np

from keras.datasets import mnist




In [2]:
# Load the MNIST dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Preprocess the training data
train_images = x_train[0:1000].reshape(1000, 28*28) / 255
train_labels = np.zeros((len(y_train[0:1000]), 10))
for i, l in enumerate(y_train[0:1000]):
    train_labels[i][l] = 1

# Assign labels to one_hot_labels
labels = train_labels

# Preprocess the test data
test_images = x_test.reshape(len(x_test), 28*28) / 255
test_labels = np.zeros((len(y_test), 10))
for i, l in enumerate(y_test):
    test_labels[i][l] = 1

np.random.seed(1)
relu = lambda x: (x >= 0) * x
relu2deriv = lambda x: x >= 0

alpha, iterations, hidden_size, pixels_per_image, num_labels = (0.005, 350, 40, 784, 10)
weights_0_1 = 0.2*np.random.random((pixels_per_image, hidden_size)) - 0.1
weights_1_2 = 0.2*np.random.random((hidden_size, num_labels)) - 0.1

train_acc_list = {}
test_acc_list = {}

for j in range(iterations):
    error, correct_cnt = (0.0, 0)
    for i in range(len(train_images)):
        layer_0 = train_images[i:i+1]
        layer_1 = relu(np.dot(layer_0, weights_0_1))
        layer_2 = np.dot(layer_1, weights_1_2)

        error += np.sum((labels[i:i+1] - layer_2) ** 2)
        correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))

        layer_2_delta = (labels[i:i+1] - layer_2)
        layer_1_delta = layer_2_delta.dot(weights_1_2.T)*relu2deriv(layer_1)

        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)

    sys.stdout.write("\r" + \
                     " I:" + str(j) + \
                     " Error:" + str(error/float(len(train_images)))[0:5] + \
                     " Correct:" + str(correct_cnt/float(len(train_images))))
    if (j % 10 == 0 or j == iterations-1):
        train_acc_list[j] = correct_cnt/float(len(train_images))
        error, correct_cnt = (0.0, 0)
        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0, weights_0_1))
            layer_2 = np.dot(layer_1, weights_1_2)

            error += np.sum((test_labels[i:i+1] - layer_2) ** 2)
            correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
        sys.stdout.write(" Test-Err:" + str(error/float(len(test_images)))[0:5] + \
                         " Test-Acc:" + str(correct_cnt/float(len(test_images))) + "\n")
        test_acc_list[j] = correct_cnt/float(len(test_images))


        

 I:0 Error:0.722 Correct:0.537 Test-Err:0.601 Test-Acc:0.6488
 I:10 Error:0.312 Correct:0.901 Test-Err:0.420 Test-Acc:0.8114
 I:20 Error:0.260 Correct:0.937 Test-Err:0.414 Test-Acc:0.8111
 I:30 Error:0.232 Correct:0.946 Test-Err:0.417 Test-Acc:0.8066
 I:40 Error:0.215 Correct:0.956 Test-Err:0.426 Test-Acc:0.8019
 I:50 Error:0.204 Correct:0.966 Test-Err:0.437 Test-Acc:0.7982
 I:60 Error:0.194 Correct:0.967 Test-Err:0.448 Test-Acc:0.7921
 I:70 Error:0.186 Correct:0.975 Test-Err:0.458 Test-Acc:0.7864
 I:80 Error:0.179 Correct:0.979 Test-Err:0.466 Test-Acc:0.7817
 I:90 Error:0.172 Correct:0.981 Test-Err:0.474 Test-Acc:0.7758
 I:100 Error:0.166 Correct:0.984 Test-Err:0.482 Test-Acc:0.7706
 I:110 Error:0.161 Correct:0.984 Test-Err:0.489 Test-Acc:0.7686
 I:120 Error:0.157 Correct:0.986 Test-Err:0.496 Test-Acc:0.766
 I:130 Error:0.153 Correct:0.999 Test-Err:0.502 Test-Acc:0.7622
 I:140 Error:0.149 Correct:0.991 Test-Err:0.508 Test-Acc:0.758
 I:150 Error:0.145 Correct:0.991 Test-Err:0.513 Test-

In [3]:
correct_cnt/float(len(train_images))

7.073

In [4]:
# draw the graph in the end of the training and testing accuracy in plotly
import plotly.graph_objects as go
fig = go.Figure()
fig.add_trace(go.Scatter(x=list(train_acc_list.keys()), y=list(train_acc_list.values()), mode='lines+markers', name='Train Accuracy'))
fig.add_trace(go.Scatter(x=list(test_acc_list.keys()), y=list(test_acc_list.values()), mode='lines+markers', name='Test Accuracy'))
fig.update_layout(title='Train and Test Accuracy', xaxis_title='Iterations', yaxis_title='Accuracy')
fig.show()


# regularization - dropout

In [5]:
np.random.randint(2, size=layer_1.shape)


array([[0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1,
        1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0]])

In [8]:
i = 0
layer_0 = train_images[i:i+1]
dropout_mask = np.random.randint(2, size=layer_1.shape)

layer_1*= dropout_mask * 2
layer_2 = np.dot(layer_1, weights_1_2)

error += np.sum((labels[i:i+1] - layer_2) ** 2)
correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))

layer_2_delta = (labels[i:i+1] - layer_2)
layer_1_delta = layer_2_delta.dot(weights_1_2.T)*relu2deriv(layer_1)

layer_1_delta

array([[ 0.0234819 ,  0.10536939, -0.14983653, -0.14352343,  0.05995137,
         0.07663912, -0.14933566,  0.02289369, -0.03448445, -0.06408803,
         0.05417756,  0.14481582, -0.05018912, -0.01035672,  0.7293712 ,
         0.08782312,  0.0387063 ,  0.00382981,  0.02033885, -0.06837911,
         0.00838287, -0.20085782, -0.05446102,  0.03096702, -0.02732371,
         0.10253955,  0.05296539, -0.01697864, -0.12511889, -0.00814049,
         0.30843454, -0.04535994, -0.18104463, -0.13682708, -0.11248576,
        -0.12779853,  0.03245115,  0.06128092,  0.04422866,  0.09307085]])

In [9]:
# Load the MNIST dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Preprocess the training data
train_images = x_train[0:1000].reshape(1000, 28*28) / 255
train_labels = np.zeros((len(y_train[0:1000]), 10))
for i, l in enumerate(y_train[0:1000]):
    train_labels[i][l] = 1

# Assign labels to one_hot_labels
labels = train_labels

# Preprocess the test data
test_images = x_test.reshape(len(x_test), 28*28) / 255
test_labels = np.zeros((len(y_test), 10))
for i, l in enumerate(y_test):
    test_labels[i][l] = 1

np.random.seed(1)
relu = lambda x: (x >= 0) * x
relu2deriv = lambda x: x >= 0

alpha, iterations, hidden_size, pixels_per_image, num_labels = (0.005, 350, 40, 784, 10)
weights_0_1 = 0.2*np.random.random((pixels_per_image, hidden_size)) - 0.1
weights_1_2 = 0.2*np.random.random((hidden_size, num_labels)) - 0.1

train_acc_list_dropout = {}
test_acc_list_dropout = {}

for j in range(iterations):
    error, correct_cnt = (0.0, 0)
    for i in range(len(train_images)):
        layer_0 = train_images[i:i+1]
        layer_1 = relu(np.dot(layer_0, weights_0_1))
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = np.dot(layer_1, weights_1_2)

        error += np.sum((labels[i:i+1] - layer_2) ** 2)
        correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))

        layer_2_delta = (labels[i:i+1] - layer_2)
        layer_1_delta = layer_2_delta.dot(weights_1_2.T)*relu2deriv(layer_1)
        layer_1_delta *= dropout_mask

        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)

    sys.stdout.write("\r" + \
                     " I:" + str(j) + \
                     " Error:" + str(error/float(len(train_images)))[0:5] + \
                     " Correct:" + str(correct_cnt/float(len(train_images))))
    if (j % 10 == 0 or j == iterations-1):
        train_acc_list_dropout[j] = correct_cnt/float(len(train_images))
        error, correct_cnt = (0.0, 0)
        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0, weights_0_1))
            layer_2 = np.dot(layer_1, weights_1_2)

            error += np.sum((test_labels[i:i+1] - layer_2) ** 2)
            correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
        sys.stdout.write(" Test-Err:" + str(error/float(len(test_images)))[0:5] + \
                         " Test-Acc:" + str(correct_cnt/float(len(test_images))) + "\n")
        test_acc_list_dropout[j] = correct_cnt/float(len(test_images))


# draw the graph in the end of the training and testing accuracy in plotly
fig = go.Figure()
fig.add_trace(go.Scatter(x=list(train_acc_list.keys()), y=list(train_acc_list.values()), mode='lines+markers', name='Train Accuracy'))
fig.add_trace(go.Scatter(x=list(test_acc_list.keys()), y=list(test_acc_list.values()), mode='lines+markers', name='Test Accuracy'))
fig.add_trace(go.Scatter(x=list(train_acc_list_dropout.keys()), y=list(train_acc_list_dropout.values()), mode='lines+markers', name='Train Accuracy with Dropout'))
fig.add_trace(go.Scatter(x=list(test_acc_list_dropout.keys()), y=list(test_acc_list_dropout.values()), mode='lines+markers', name='Test Accuracy with Dropout'))
fig.update_layout(title='Train and Test Accuracy', xaxis_title='Iterations', yaxis_title='Accuracy')
fig.show()


 I:0 Error:0.885 Correct:0.289 Test-Err:0.718 Test-Acc:0.5418
 I:10 Error:0.564 Correct:0.647 Test-Err:0.501 Test-Acc:0.7365
 I:20 Error:0.530 Correct:0.681 Test-Err:0.478 Test-Acc:0.7621
 I:30 Error:0.508 Correct:0.713 Test-Err:0.457 Test-Acc:0.7915
 I:40 Error:0.492 Correct:0.719 Test-Err:0.445 Test-Acc:0.7998
 I:50 Error:0.462 Correct:0.742 Test-Err:0.430 Test-Acc:0.8145
 I:60 Error:0.472 Correct:0.746 Test-Err:0.446 Test-Acc:0.7974
 I:70 Error:0.463 Correct:0.744 Test-Err:0.445 Test-Acc:0.7901
 I:80 Error:0.461 Correct:0.764 Test-Err:0.426 Test-Acc:0.8105
 I:90 Error:0.462 Correct:0.749 Test-Err:0.435 Test-Acc:0.7871
 I:100 Error:0.452 Correct:0.769 Test-Err:0.433 Test-Acc:0.8039
 I:110 Error:0.439 Correct:0.778 Test-Err:0.435 Test-Acc:0.8099
 I:120 Error:0.451 Correct:0.778 Test-Err:0.442 Test-Acc:0.7871
 I:130 Error:0.452 Correct:0.783 Test-Err:0.439 Test-Acc:0.811
 I:140 Error:0.445 Correct:0.779 Test-Err:0.443 Test-Acc:0.8049
 I:150 Error:0.457 Correct:0.783 Test-Err:0.446 Test

## Пактный градиентный спуск (mini-batch gradient descent)
- это метод оптимизации, который используется для обучения нейронных сетей. Он является модификацией стохастического градиентного спуска (SGD), в котором вместо одного примера используется небольшой набор примеров. Это позволяет уменьшить дисперсию градиента и ускорить обучение.

In [11]:
batch_size = 100
alpha, iterations = (0.001, 300)
pixels_per_image, num_labels, hidden_size = (784, 10, 100)

weights_0_1 = 0.2*np.random.random((pixels_per_image, hidden_size)) - 0.1
weights_1_2 = 0.2*np.random.random((hidden_size, num_labels)) - 0.1

train_acc_list_minibatch = {}
test_acc_list_minibatch = {}

for j in range(iterations):
    error, correct_cnt = (0.0, 0)
    for i in range(int(len(train_images) / batch_size)):
        batch_start, batch_end = ((i * batch_size), ((i+1)*batch_size))
        layer_0 = train_images[batch_start:batch_end]
        layer_1 = relu(np.dot(layer_0, weights_0_1))
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = np.dot(layer_1, weights_1_2)

        error += np.sum((labels[batch_start:batch_end] - layer_2) ** 2)
        for k in range(batch_size):
            correct_cnt += int(np.argmax(layer_2[k:k+1]) == np.argmax(labels[batch_start+k:batch_start+k+1]))

            layer_2_delta = (labels[batch_start:batch_end] - layer_2) / batch_size
            layer_1_delta = layer_2_delta.dot(weights_1_2.T)*relu2deriv(layer_1)
            layer_1_delta *= dropout_mask

            weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
            weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)
    if (j%10 ==0 or j == iterations-1):
        test_error = 0.0
        test_correct_cnt = 0

        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0, weights_0_1))
            layer_2 = np.dot(layer_1, weights_1_2)

            test_error += np.sum((test_labels[i:i+1] - layer_2) ** 2)
            test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
        sys.stdout.write(" I:" + str(j) + \
                         " Test-Err:" + str(test_error/float(len(test_images)))[0:5] + \
                         " Test-Acc:" + str(test_correct_cnt/float(len(test_images))) + \
                         " Train-Err:" + str(error/float(len(train_images)))[0:5] + \
                         " Train-Acc:" + str(correct_cnt/float(len(train_images))) + "\n")
        train_acc_list_minibatch[j] = correct_cnt/float(len(train_images))
        test_acc_list_minibatch[j] = test_correct_cnt/float(len(test_images))


fig = go.Figure()
fig.add_trace(go.Scatter(x=list(train_acc_list.keys()), y=list(train_acc_list.values()), mode='lines+markers', name='Train Accuracy'))
fig.add_trace(go.Scatter(x=list(test_acc_list.keys()), y=list(test_acc_list.values()), mode='lines+markers', name='Test Accuracy'))
fig.add_trace(go.Scatter(x=list(train_acc_list_dropout.keys()), y=list(train_acc_list_dropout.values()), mode='lines+markers', name='Train Accuracy with Dropout'))
fig.add_trace(go.Scatter(x=list(test_acc_list_dropout.keys()), y=list(test_acc_list_dropout.values()), mode='lines+markers', name='Test Accuracy with Dropout'))
fig.add_trace(go.Scatter(x=list(train_acc_list_minibatch.keys()), y=list(train_acc_list_minibatch.values()), mode='lines+markers', name='Train Accuracy with Minibatch'))
fig.add_trace(go.Scatter(x=list(test_acc_list_minibatch.keys()), y=list(test_acc_list_minibatch.values()), mode='lines+markers', name='Test Accuracy with Minibatch'))
fig.update_layout(title='Train and Test Accuracy', xaxis_title='Iterations', yaxis_title='Accuracy')
fig.show()        

 I:0 Test-Err:0.823 Test-Acc:0.3943 Train-Err:1.253 Train-Acc:0.136
 I:10 Test-Err:0.568 Test-Acc:0.7431 Train-Err:0.605 Train-Acc:0.694
 I:20 Test-Err:0.507 Test-Acc:0.7814 Train-Err:0.537 Train-Acc:0.732
 I:30 Test-Err:0.475 Test-Acc:0.7998 Train-Err:0.500 Train-Acc:0.746
 I:40 Test-Err:0.453 Test-Acc:0.8041 Train-Err:0.475 Train-Acc:0.764
 I:50 Test-Err:0.446 Test-Acc:0.8005 Train-Err:0.451 Train-Acc:0.781
 I:60 Test-Err:0.437 Test-Acc:0.7981 Train-Err:0.453 Train-Acc:0.803
 I:70 Test-Err:0.440 Test-Acc:0.8001 Train-Err:0.448 Train-Acc:0.788
 I:80 Test-Err:0.442 Test-Acc:0.7912 Train-Err:0.445 Train-Acc:0.806
 I:90 Test-Err:0.436 Test-Acc:0.7989 Train-Err:0.444 Train-Acc:0.79
 I:100 Test-Err:0.443 Test-Acc:0.7899 Train-Err:0.457 Train-Acc:0.781
 I:110 Test-Err:0.435 Test-Acc:0.8018 Train-Err:0.437 Train-Acc:0.809
 I:120 Test-Err:0.442 Test-Acc:0.7986 Train-Err:0.432 Train-Acc:0.813
 I:130 Test-Err:0.442 Test-Acc:0.8048 Train-Err:0.434 Train-Acc:0.82
 I:140 Test-Err:0.438 Test-Acc:0.