# CNN - Convolutional Neural Network

In [2]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt


In [3]:
mnist = fetch_openml('mnist_784', as_frame=False)

  warn(


In [4]:
n = int(1e3)

labels = []
for val in mnist.target:
    label = np.zeros(10)
    label[int(val)] = 1
    labels.append(label)

X_train, X_test, y_train, y_test = train_test_split(
    mnist.data / 255.0,
    labels,
    test_size=0.2,
    shuffle=True
)
X_train = X_train[:n]
y_train = y_train[:n]
X_test = X_test[:n]
y_test = y_test[:n]

In [5]:
def relu(x):
    return (x >= 0) * x


def relu2deriv(x):
    return x >= 0


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def sigmoid2deriv(x):
    return x * (1 - x)


def tanh(x):
    return np.tanh(x)


def tanh2deriv(x):
    return 1 - (x ** 2)


def softmax(x):
    temp = np.exp(x)
    return temp / np.sum(temp, axis=1, keepdims=True)

In [6]:
kernel_size = 3
n_kernels = 16
kernels = 0.02 * np.random.random((kernel_size * kernel_size, n_kernels)) - 0.01

batch_size = 128
alpha = 2
epochs = 300
hidden_size = ((28 - kernel_size) *
               (28 - kernel_size)) * n_kernels

weights_1_2 = 0.2 * np.random.random((hidden_size, 10)) - 0.1

train_acc_list = []
test_acc_list = []


def get_image_section(layer, row_from, row_to, col_from, col_to):
    section = layer[:, row_from:row_to, col_from:col_to]
    return section.reshape(-1, 1, row_to - row_from, col_to - col_from)


for e in range(epochs):
    correct_cnt = 0
    for i in range(int(len(X_train) / batch_size)):
        batch_start = i * batch_size
        batch_end = (i + 1) * batch_size

        layer_0 = X_train[batch_start: batch_end]
        layer_0 = layer_0.reshape(layer_0.shape[0], 28, 28)

        sects = []
        for row_start in range(layer_0.shape[1] - kernel_size):
            for col_start in range(layer_0.shape[2] - kernel_size):
                sect = get_image_section(layer_0, row_start, row_start + kernel_size, col_start,
                                         col_start + kernel_size)
                sects.append(sect)

        # print(len(sects), sects[0].shape) # 625 (128, 1, 3, 3)
        expanded_input = np.concatenate(sects, axis=1)
        # print(expanded_input.shape) # (128, 625, 3, 3)

        es = expanded_input.shape
        flattened_input = expanded_input.reshape(es[0] * es[1], -1)
        # print(flattened_input.shape)
        #   (80000, 9) <- 80000 parts of different images of size 3x3

        kernel_output_1 = flattened_input.dot(kernels)
        # print(kernel_output.shape)
        #   (80000, 16) <- 80000 parts of different images holding values of 16 kernels
        layer_1 = tanh(kernel_output_1.reshape(es[0], -1))
        # print(layer_1.shape)
        #   (128, 10000) <- 128 images holding values of 10000 neurons = 16 kernels * 625 positions
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = softmax(np.dot(layer_1, weights_1_2))
        # print(layer_2.shape)
        #   (128, 10) <- 128 images holding values of 10 neurons (labels)

        for k in range(batch_size):
            correct_cnt += int(
                np.argmax(layer_2[k:k + 1]) == np.argmax(y_train[batch_start + k: batch_start + k + 1])
            )

        layer_2_delta = (y_train[batch_start: batch_end] - layer_2) / (batch_size * layer_2.shape[0])
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * tanh2deriv(layer_1)
        layer_1_delta *= dropout_mask

        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        l1d_reshape = layer_1_delta.reshape(kernel_output_1.shape)
        k_update = flattened_input.T.dot(l1d_reshape)
        kernels -= alpha * k_update

    if e % 10 == 0:

        test_correct_cnt = 0

        for i in range(len(X_test)):
            layer_0 = X_test[i:i + 1]
            layer_0 = layer_0.reshape(layer_0.shape[0], 28, 28)

            sects = []
            for row_start in range(layer_0.shape[1] - kernel_size):
                for col_start in range(layer_0.shape[2] - kernel_size):
                    sect = get_image_section(layer_0, row_start, row_start + kernel_size, col_start,
                                             col_start + kernel_size)
                    sects.append(sect)

            expanded_input = np.concatenate(sects, axis=1)
            es = expanded_input.shape
            flattened_input = expanded_input.reshape(es[0] * es[1], -1)

            kernel_output_1 = flattened_input.dot(kernels)
            layer_1 = tanh(kernel_output_1.reshape(es[0], -1))
            layer_2 = np.dot(layer_1, weights_1_2)

            test_correct_cnt += int(
                np.argmax(layer_2) == np.argmax(y_test[i:i + 1])
            )

        train_acc_list.append(correct_cnt / len(X_train))
        test_acc_list.append(test_correct_cnt / len(X_test))

        print(f"Epoch: {e}")
        print(f" Train-Acc: {correct_cnt / len(X_train):.4} "
              f"\t Test-Acc: {test_correct_cnt / len(X_test):.4}")

KeyboardInterrupt: 

In [None]:
plt.plot(train_acc_list, label='train')
plt.plot(test_acc_list, label='test')

Gradient descent: od 4 min 34 s
```
Train-Err: 0.3154
Train-Acc: 0.903
Test-Err: 0.4271
Test-Acc: 0.786
```

Mini-batch gradient descent: 7 min 48 s
```
Train-Err: 0.3927
Train-Acc: 0.832
Test-Err: 0.4049
Test-Acc: 0.8
```

Mini-batch gradient descent with activation functions: 15 s
```
Train-Acc: 0.942
Test-Acc: 0.859
```

Convolution: 3 min 47 s
```
Train-Acc: 0.808
Test-Acc: 0.874
```


## 2 layers of convolution

Niestety nie udało mi się zaimplementować 2 warstw konwolucyjnych. Poniżej kod, który próbowałem napisać.

In [53]:
kernel_1_size = 3
n_kernels_1 = 16
kernels_1 = 0.02 * np.random.random((kernel_1_size * kernel_1_size, n_kernels_1)) - 0.01

kernel_2_size = 3
n_kernels_2 = 32
kernels_2 = 0.02 * np.random.random((kernel_2_size * kernel_2_size * n_kernels_1, n_kernels_2)) - 0.01

batch_size = 128
alpha = 2
epochs = 300
hidden_1_size = ((28 - kernel_1_size) *
                 (28 - kernel_1_size)) * n_kernels_1
print("hidden_1_size", hidden_1_size)

hidden_2_size = ((28 - kernel_1_size - kernel_2_size) *
                 (28 - kernel_1_size - kernel_2_size)) * n_kernels_2
print("hidden_2_size", hidden_2_size)

weights_2_3 = 0.2 * np.random.random((hidden_2_size, 10)) - 0.1

train_acc_list = []
test_acc_list = []


def get_image_section(layer, row_from, row_to, col_from, col_to):
    section = layer[:, row_from:row_to, col_from:col_to, :]
    return section.reshape(-1, 1, row_to - row_from, col_to - col_from, layer.shape[3])


for e in range(epochs):
    correct_cnt = 0
    for i in range(int(len(X_train) / batch_size)):
        batch_start = i * batch_size
        batch_end = (i + 1) * batch_size

        layer_0 = X_train[batch_start: batch_end]
        layer_0 = layer_0.reshape(layer_0.shape[0], 28, 28, 1)

        sects = []
        for row_start in range(layer_0.shape[1] - kernel_1_size):
            for col_start in range(layer_0.shape[2] - kernel_1_size):
                sect = get_image_section(layer_0, row_start, row_start + kernel_1_size, col_start,
                                         col_start + kernel_1_size)
                sects.append(sect)
        print("sects", len(sects), sects[0].shape)

        expanded_input_1 = np.concatenate(sects, axis=1)
        print("expanded_input_1", expanded_input_1.shape)

        es = expanded_input_1.shape
        flattened_input_1 = expanded_input_1.reshape(es[0] * es[1], -1)
        print("flattened_input_1", flattened_input_1.shape)

        kernel_output_1 = flattened_input_1.dot(kernels_1)
        layer_1 = tanh(kernel_output_1.reshape(es[0], -1))
        dropout_mask_1 = np.random.randint(2, size=layer_1.shape)
        layer_1 *= dropout_mask_1 * 2

        print("layer_1", layer_1.shape)

        print("===================")

        layer_2 = layer_1.reshape(layer_1.shape[0], 28 - kernel_1_size, 28 - kernel_1_size, n_kernels_1)
        print("layer_2", layer_2.shape)

        sects = []
        for row_start in range(layer_2.shape[1] - kernel_2_size):
            for col_start in range(layer_2.shape[2] - kernel_2_size):
                sect = get_image_section(layer_2, row_start, row_start + kernel_2_size, col_start,
                                         col_start + kernel_2_size)
                sects.append(sect)
        print("sects", len(sects), sects[0].shape)

        expanded_input_2 = np.concatenate(sects, axis=1)
        print("expanded_input_2", expanded_input_2.shape)

        es = expanded_input_2.shape
        flattened_input_2 = expanded_input_2.reshape(es[0] * es[1], -1)
        print("flattened_input_2", flattened_input_2.shape)

        kernel_output_2 = flattened_input_2.dot(kernels_2)
        print("kernel_output_2", kernel_output_2.shape)
        layer_2 = tanh(kernel_output_2.reshape(es[0], -1))
        dropout_mask_2 = np.random.randint(2, size=layer_2.shape)
        layer_2 *= dropout_mask_2 * 2
        print("layer_2", layer_2.shape)

        print("===================")

        layer_3 = softmax(np.dot(layer_2, weights_2_3))
        print("layer_3", layer_3.shape)

        for k in range(batch_size):
            correct_cnt += int(
                np.argmax(layer_2[k:k + 1]) == np.argmax(y_train[batch_start + k: batch_start + k + 1])
            )

        print("===================")
        print("===================")

        layer_3_delta = (y_train[batch_start: batch_end] - layer_3) / (batch_size * layer_3.shape[0])
        print("layer_3_delta", layer_3_delta.shape)

        print("weights_2_3", weights_2_3.shape)

        layer_2_delta = layer_3_delta.dot(weights_2_3.T) * tanh2deriv(layer_2)
        layer_2_delta *= dropout_mask_2
        print("layer_2_delta", layer_2_delta.shape)

        weights_2_3 += alpha * layer_2.T.dot(layer_3_delta)

        layer_1_delta = layer_2_delta.dot(kernels_2.T) * tanh2deriv(layer_1)
        layer_1_delta *= dropout_mask_1

        l2d_reshape = layer_2_delta.reshape(kernel_output_2.shape)
        k_2_update = flattened_input_2.T.dot(l2d_reshape)
        kernels_2 -= alpha * k_2_update

        print("l2d_reshape", l2d_reshape.shape)
        print("layer_1", layer_1.shape)

        print("kernels_2", kernels_2.shape)

        layer_1_delta = layer_2_delta.dot(kernels_2.T) * tanh2deriv(layer_1)
        layer_1_delta *= dropout_mask_1

        l2d_shape = layer_1_delta.reshape(kernel_output_1.shape)
        k_1_update = flattened_input_1.T.dot(l2d_shape)
        kernels_1 -= alpha * k_1_update

    if e % 10 == 0:

        test_correct_cnt = 0

        for i in range(len(X_test)):
            layer_0 = X_test[i:i + 1]
            layer_0 = layer_0.reshape(layer_0.shape[0], 28, 28)

            sects = []
            for row_start in range(layer_0.shape[1] - kernel_1_size):
                for col_start in range(layer_0.shape[2] - kernel_1_size):
                    sect = get_image_section(layer_0, row_start, row_start + kernel_1_size, col_start,
                                             col_start + kernel_1_size)
                    sects.append(sect)

            expanded_input = np.concatenate(sects, axis=1)
            es = expanded_input.shape
            flattened_input = expanded_input.reshape(es[0] * es[1], -1)

            kernel_output_1 = flattened_input.dot(kernels)
            layer_1 = tanh(kernel_output_1.reshape(es[0], -1))
            layer_2 = np.dot(layer_1, weights_1_2)

            test_correct_cnt += int(
                np.argmax(layer_2) == np.argmax(y_test[i:i + 1])
            )

        train_acc_list.append(correct_cnt / len(X_train))
        test_acc_list.append(test_correct_cnt / len(X_test))

        print(f"Epoch: {e}")
        print(f" Train-Acc: {correct_cnt / len(X_train):.4} "
              f"\t Test-Acc: {test_correct_cnt / len(X_test):.4}")

hidden_1_size 10000
hidden_2_size 15488
sects 625 (128, 1, 3, 3, 1)
expanded_input_1 (128, 625, 3, 3, 1)
flattened_input_1 (80000, 9)
layer_1 (128, 10000)
layer_2 (128, 25, 25, 16)
sects 484 (128, 1, 3, 3, 16)
expanded_input_2 (128, 484, 3, 3, 16)
flattened_input_2 (61952, 144)
kernel_output_2 (61952, 32)
layer_2 (128, 15488)
layer_3 (128, 10)
layer_3_delta (128, 10)
weights_2_3 (15488, 10)
layer_2_delta (128, 15488)


ValueError: shapes (128,15488) and (32,144) not aligned: 15488 (dim 1) != 32 (dim 0)