In [None]:
# def convolve(X, F):
#     dim1 = X.shape[0]-F.shape[0]+1
#     dim2 = X.shape[0]-F.shape[0]+1
#     res = np.full([dim1, dim2], fill_value=np.nan)
#     for i1 in range(dim1):
#         for i2 in range(dim2):
#             res[i1, i2] = np.sum(X[i1:i1+F.shape[0], i2:i2+F.shape[1]] * F)
#             print(i1, i1+F.shape[0], i2, i2+F.shape[1])
#     return res

import numpy as np
from keras.datasets import mnist
from keras.utils import to_categorical


# Load the MNIST train/test split through Keras.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Add an explicit channel axis and rescale the pixel values by 1/255.
# The number of samples is inferred (-1) instead of being hard-coded, so the
# same cell keeps working when only a subset of the data is used.
train_images = train_images.reshape((-1, 28, 28, 1))
train_images = train_images.astype('float32') / 255
test_images = test_images.reshape((-1, 28, 28, 1))
test_images = test_images.astype('float32') / 255

# Convert the labels with Keras' to_categorical (one-hot encoding).
train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)

# Drop all length-1 axes again. For the images this removes the channel axis
# added above, because the hand-written convolution layer below works on
# 2-D inputs.
train_images = np.squeeze(train_images)
train_labels = np.squeeze(train_labels)
test_images = np.squeeze(test_images)
test_labels = np.squeeze(test_labels)

In [None]:
import numpy as np


def ReLU(x):
    """Rectified linear unit: element-wise maximum of ``x`` and zero."""
    floor = 0
    rectified = np.maximum(x, floor)
    return rectified


def D_ReLU(x):
    """Jacobian of ReLU for a 1-D input, as a diagonal matrix.

    The diagonal entry is 1.0 where ``x >= 0`` (zero itself counts as
    active) and 0.0 elsewhere.
    """
    slopes = (x >= 0).astype(float)
    return np.diag(slopes)


def SoftMax(x):
    """Softmax over all elements of ``x``.

    The global maximum is subtracted before exponentiating, which keeps
    ``np.exp`` from overflowing without changing the result.
    """
    shifted = x - np.max(x)
    exponentials = np.exp(shifted)
    normaliser = np.sum(exponentials)
    return exponentials / normaliser


def D_SoftMax(x):
    """Jacobian of SoftMax at ``x``: ``diag(p) - outer(p, p)`` with ``p = SoftMax(x)``."""
    probabilities = SoftMax(x)
    return np.diag(probabilities) - np.outer(probabilities, probabilities)


def CategoricalCrossEntropy(y_pred, y_true):
    """Mean categorical cross-entropy of a batch, in nats.

    Computes ``-sum_k y_true[k] * ln(y_pred[k])`` per sample (axis 1) and
    averages over the batch (axis 0).

    The previous implementation added a ``(1 - y_true) * log(1 - y_pred)``
    term and used base-2 logarithms, i.e. it was a per-class *binary*
    cross-entropy in bits rather than the categorical loss named here.

    Parameters
    ----------
    y_pred : array of shape (n_samples, n_classes)
        Predicted class probabilities.
    y_true : array of shape (n_samples, n_classes)
        Target distribution (one-hot in this notebook).

    Returns
    -------
    Scalar mean loss.
    """
    # Clip so that log() never sees an exact 0 (or a value above 1).
    probabilities = np.clip(y_pred, 1e-9, 1 - 1e-9)
    per_sample = -np.sum(y_true * np.log(probabilities), axis=1)
    return np.mean(per_sample, axis=0)


def D_CategoricalCrossEntropy(y_pred, y_true):
    """Gradient of the categorical cross-entropy with respect to ``y_pred``.

    For ``L = -sum_k y_true[k] * ln(y_pred[k])`` the derivative with respect
    to the predicted probabilities is ``-y_true / y_pred``.

    The previous implementation returned ``y_pred - y_true``, which is the
    gradient with respect to the softmax *inputs*. The softmax layer's
    backward pass already multiplies the incoming error by the softmax
    Jacobian, so that Jacobian was effectively applied twice. Returning the
    derivative with respect to the prediction matches the convention used by
    ``D_L2_loss``; pushing it through the softmax Jacobian yields
    ``y_pred - y_true``.

    Parameters
    ----------
    y_pred : array
        Predicted class probabilities.
    y_true : array of the same shape
        Target distribution.

    Returns
    -------
    Array with the shape of ``y_pred``.
    """
    # Lower clip avoids division by zero for probabilities that underflowed.
    probabilities = np.clip(y_pred, 1e-9, None)
    return -y_true / probabilities
 

def L2_loss(y_pred, y_true):
    """Mean over the batch (axis 0) of the per-sample sum of squared errors (axis 1)."""
    residual = y_pred - y_true
    per_sample = np.sum(residual**2, axis=1)
    return np.mean(per_sample, axis=0)


def D_L2_loss(y_pred, y_true):
    """Return ``2 * (y_pred - y_true)`` divided by the length of ``y_pred``'s first axis."""
    doubled_residual = 2 * (y_pred - y_true)
    first_axis_length = y_pred.shape[0]
    return doubled_residual / first_axis_length

In [None]:
import numpy as np


class DenseLayer:
    """Fully connected layer ``y = activation(weights @ x + bias)`` for one sample at a time.

    ``activation`` maps the pre-activation vector to the output and
    ``D_activation`` returns its Jacobian matrix. Gradients are collected
    per sample by ``backward`` and applied as one averaged step by ``update``.
    """

    def __init__(self, n_inputs, n_outputs, activation, D_activation):
        self.activation = activation
        self.D_activation = D_activation
        self.n_inputs = n_inputs
        self.n_outputs = n_outputs
        # reset() draws the initial parameters and clears all training state.
        self.reset()

    def forward(self, x):
        """Compute the layer output for ``x`` and cache what ``backward`` needs."""
        self.x = x
        pre_activation = np.dot(self.weights, x) + self.bias
        self.D_y = self.D_activation(pre_activation)
        return self.activation(pre_activation)

    def backward(self, output_error):
        """Record this sample's parameter gradients and return the error for the layer input."""
        # Error at the pre-activation: transposed activation Jacobian times upstream error.
        delta = np.dot(self.D_y.T, output_error)
        self.bias_error_lst.append(delta)
        self.weights_error_lst.append(np.outer(delta, self.x))
        return np.dot(self.weights.T, delta)

    @staticmethod
    def _scaled_step(error_lst, running_square, learning_rate, adaption_rate):
        """Average the stored gradients; return the new running squared gradient and the step."""
        gradient = np.mean(np.array(error_lst), axis=0)
        blended = adaption_rate * running_square + (1 - adaption_rate) * gradient**2
        # The lower bound keeps the division below away from zero.
        running_square = np.clip(blended, 1e-9, np.inf)
        step = learning_rate / (np.sqrt(running_square)) * gradient
        return running_square, step

    def update(self, learning_rate, adaption_rate):
        """Apply one RMSprop-style step from the gradients gathered since the last update."""
        self.weights_error_rp, weights_step = self._scaled_step(
            self.weights_error_lst, self.weights_error_rp, learning_rate, adaption_rate)
        self.weights -= weights_step
        self.weights_error_lst = []

        self.bias_error_rp, bias_step = self._scaled_step(
            self.bias_error_lst, self.bias_error_rp, learning_rate, adaption_rate)
        self.bias -= bias_step
        self.bias_error_lst = []

    def reset(self):
        """Draw fresh random weights and bias and clear caches, gradients and running averages."""
        n_in, n_out = self.n_inputs, self.n_outputs
        self.weights = np.random.randn(n_out, n_in) / np.sqrt(n_in * n_out)
        self.bias = np.random.randn(n_out) / np.sqrt(n_out)
        self.x = None
        self.D_y = None
        self.weights_error_lst = []
        self.bias_error_lst = []
        self.weights_error_rp = 0
        self.bias_error_rp = 0

In [None]:
import numpy as np
from scipy.signal import convolve2d


class ConvolutionalLayer:
    """Single-channel 2-D convolution layer with a built-in ReLU activation.

    The layer holds ``n_filter`` kernels of shape ``filter_shape`` and applies
    each of them to one 2-D input with ``scipy.signal.convolve2d`` (a true,
    kernel-flipping convolution). The output has shape
    ``(n_filter, out_rows, out_cols)``.

    NOTE(review): the gradient formulas in ``backward`` are written for the
    default ``padding='valid'``; other ``convolve2d`` modes are passed through
    unchanged and have not been verified.
    """

    def __init__(self, n_filter, filter_shape, padding='valid'):
        # Kernels are drawn uniformly from [0, 1).
        self.filter = np.random.rand(n_filter, *filter_shape)
        self.padding = padding
        self.x = None
        self.D_y = None
        self.filter_error_lst = []
        self.filter_error_rp = 0

    @staticmethod
    def activation(x):
        """ReLU: element-wise maximum of ``x`` and zero."""
        return np.maximum(x, 0)

    @staticmethod
    def D_activation(x):
        """Element-wise ReLU derivative: 1.0 where ``x >= 0``, else 0.0."""
        return (x >= 0).astype(float)

    def forward(self, x):
        """Convolve the 2-D input ``x`` with every kernel and apply ReLU.

        Caches the input and the element-wise activation derivative for
        ``backward``.
        """
        self.x = x
        z = np.array([
            convolve2d(x, kernel, mode=self.padding, boundary='fill', fillvalue=0)
            for kernel in self.filter
        ])
        # The derivative is element-wise, so it can be taken on the whole stack.
        self.D_y = self.D_activation(z)
        return self.activation(z)

    def backward(self, output_error):
        """Store this sample's kernel gradients and return the input error.

        Parameters
        ----------
        output_error : array with the shape of the ``forward`` output
            Gradient of the loss with respect to this layer's output.

        Returns
        -------
        Array of shape ``(n_filter, *x.shape)``: the contribution of every
        kernel to the gradient with respect to the input. Summing over axis 0
        gives the total input gradient.
        """
        activation_error = self.D_y * output_error

        # Because forward() uses a true convolution, the kernel gradient is
        # d loss / d f[a, b] = sum_ij err[i, j] * x[i + K-1-a, j + K-1-b],
        # which equals the 'valid' convolution of the 180-degree rotated
        # input with the error map. Convolving the un-rotated input (as was
        # done before) returns the gradient with the wrong orientation.
        x_rotated = np.flipud(np.fliplr(self.x))
        filter_error = np.array([
            convolve2d(x_rotated, error_map, mode=self.padding, boundary='fill', fillvalue=0)
            for error_map in activation_error
        ])
        self.filter_error_lst.append(filter_error)

        # The input gradient is the 'full' convolution of the error map with
        # the rotated kernel.
        input_error = np.array([
            convolve2d(np.flipud(np.fliplr(kernel)), error_map, mode='full', boundary='fill', fillvalue=0)
            for error_map, kernel in zip(activation_error, self.filter)
        ])
        return input_error

    def update(self, learning_rate, adaption_rate):
        """Apply one RMSprop-style step from the gradients gathered since the last update.

        Does nothing when no gradients were collected; averaging an empty
        list would otherwise turn every kernel into NaN.
        """
        if not self.filter_error_lst:
            return
        filter_error = np.mean(np.array(self.filter_error_lst), axis=0)
        # The lower bound keeps the division below away from zero.
        self.filter_error_rp = np.clip(
            adaption_rate * self.filter_error_rp + (1 - adaption_rate) * filter_error**2,
            1e-9, np.inf)
        self.filter -= learning_rate / (np.sqrt(self.filter_error_rp)) * filter_error
        self.filter_error_lst = []

    def reset(self):
        """Draw fresh random kernels and clear caches, gradients and running averages.

        Re-initialising the kernels mirrors ``DenseLayer.reset``; previously
        the kernels silently survived a reset while dense weights did not.
        """
        self.filter = np.random.rand(*self.filter.shape)
        self.x = None
        self.D_y = None
        self.filter_error_lst = []
        self.filter_error_rp = 0

In [None]:
class FlattenLayer:
    """Reshapes an array to 1-D on the way forward and back to its original shape on the way back."""

    def __init__(self):
        self.flatten_shape = None

    def forward(self, x):
        """Remember the shape of ``x`` and return a flattened copy of it."""
        self.flatten_shape = x.shape
        return x.flatten()

    def backward(self, output_error):
        """Reshape the incoming error to the shape seen in the last ``forward`` call."""
        return output_error.reshape(self.flatten_shape)

    def update(self, learning_rate, adaption_rate):
        """No trainable parameters; nothing to update."""
        return None

    def reset(self):
        """Forget the remembered input shape."""
        self.flatten_shape = None

In [None]:
import numpy as np


class MaxPoolingLayer:
    """Non-overlapping 2-D max pooling applied independently to every depth slice.

    ``stride`` is used both as the side length of the square pooling window
    and as the step between windows. Rows/columns that do not fill a complete
    window at the bottom/right edge are ignored.
    """

    def __init__(self, stride=2):
        self.stride = stride
        # 0/1 mask with the shape of the last input; 1 marks each window's maximum.
        self.max_pool = None

    def forward(self, x):
        """Pool every 2-D slice of ``x`` and remember where the maxima were.

        Parameters
        ----------
        x : iterable of 2-D arrays, e.g. an array of shape (depth, rows, cols)

        Returns
        -------
        Array of shape ``(depth, rows // stride, cols // stride)``.
        """
        stride = self.stride
        pooled_lst = []
        mask_lst = []
        for depth_x in x:
            n_rows = depth_x.shape[0] // stride
            n_cols = depth_x.shape[1] // stride
            pooled = np.zeros([n_rows, n_cols])
            mask = np.zeros_like(depth_x)
            for row in range(n_rows):
                for col in range(n_cols):
                    top, left = row * stride, col * stride
                    window = depth_x[top:top + stride, left:left + stride]
                    # argmax picks the first maximum in row-major order on ties.
                    max_row, max_col = np.unravel_index(np.argmax(window), window.shape)
                    pooled[row, col] = window[max_row, max_col]
                    mask[top + max_row, left + max_col] = 1
            pooled_lst.append(pooled)
            mask_lst.append(mask)
        self.max_pool = np.array(mask_lst)
        return np.array(pooled_lst)

    def backward(self, output_error):
        """Route the upstream error to the positions that produced each maximum.

        The previous implementation returned the bare 0/1 mask and ignored
        ``output_error`` entirely, so every layer in front of the pooling
        received a gradient that did not depend on the loss.

        Parameters
        ----------
        output_error : array with the shape of the ``forward`` output

        Returns
        -------
        Array with the shape of the last ``forward`` input; zero everywhere
        except at the recorded maxima, which carry their window's error.
        """
        stride = self.stride
        output_error = np.asarray(output_error)
        # Expand each error value over its stride x stride window ...
        upsampled = np.repeat(np.repeat(output_error, stride, axis=-2), stride, axis=-1)
        input_error = np.zeros(self.max_pool.shape, dtype=float)
        rows, cols = upsampled.shape[-2:]
        # ... and keep it only where the maximum was. Edge rows/columns that
        # were not covered by any window stay zero.
        input_error[..., :rows, :cols] = self.max_pool[..., :rows, :cols] * upsampled
        return input_error

    def update(self, learning_rate, adaption_rate):
        """No trainable parameters; nothing to update."""
        pass

    def reset(self):
        """Forget the mask of the last forward pass."""
        self.max_pool = None

In [None]:
import numpy as np


class ClassificationNetwork:
    """Sequential container that chains layers and trains them on random mini-batches.

    Layers are appended to ``layer_lst`` by the caller. Every layer must offer
    ``forward(x)``, ``backward(error)``, ``update(learning_rate, adaption_rate)``
    and ``reset()``.
    """

    def __init__(self):
        self.layer_lst = []

    def predict(self, X):
        """Run every sample of ``X`` through the network and stack the outputs."""
        return np.array([self._forward(x) for x in X])

    def _forward(self, x):
        """Feed one sample through all layers, first to last."""
        y_pred = x
        for layer in self.layer_lst:
            y_pred = layer.forward(y_pred)
        return y_pred

    def _backward(self, y):
        """Propagate an output error through all layers, last to first."""
        error = y
        for layer in reversed(self.layer_lst):
            error = layer.backward(error)
        return error

    def _update(self, learning_rate, adaption_rate):
        """Let every layer apply the gradients it has accumulated."""
        for layer in self.layer_lst:
            layer.update(learning_rate, adaption_rate)

    def _reset(self):
        """Reset every layer."""
        for layer in self.layer_lst:
            layer.reset()

    def fit(self, X_train, y_train, learning_rate=1e-1, adaption_rate=0.9, N_epochs=1000, N_batch=100):
        """Train the network from a reset state.

        Each of the ``N_epochs`` iterations draws ``N_batch`` samples (with
        replacement), runs forward/backward per sample, applies one parameter
        update and prints the batch loss computed from the pre-update
        predictions.

        Parameters
        ----------
        X_train : array, samples along axis 0
        y_train : array of shape (n_samples, n_classes)
        learning_rate, adaption_rate : passed to every layer's ``update``
        N_epochs : number of mini-batch iterations
        N_batch : number of samples per mini-batch
        """
        self._reset()
        n_samples = X_train.shape[0]
        for index_epoch in range(N_epochs):
            # randint's upper bound is exclusive; the former ``n_samples - 1``
            # meant the last sample was never drawn (and a single-sample
            # training set raised a ValueError).
            batch_indices = np.random.randint(0, n_samples, [N_batch])
            X_batch = X_train[batch_indices]
            y_batch = y_train[batch_indices]
            # Force a float buffer: with integer labels, full_like would
            # otherwise inherit an integer dtype and truncate the predictions.
            y_pred = np.full_like(y_batch, fill_value=np.nan, dtype=float)
            for index_batch, (x, y) in enumerate(zip(X_batch, y_batch)):
                y_pred[index_batch, :] = self._forward(x)
                self._backward(D_CategoricalCrossEntropy(y_pred[index_batch, :], y))
            self._update(learning_rate, adaption_rate)
            loss = CategoricalCrossEntropy(y_pred, y_batch)
            print("Epoch {}/{} Loss: {}".format(int(index_epoch + 1), N_epochs, loss))


In [None]:
# Training hyper-parameters.
N_epochs = 100
N_batch = 100
N_convolutional_layer_kernel_shape = (3, 3)
N_convolutional_layer_depth = 32
N_max_pooling_stride = 2

# Size of the flattened feature vector entering the first dense layer, derived
# from the other settings instead of being hard-coded: a 'valid' convolution
# shrinks each image side by (kernel side - 1), pooling then floor-divides it
# by the stride. For 28x28 images this is 32 * 13 * 13 = 5408.
# Without the pooling layer it would be 32 * 26 * 26 = 21632.
N_pooled_shape = tuple(
    (image_side - kernel_side + 1) // N_max_pooling_stride
    for image_side, kernel_side in zip(train_images.shape[1:3], N_convolutional_layer_kernel_shape)
)
N_dense_relu_inputs = N_convolutional_layer_depth * N_pooled_shape[0] * N_pooled_shape[1]
N_dense_relu_outputs = 64
N_dense_softmax_inputs = N_dense_relu_outputs
N_dense_softmax_outputs = train_labels.shape[1]
learning_rate = 1e-4
adaption_rate = 0.5

# Convolution -> max pooling -> flatten -> dense ReLU -> dense softmax.
network = ClassificationNetwork()
network.layer_lst.append(ConvolutionalLayer(N_convolutional_layer_depth, N_convolutional_layer_kernel_shape))
network.layer_lst.append(MaxPoolingLayer(N_max_pooling_stride))
network.layer_lst.append(FlattenLayer())
network.layer_lst.append(DenseLayer(N_dense_relu_inputs, N_dense_relu_outputs, ReLU, D_ReLU))
network.layer_lst.append(DenseLayer(N_dense_softmax_inputs, N_dense_softmax_outputs, SoftMax, D_SoftMax))
network.fit(train_images, train_labels, learning_rate, adaption_rate, N_epochs, N_batch)

In [None]:
# Classify the test set and report the share of samples whose most probable
# predicted class equals the class marked in the one-hot test labels.
predicted_labels = network.predict(test_images)
predicted_classes = np.argmax(predicted_labels, axis=1)
true_classes = np.argmax(test_labels, axis=1)
n_correct = np.sum(predicted_classes == true_classes)
accuracy = n_correct / predicted_labels.shape[0] * 100
print("Accuracy: {:.1f}%".format(accuracy))