In [3]:
import numpy as np
import pandas as pd
import scipy.signal
import PIL.Image as Image
import matplotlib.pyplot as plt

In [4]:
IMAGE_SIZE = (28, 28)
# Number of leading rows of train.csv used for training. The remaining rows are
# kept aside, with their labels, to check accuracy. Defined once so the training
# slice and the hold-out slice can never drift apart.
TRAIN_SIZE = 30000

# Import data
data = pd.read_csv(r"../digit-recognizer/train.csv")
data_test = pd.read_csv(r"../digit-recognizer/test.csv")

# Training split: column 0 is taken as the label, the remaining columns as pixels.
# Pixels are cast to uint8, reshaped to (n, 1) + IMAGE_SIZE (one channel per image)
# and divided by 255.
labels: np.ndarray = data.values[:TRAIN_SIZE, 0]
images: np.ndarray = data.values[:TRAIN_SIZE, 1:].astype('uint8').reshape((-1, 1) + IMAGE_SIZE) / 255

# Rows of test.csv: every column is treated as a pixel; rows are left flat (not reshaped).
images_test: np.ndarray = data_test.values.astype('uint8') / 255

# Labelled hold-out split: the rows of train.csv after the first TRAIN_SIZE.
# Images are left flat here as well.
images_test_with_label: np.ndarray = data.values[TRAIN_SIZE:, 1:].astype('uint8') / 255
labels_test_with_label: np.ndarray = data.values[TRAIN_SIZE:, 0]

In [7]:
class Convolutional2DLayer:
    """2D convolution layer (forward pass only).

    Each output channel owns a single ``kernel_size x kernel_size`` kernel. That
    kernel is slid over every input channel and the per-channel results are
    summed into the output channel. The kernel is applied without flipping.

    Attributes:
        kernels: array of shape (out_channel, kernel_size, kernel_size), drawn
            from ``np.random.randn``.
        stride: step, in input cells, between neighbouring kernel positions.
        padding: number of zero cells added on every side of each channel.
        input_shape: expected input shape, (in_channel, rows, cols).
        out_shape: produced output shape, (out_channel, out_rows, out_cols).
        gradient: running sum of every input passed to ``__call__``.
    """

    def __init__(self, in_channel: int, out_channel: int, input_shape: tuple, kernel_size: int, stride: int = 1, padding: int = 0):
        """Convolution layer of CNN. Receives:
            in_channel -> amount of channels the input has
            out_channel -> amount of channels the output has
            input_shape -> input shape (without the channels)
            kernel_size -> kernel size used for convolution
            stride -> how many cells kernel will move
            padding -> amount of zero padding
        """

        self.kernels: np.ndarray = np.random.randn(out_channel, kernel_size, kernel_size)
        # self.kernels = np.ones((out_channel, kernel_size, kernel_size)) # debug
        self.stride: int = stride
        self.padding: int = padding
        self.input_shape: tuple = (in_channel,) + tuple(input_shape)

        # Per spatial axis: floor((size - kernel + 2 * padding) / stride) + 1.
        # Converted to plain ints so the shape tuple prints and compares cleanly.
        spatial = (np.asarray(input_shape) - kernel_size + 2 * padding) // stride + 1
        self.out_shape: tuple = (out_channel,) + tuple(int(size) for size in spatial)

        self.gradient = np.zeros(self.input_shape)

    def _convolve(self, x: np.ndarray, kernel: np.ndarray):
        """Slide ``kernel`` over one 2D channel ``x`` and return the response map.

        ``x`` is zero-padded by ``self.padding`` on every side first. The result
        has shape ``self.out_shape[1:]``; output cell (i, j) is the sum of the
        element-wise product of ``kernel`` with the window of the padded input
        whose top-left corner is (i * stride, j * stride).
        """

        padded = np.pad(x, self.padding)
        out_rows, out_cols = self.out_shape[1:]
        kernel_rows, kernel_cols = kernel.shape

        res = np.zeros((out_rows, out_cols))
        for i in range(out_rows):
            # The stride scales the *input* offset; output indices stay dense.
            top = i * self.stride
            for j in range(out_cols):
                left = j * self.stride
                window = padded[top:top + kernel_rows, left:left + kernel_cols]
                res[i, j] = np.sum(window * kernel)

        return res

    def __call__(self, x: np.ndarray):
        """Forward method.

        Args:
            x: array whose shape equals ``self.input_shape``.

        Returns:
            float64 array of shape ``self.out_shape``.

        Raises:
            AssertionError: if ``x`` is not a numpy array or has the wrong shape.

        Side effect: ``x`` is added to ``self.gradient``.
        """

        assert isinstance(x, np.ndarray), "Error: x must be a numpy array"
        assert (x.shape == self.input_shape), f"Error: layer {self.__class__.__name__} accepts {self.input_shape} input, while x is shaped as {x.shape}"
        self.gradient += x
        output = np.zeros(self.out_shape, dtype=np.float64)
        for i, kernel in enumerate(self.kernels):
            # The same kernel is applied to every input channel and summed.
            for channel in x:
                output[i] += self._convolve(channel, kernel)
                # output[i] += scipy.signal.convolve2d(channel, kernel, mode='valid') # scipy method

        return output

class LinearLayer:
    """Fully connected layer without bias (forward pass only).

    Attributes:
        weight: array of shape (input, output), drawn from ``np.random.randn``.
        gradient: array of shape (input,), the running sum of every input
            vector passed to ``__call__``.
    """

    def __init__(self, input, output):
        """Create the layer.

        Args:
            input: number of input features.
            output: number of output features.
        """
        self.weight = np.random.randn(input, output)
        self.gradient: np.ndarray = np.zeros(input)

    def __call__(self, x: np.ndarray):
        """Return ``np.dot(x, self.weight)``.

        Args:
            x: a single vector of shape (input,) or a batch of shape (n, input).
               The last axis must match the number of input features.

        Returns:
            Array with the last axis replaced by the number of output features.

        Raises:
            AssertionError: if ``x`` is not a numpy array or its last axis does
                not match the layer's input size.

        Side effect: every input vector in ``x`` is added to ``self.gradient``.
        """
        in_features = self.weight.shape[0]
        assert isinstance(x, np.ndarray), "Error: x must be a numpy array"
        # np.dot contracts the LAST axis of x with the first axis of weight, so
        # that is the axis that has to match (the first axis is the batch size).
        assert (x.ndim >= 1 and x.shape[-1] == in_features), f"Error: this layer accepts (n, {self.weight.shape[0]}) input, while x is shaped as {x.shape}"

        # Collapse any batch axes so the accumulator keeps its (input,) shape.
        # For a single vector this adds exactly x.
        self.gradient += x.reshape(-1, in_features).sum(axis=0)
        output = np.dot(x, self.weight)
        return output

class MaxPooling2d:
    """2D max pooling layer (forward pass only).

    Attributes:
        input_shape: expected input shape, (channel, rows, cols).
        kernel_size: side length of the square pooling window.
        stride: step, in input cells, between neighbouring windows.
        padding: number of cells added on every side of each channel.
        out_shape: produced output shape, (channel, out_rows, out_cols).
        gradient: running sum of every output returned by ``__call__``.
    """

    def __init__(self, input_shape: tuple, kernel_size: int, stride: int=1, padding: int=0):
        """2 dimensional max pooling"""
        assert (len(input_shape) == 3), f"Error: 2D max pooling layer's input shape must be in (channel, row, height) format. Input is {input_shape}"
        self.input_shape: tuple = input_shape
        self.kernel_size: int = kernel_size
        self.stride: int = stride
        self.padding: int = padding

        # Per spatial axis: floor((size - kernel + 2 * padding) / stride) + 1.
        spatial = (np.asarray(input_shape[1:]) - kernel_size + 2 * padding) // stride + 1
        self.out_shape: tuple = (input_shape[0], ) + tuple(int(size) for size in spatial)
        self.gradient: np.ndarray = np.zeros(self.out_shape)

    def __call__(self, x: np.ndarray):
        """2 dimensional max pooling

        Args:
            x: array whose shape equals ``self.input_shape``.

        Returns:
            float64 array of shape ``self.out_shape``; cell (c, i, j) is the
            maximum of channel ``c`` over the ``kernel_size`` square window whose
            top-left corner is (i * stride, j * stride) in the padded input.

        Raises:
            AssertionError: if ``x`` is not a numpy array or has the wrong shape.

        Side effect: the output is added to ``self.gradient``.
        """

        assert isinstance(x, np.ndarray), "Error: x must be a numpy array"
        assert (x.shape == self.input_shape), f"Error: this layer accepts {self.input_shape} input, while x is shaped as {x.shape}"

        if self.padding:
            # Pad only the two spatial axes, with -inf so a padded cell can never
            # win the maximum. -inf needs a float array, hence the cast.
            # A window made entirely of padding (padding >= kernel_size) would
            # therefore yield -inf.
            border = (self.padding, self.padding)
            x = np.pad(x.astype(np.float64), ((0, 0), border, border), constant_values=-np.inf)

        out_rows, out_cols = self.out_shape[1:]
        size = self.kernel_size
        output = np.empty(self.out_shape, dtype=np.float64)
        for i in range(out_rows):
            # The stride scales the *input* offset; output indices stay dense.
            top = i * self.stride
            for j in range(out_cols):
                left = j * self.stride
                # Taking the true window maximum for all channels at once (no
                # 0 seed), so all-negative windows are pooled correctly.
                output[:, i, j] = x[:, top:top + size, left:left + size].max(axis=(1, 2))

        self.gradient += output

        return output

def relu(x: np.ndarray):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    floor = 0
    rectified = np.maximum(floor, x)
    return rectified

def get_flatten_shape(shape: tuple):
    """Return, as a Python int, the product of all entries of ``shape``.

    This is the length an array of that shape has once flattened.
    """
    dims = np.array(shape)
    total = np.prod(dims)
    return int(total)

def flatten(x: np.ndarray):
    """Reshape ``x`` into a one-dimensional array, keeping element order."""
    one_dimensional = x.reshape(-1)
    return one_dimensional

def softmax(x: np.ndarray):
    """Softmax over all elements of ``x``.

    The global maximum is subtracted before exponentiating, so the largest
    exponent is 0; the exponentials are then divided by their total.
    """
    shifted = x - x.max()
    exps = np.exp(shifted)
    total = np.sum(exps)
    return exps / total

# Build the network. Every layer derives its input shape from the `out_shape`
# of the layer before it. Layers are created in the same order as they are
# applied, so the random weights are drawn in that order.
conv1 = Convolutional2DLayer(in_channel=1, out_channel=5, input_shape=IMAGE_SIZE, kernel_size=3)
maxpool1 = MaxPooling2d(input_shape=conv1.out_shape, kernel_size=2, stride=2)
conv2 = Convolutional2DLayer(
    in_channel=maxpool1.out_shape[0],
    out_channel=10,
    input_shape=maxpool1.out_shape[1:],
    kernel_size=3,
)
maxpool2 = MaxPooling2d(input_shape=conv2.out_shape, kernel_size=2, stride=1)
linear1 = LinearLayer(input=get_flatten_shape(maxpool2.out_shape), output=512)
linear2 = LinearLayer(input=512, output=10)

# Forward pass on the first training image, one stage per line:
# conv -> ReLU -> pool (twice), then flatten -> dense -> ReLU -> dense -> softmax.
res = maxpool1(relu(conv1(images[0])))
res = maxpool2(relu(conv2(res)))
res = relu(linear1(flatten(res)))
res = softmax(linear2(res))

# Manual pooling check, kept for debugging:
# maxpool3 = MaxPooling2d((1, 2, 2), 2)
# print(maxpool3(np.array([[[1, 2], [3, 4]]])))

# print(linear1.gradient)

# Class scores for the image, followed by the index of the largest score.
print(res)
print(res.argmax())

[0.00000000e+00 5.03401544e-11 0.00000000e+00 0.00000000e+00
 4.31998547e-30 0.00000000e+00 1.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00]
6
