In [2]:
import numpy as np
import pandas as pd
import os
import cv2 as cv

Base class

In [3]:
class baseModel:
    """Minimal layer interface: a forward pass and a backward pass.

    Both methods are no-op placeholders that return ``None``; concrete
    layers are expected to override them.
    """

    def forward(self, x):
        """Placeholder forward pass; ignores ``x`` and returns ``None``."""
        return None

    def backward(self, output, learning_rate):
        """Placeholder backward pass; ignores its arguments, returns ``None``."""
        return None

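### Convolution layer: there are four hyperparameters:
1. Number of output channels (`output_channel`)
2. Filter dimension (`kernel_size`; filters are square, `kernel_size` x `kernel_size`)
3. Stride (`stride`)
4. Padding (`padding`; number of zero rows/columns added on each side)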

In [None]:
class ConvolutionLayer(baseModel):
    """2-D convolution layer with square filters, trained by plain SGD.

    Inputs are laid out as ``(batch_size, channel, height, width)``.
    Weights have shape ``(output_channel, channel, kernel_size, kernel_size)``
    and are created lazily on the first ``forward`` call, once the number of
    input channels is known.
    """

    def __init__(self, output_channel, kernel_size, stride, padding):
        """Store the hyperparameters; parameters are allocated in ``forward``.

        Args:
            output_channel: Number of filters (output channels).
            kernel_size: Side length of each square filter.
            stride: Step between consecutive filter positions.
            padding: Number of zero rows/columns added on every spatial side.
        """
        self.output_channel = output_channel
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.weights = None
        self.bias = None

    def _pad(self, x):
        """Return ``x`` zero-padded by ``self.padding`` on both spatial axes."""
        pad = self.padding
        if pad == 0:
            return x
        return np.pad(
            x, ((0, 0), (0, 0), (pad, pad), (pad, pad)), mode="constant"
        )

    def forward(self, x):
        """Convolve ``x`` with the layer's filters and add the bias.

        Args:
            x: Array of shape ``(batch_size, channel, height, width)``.

        Returns:
            Array of shape ``(batch_size, output_channel, out_h, out_w)`` with
            ``out_h = (height - kernel_size + 2 * padding) // stride + 1`` and
            ``out_w`` defined analogously from ``width``.
        """
        self.x = x  # cached for the backward pass
        batch_size, channel, height, width = x.shape
        size, stride = self.kernel_size, self.stride
        out_h = (height - size + 2 * self.padding) // stride + 1
        out_w = (width - size + 2 * self.padding) // stride + 1

        if self.weights is None:
            # He initialization: scale by sqrt(2 / fan_in).
            fan_in = channel * size * size
            self.weights = np.random.randn(
                self.output_channel, channel, size, size
            ) * np.sqrt(2 / fan_in)
            self.bias = np.random.randn(self.output_channel)

        # The output size above assumes a padded input, so the input must
        # actually be padded before windows are sliced out of it.
        x_padded = self._pad(x)

        output = np.zeros((batch_size, self.output_channel, out_h, out_w))
        for row in range(out_h):
            top = row * stride
            for col in range(out_w):
                left = col * stride
                # window: (batch, channel, size, size)
                window = x_padded[:, :, top:top + size, left:left + size]
                # Contract channel and both kernel axes -> (batch, out_ch).
                output[:, :, row, col] = np.tensordot(
                    window, self.weights, axes=([1, 2, 3], [1, 2, 3])
                ) + self.bias

        self.output = output
        return output

    def backward(self, output, learning_rate):
        """Back-propagate through the layer and apply one SGD step.

        Must be called after ``forward``, which caches the input.

        Args:
            output: Gradient of the loss with respect to this layer's output,
                shape ``(batch_size, output_channel, out_h, out_w)``.
            learning_rate: Step size for the weight and bias updates.

        Returns:
            Gradient of the loss with respect to the layer input, with the
            same shape as the ``x`` given to the last ``forward`` call.
        """
        self.learning_rate = learning_rate
        size, stride, pad = self.kernel_size, self.stride, self.padding

        x_padded = self._pad(self.x)
        grad_padded = np.zeros(x_padded.shape)
        grad_weights = np.zeros(self.weights.shape)

        _, _, out_h, out_w = output.shape
        for row in range(out_h):
            top = row * stride
            for col in range(out_w):
                left = col * stride
                grad_here = output[:, :, row, col]  # (batch, out_ch)
                window = x_padded[:, :, top:top + size, left:left + size]
                # d(loss)/d(input window): sum over output channels.
                grad_padded[
                    :, :, top:top + size, left:left + size
                ] += np.tensordot(grad_here, self.weights, axes=([1], [0]))
                # d(loss)/d(weights): sum over the batch.
                grad_weights += np.tensordot(
                    grad_here, window, axes=([0], [0])
                )

        grad_bias = output.sum(axis=(0, 2, 3))

        # Update only after every gradient is accumulated, so the input
        # gradient above is computed with the pre-update weights.
        self.weights -= learning_rate * grad_weights
        self.bias -= learning_rate * grad_bias

        # Drop the gradient that fell on the zero padding.
        height, width = self.x.shape[2], self.x.shape[3]
        self.delta = grad_padded[:, :, pad:pad + height, pad:pad + width]
        return self.delta