In [1]:
import numpy as np
import pandas as pd
import os
import cv2 as cv

Base class

In [2]:
class baseModel:
    def forward(self, x):
        pass

    def backward(self, output, learning_rate):
        pass

### Convolution: There will be four (hyper)parameters:
1. Number of output channels
2. Filter dimension
3. Stride
4. Padding

In [3]:
class ConvolutionLayer(baseModel):
    def __init__(self, output_channel, kernel_size, stride, padding):
        self.output_channel = output_channel
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.weights = None
        self.bias = None

    def forward(self, x):
        self.x = x # batch_size, channel, height, width
        batch_size, channel, height, width = x.shape
        output_height = (height - self.kernel_size + 2 * self.padding) // self.stride + 1
        output_width = (width - self.kernel_size + 2 * self.padding) // self.stride + 1
        self.output = np.zeros((batch_size, self.output_channel,  output_height, output_width))
        if self.weights is None:
            # self.weights = np.random.randn(self.output_channel, channel, self.kernel_size, self.kernel_size)
            # init weight with Xavier method
            self.weights = np.random.randn(self.output_channel, channel, self.kernel_size, self.kernel_size) * np.sqrt(2 / (channel * self.kernel_size * self.kernel_size))
            self.bias = np.random.randn(self.output_channel)
        
        new_arr = np.lib.stride_tricks.as_strided(x, 
            shape=(batch_size, channel, output_height, output_width, self.kernel_size, self.kernel_size), 
            strides=(x.strides[0], x.strides[1], x.strides[2], x.strides[3], x.strides[2], x.strides[3]))

    
        self.output = np.einsum('bihwkl,oikl->bohw', new_arr, self.weights) + self.bias[None, :, None, None]
        return self.output

        
        
        # for i in range(batch_size):
        #     for j in range(self.output_channel):
        #         for k in range(self.output.shape[2]):
        #             for l in range(self.output.shape[3]):
        #                 self.output[i, j, k, l] = np.sum(self.weights[j] * x[i, :, k * self.stride:k * self.stride + self.kernel_size, l * self.stride:l * self.stride + self.kernel_size]) + self.bias[j]
        # return self.output

    def backward(self, output, learning_rate):
        self.output = output
        self.learning_rate = learning_rate
        self.delta = np.zeros(self.x.shape)
        for i in range(self.output_channel):
            for j in range(self.output.shape[1]):
                for k in range(self.output.shape[2]):
                    self.delta[:, j * self.stride:j * self.stride + self.kernel_size, k * self.stride:k * self.stride + self.kernel_size] += self.weights[i] * self.output[i, j, k]
                    self.weights[i] -= self.learning_rate * self.x[:, j * self.stride:j * self.stride + self.kernel_size, k * self.stride:k * self.stride + self.kernel_size] * self.output[i, j, k]
                    self.bias[i] -= self.learning_rate * self.output[i, j, k]
        return self.delta

In [4]:
count = 1*1*5*5
input_arr = np.ones((1, 1, 5, 5))
print(f'input: {input_arr}\n')
conv_layer = ConvolutionLayer(2, 3, 1, 0)
print(conv_layer)
output = conv_layer.forward(input_arr)
print(output.shape)
print(f'output--------\n{output}\n')

input: [[[[1. 1. 1. 1. 1.]
   [1. 1. 1. 1. 1.]
   [1. 1. 1. 1. 1.]
   [1. 1. 1. 1. 1.]
   [1. 1. 1. 1. 1.]]]]

<__main__.ConvolutionLayer object at 0x7fc4a9e33d60>
(1, 2, 3, 3)
output--------
[[[[ 1.01521421  1.01521421  1.01521421]
   [ 1.01521421  1.01521421  1.01521421]
   [ 1.01521421  1.01521421  1.01521421]]

  [[-3.22733004 -3.22733004 -3.22733004]
   [-3.22733004 -3.22733004 -3.22733004]
   [-3.22733004 -3.22733004 -3.22733004]]]]



### ReLU: 

In [5]:
class ReLULayer(baseModel):
    def __init__(self):
        self.x = None

    def forward(self, x):
        self.x = x
        return np.maximum(0, x)

    def backward(self, output, learning_rate):
        self.output = output
        self.learning_rate = learning_rate
        self.delta = np.zeros(self.x.shape)
        return self.output * (self.x > 0)
        

In [6]:

output = ReLULayer().forward(output)
print(output.shape)
print(f'output--------\n{output}\n')

(1, 2, 3, 3)
output--------
[[[[1.01521421 1.01521421 1.01521421]
   [1.01521421 1.01521421 1.01521421]
   [1.01521421 1.01521421 1.01521421]]

  [[0.         0.         0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]]]]



### MaxPooling: There will be two parameters:
1. Filter dimension
2. Stride

In [7]:
class MaxPoolingLayer(baseModel):
    def __init__(self, kernel_size, stride):
        self.kernel_size = kernel_size
        self.stride = stride

    def forward(self, x):
        self.x = x
        batch_size, channel, height, width = x.shape
        output_height = (height - self.kernel_size) // self.stride + 1
        output_width = (width - self.kernel_size) // self.stride + 1
        self.output = np.zeros((batch_size, channel, output_height, output_width))
        
        # maxpooling without loop
        new_arr = np.lib.stride_tricks.as_strided(x, 
            shape=(batch_size, channel, output_height, output_width, self.kernel_size, self.kernel_size), 
            strides=(x.strides[0], x.strides[1], x.strides[2] * self.stride, x.strides[3] * self.stride, x.strides[2], x.strides[3]))
        self.output = np.max(new_arr, axis=(4, 5))
        return self.output

    def backward(self, output, learning_rate):
        self.output = output
        self.learning_rate = learning_rate
        self.delta = np.zeros(self.x.shape)
        for i in range(self.x.shape[0]):
            for j in range(self.x.shape[1]):
                for k in range(self.x.shape[2]):
                    for l in range(self.x.shape[3]):
                        if self.x[i, j, k, l] == np.max(self.x[i, j, k:k + self.kernel_size, l:l + self.kernel_size]):
                            self.delta[i, j, k, l] = self.output[i, j, k // self.stride, l // self.stride]
        return self.delta

In [8]:
output = MaxPoolingLayer(2, 1).forward(output)

print(output.shape)
print(f'output--------\n{output}\n')

(1, 2, 2, 2)
output--------
[[[[1.01521421 1.01521421]
   [1.01521421 1.01521421]]

  [[0.         0.        ]
   [0.         0.        ]]]]



### Flattening: 

In [9]:
class FlatteningLayer(baseModel):
    def __init__(self):
        self.x = None

    def forward(self, x):
        self.x = x
        batch_size, channel, height, width = x.shape
        self.output = x.reshape((batch_size, channel * height * width))
        return self.output

    def backward(self, output, learning_rate):
        self.output = output
        self.learning_rate = learning_rate
        self.delta = self.output.reshape(self.x.shape)
        return self.delta

In [10]:
output = FlatteningLayer().forward(output)
print(output.shape)
print(f'output--------\n{output}\n')

(1, 8)
output--------
[[1.01521421 1.01521421 1.01521421 1.01521421 0.         0.
  0.         0.        ]]



### Fully-connected layer: a dense layer. There will be one paramete:
1. Output dimension

In [11]:
class FullyConnectedLayer(baseModel):
    def __init__(self, output_channel):
        self.output_channel = output_channel
        self.weights = None
        self.bias = None

    def forward(self, x):
        self.x = x
        if self.weights is None:
            self.weights = np.random.randn(self.x.shape[1], self.output_channel)
            self.bias = np.random.randn(self.output_channel)
        self.output = np.dot(self.x, self.weights) + self.bias
        return self.output

    def backward(self, output, learning_rate):
        self.output = output
        self.learning_rate = learning_rate
        self.delta = np.dot(self.output, self.weights.T)
        self.weights -= self.learning_rate * np.dot(self.x.T, self.output)
        self.bias -= self.learning_rate * np.sum(self.output, axis=0)
        return self.delta

In [12]:
output = FullyConnectedLayer(10).forward(output)
print(output.shape)
print(f'output--------\n{output}\n')

(1, 10)
output--------
[[-0.61239388  2.55712735  1.73963232  0.82078168 -2.51448282 -1.21357651
  -1.36029335  3.00209643  1.94648351  1.92367963]]



### Softmax: it will convert final layer projections to normalized probabilities

In [13]:
class SoftmaxLayer(baseModel):
    def __init__(self):
        self.x = None

    def forward(self, x):
        self.x = x
        self.output = np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True)
        return self.output

    def backward(self, output, learning_rate):
        self.output = output
        self.learning_rate = learning_rate
        self.delta = self.output
        return self.delta

In [14]:
output = SoftmaxLayer().forward(output)
print(output.shape)
print(f'output--------\n{output}\n')

(1, 10)
output--------
[[0.00967592 0.23024894 0.10166327 0.04056134 0.00144419 0.00530398
  0.00458019 0.3592901  0.12502544 0.12220664]]



### Create Model

In [23]:
def createModel():
    model = []
    
    file = open('model.txt', 'r')
    for line in file:
        line_info = line.split(' ')
        print(line_info)
        if line_info[0] == 'conv' :
            model.append(ConvolutionLayer(int(line_info[1]), int(line_info[2]), int(line_info[3]), int(line_info[4])))
        elif line_info[0] == 'relu':
            model.append(ReLULayer())
        elif line_info[0] == 'pool':
            model.append(MaxPoolingLayer(int(line_info[1]), int(line_info[2])))
        elif line_info[0] == 'flatten':
            model.append(FlatteningLayer())
        elif line_info[0] == 'fc':
            model.append(FullyConnectedLayer(int(line_info[1])))
        elif line_info[0] == 'softmax':
            model.append(SoftmaxLayer())
    file.close()
    print('model created: ', model)
    # return model

createModel()

['conv', '2', '3', '1', '0\n']
['relu', '\n']
['pool', '2', '3\n']
['conv', '2', '3', '1', '0\n']
['relu', '\n']
['pool', '2', '3\n']
['flatten', '\n']
['fc', '10\n']
['softmax']
model created:  [<__main__.ConvolutionLayer object at 0x7fc49faa99a0>, <__main__.ReLULayer object at 0x7fc49faa9550>, <__main__.MaxPoolingLayer object at 0x7fc49faa9970>, <__main__.ConvolutionLayer object at 0x7fc49fb17a90>, <__main__.ReLULayer object at 0x7fc49fb17af0>, <__main__.MaxPoolingLayer object at 0x7fc49fb17a30>, <__main__.FlatteningLayer object at 0x7fc49fb17eb0>, <__main__.FullyConnectedLayer object at 0x7fc49fb17220>, <__main__.SoftmaxLayer object at 0x7fc49fb17be0>]
