In [4]:
import numpy as np

In [5]:
class Conv2D:
    """Naive, loop-based 2-D convolution over a generated input volume.

    Typical usage is ``create_input_img()`` -> ``create_kernels()`` ->
    ``create_output()``; the result is stored in ``self.output`` with shape
    ``(num_ker, out_h, out_w)``.

    Parameters
    ----------
    params : dict
        Required keys: 'inp_c', 'inp_h', 'inp_w' (input channels/height/width),
        'ker_c', 'ker_h', 'ker_w' (kernel channels/height/width) and 'num_ker'
        (number of kernels). Optional keys: 'stride' (default 1),
        'dilation' (default 1) and 'padding' (default 0).
    verbose : bool, optional
        When True, parameters, inputs, kernels and the output are printed.
    debug : bool, optional
        When True, the input image is all ones, kernel ``k`` is filled with
        the value ``k``, and every (kernel index, image index) pair visited
        by ``convolve`` is printed.

    Raises
    ------
    KeyError
        If a required key is missing from ``params``.
    ValueError
        If stride < 1, dilation < 1 or padding < 0.

    Notes
    -----
    ``convolve`` walks ``ker_c`` channels and indexes the input image with the
    same channel index, so ``ker_c`` must not exceed ``inp_c``.
    After padding, ``inp_h``/``inp_w`` hold the padded size; after dilation,
    ``ker_h``/``ker_w`` hold the dilated size.
    """
    def __init__(self, params, verbose=True, debug=False):
        super(Conv2D, self).__init__()
        default_params = {
            'stride': 1,
            'dilation': 1,
            'padding': 0
        }
        for key, value in default_params.items():
            setattr(self, key, params.get(key, value))
        # Fail early with a clear message instead of a ZeroDivisionError or a
        # silently collapsed kernel later on.
        if self.stride < 1:
            raise ValueError('stride must be >= 1, got {}'.format(self.stride))
        if self.dilation < 1:
            raise ValueError('dilation must be >= 1, got {}'.format(self.dilation))
        if self.padding < 0:
            raise ValueError('padding must be >= 0, got {}'.format(self.padding))
        self.inp_c = params['inp_c']
        self.inp_h = params['inp_h']
        self.inp_w = params['inp_w']
        self.ker_c = params['ker_c']
        self.ker_h = params['ker_h']
        self.ker_w = params['ker_w']
        self.num_ker = params['num_ker']
        # Un-padded / un-dilated sizes. add_padding() and dilate_kernels()
        # overwrite inp_h/inp_w/ker_h/ker_w, so the originals are kept here to
        # make create_input_img() and create_kernels() safe to call repeatedly.
        self._base_inp_h, self._base_inp_w = self.inp_h, self.inp_w
        self._base_ker_h, self._base_ker_w = self.ker_h, self.ker_w
        self.input_img = None
        self.kernels = None
        self.out_c = None
        self.out_h = None
        self.out_w = None
        self.output = None
        self.verbose = verbose
        self.verboseprint = print if self.verbose else lambda *a, **k: None
        self.debug = debug
        self.debugprint = print if self.debug else lambda *a, **k: None
        self.print_params()

    def print_params(self):
        """Print the configured input, kernel and convolution parameters (verbose only)."""
        self.verboseprint('*** parameters ***')
        self.verboseprint('input channels: {}, input height: {}, input weight: {}'.format(self.inp_c, self.inp_h, self.inp_w))
        self.verboseprint('kernel channels: {}, kernel height: {}, kernel weight: {}'.format(self.ker_c, self.ker_h, self.ker_w))
        self.verboseprint('# kernels: {}, stride: {}, dilation factor: {}, padding: {}'.format(self.num_ker, self.stride, self.dilation, self.padding))
        self.verboseprint('\n')

    def create_input_img(self):
        """Generate the input image, store it in ``self.input_img`` and pad it.

        The image is uniform random in [0, 1), or all ones in debug mode.
        """
        # Start from the un-padded size so repeated calls do not pad an
        # already-enlarged image.
        self.inp_h, self.inp_w = self._base_inp_h, self._base_inp_w
        shape = (self.inp_c, self.inp_h, self.inp_w)
        if self.debug:
            input_img = np.ones(shape)  # deterministic image for debugging
        else:
            input_img = np.random.rand(*shape)
        self.verboseprint('*** input image ***')
        self.verboseprint('input channels: {}, input height: {}, input weight: {}'.format(self.inp_c, self.inp_h, self.inp_w))
        self.verboseprint(input_img)
        self.verboseprint('\n')
        self.input_img = input_img
        self.add_padding()

    def add_padding(self):
        """Zero-pad the height and width of ``self.input_img`` by ``self.padding``.

        Also grows ``self.inp_h`` / ``self.inp_w`` by ``2 * padding``.
        Does nothing when ``padding`` is 0.
        """
        if self.padding != 0:
            pad = self.padding
            # Pad only the two spatial axes; the channel axis is untouched.
            # The image stays a single ndarray.
            self.input_img = np.pad(
                np.asarray(self.input_img),
                ((0, 0), (pad, pad), (pad, pad)),
                'constant',
                constant_values=0,
            )
            self.inp_h += 2 * pad
            self.inp_w += 2 * pad
            self.verboseprint('*** padded input image ***')
            self.verboseprint('input channels: {}, input height: {}, input weight: {}'.format(self.inp_c, self.inp_h, self.inp_w))
            self.verboseprint(self.input_img)
            self.verboseprint('\n')

    def create_kernels(self):
        """Generate ``num_ker`` kernels, store them in ``self.kernels`` and dilate them.

        Kernels are uniform random in [0, 1); in debug mode kernel ``k`` is
        filled with the constant ``k``.
        """
        # Start from the un-dilated size so repeated calls do not dilate twice.
        self.ker_h, self.ker_w = self._base_ker_h, self._base_ker_w
        kernels = []
        self.verboseprint('*** kernels ***')
        self.verboseprint('# kernels: {}, kernel channels: {}, kernel height: {}, kernel weight: {}'.format(self.num_ker, self.ker_c, self.ker_h, self.ker_w))
        shape = (self.ker_c, self.ker_h, self.ker_w)
        for k in range(self.num_ker):
            if self.debug:
                kernel = k * np.ones(shape)
            else:
                kernel = np.random.rand(*shape)
            kernels.append(kernel)
            self.verboseprint('kernel {}'.format(k))
            self.verboseprint(kernel)
        self.verboseprint('\n')
        self.kernels = kernels
        self.dilate_kernels()

    def dilate_kernels(self):
        """Insert ``dilation - 1`` zeros between kernel taps along height and width.

        Replaces ``self.kernels`` with the dilated kernels and updates
        ``self.ker_h`` / ``self.ker_w`` to the dilated size
        ``dilation * (k - 1) + 1``.
        """
        step = self.dilation
        dil_ker_h = step * (self.ker_h - 1) + 1
        dil_ker_w = step * (self.ker_w - 1) + 1
        dil_kernels = []
        for kernel in self.kernels:
            kernel = np.asarray(kernel)
            dil_kernel = np.zeros((kernel.shape[0], dil_ker_h, dil_ker_w))
            # Every `step`-th row/column receives an original tap; the rest stay zero.
            dil_kernel[:, ::step, ::step] = kernel
            dil_kernels.append(dil_kernel)
        self.kernels, self.ker_h, self.ker_w = dil_kernels, dil_ker_h, dil_ker_w
        self.verboseprint('*** dilated kernels ***')
        self.verboseprint('# kernels: {}, dilation factor: {}, kernel channels: {}, kernel height: {}, kernel weight: {}'.format(self.num_ker, self.dilation, self.ker_c, self.ker_h, self.ker_w))
        for k in range(self.num_ker):
            self.verboseprint('kernel {}'.format(k))
            self.verboseprint(self.kernels[k])
        self.verboseprint('\n')

    def compute_out_vol(self):
        """Compute the output volume and store it in ``out_c``, ``out_h``, ``out_w``.

        Uses ``floor((inp - ker) / stride) + 1`` per spatial axis, with the
        current (padded) input size and (dilated) kernel size.

        Raises
        ------
        ValueError
            If the kernel is larger than the input along height or width.
        """
        span_h = self.inp_h - self.ker_h
        span_w = self.inp_w - self.ker_w  # width uses ker_w, not ker_h
        if span_h < 0 or span_w < 0:
            raise ValueError(
                'kernel ({}x{}) is larger than the input ({}x{})'.format(
                    self.ker_h, self.ker_w, self.inp_h, self.inp_w))
        out_c = int(self.num_ker)
        out_h = int(span_h // self.stride) + 1
        out_w = int(span_w // self.stride) + 1
        self.out_c, self.out_h, self.out_w = out_c, out_h, out_w

    def convolve(self, c, h, w, ker_num):
        """Return one output value: kernel ``ker_num`` applied at output position (h, w).

        Sums kernel * image over all ``ker_c`` channels and the full kernel
        window, whose top-left corner is at ``(stride * h, stride * w)``.

        Parameters
        ----------
        c : int
            Unused; the sum already covers every channel. Kept so existing
            callers keep working.
        h, w : int
            Output row and column.
        ker_num : int
            Index into ``self.kernels``.
        """
        self.debugprint('kernel indices, image indices')
        self.debugprint('[c, h, w]', '[c, h, w]')
        kernel = self.kernels[ker_num]
        row0 = self.stride * h
        col0 = self.stride * w
        convol_sum = 0
        for c_ker in range(self.ker_c):
            for h_ker in range(self.ker_h):
                for w_ker in range(self.ker_w):
                    self.debugprint([c_ker, h_ker, w_ker], [c_ker, h_ker + row0, w_ker + col0])
                    convol_sum += kernel[c_ker][h_ker][w_ker] * self.input_img[c_ker][h_ker + row0][w_ker + col0]
        self.debugprint('\n')
        return convol_sum

    def create_output(self):
        """Compute the full convolution and store it in ``self.output``.

        Requires ``self.input_img`` and ``self.kernels`` to be set, normally
        via ``create_input_img()`` and ``create_kernels()``.
        """
        self.compute_out_vol()
        self.output = np.zeros([self.out_c, self.out_h, self.out_w])
        # convolve() already sums over every channel, so it is called exactly
        # once per output element. An extra loop over input channels would
        # scale every value by inp_c.
        for k in range(self.num_ker):
            for h in range(self.out_h):
                for w in range(self.out_w):
                    self.output[k, h, w] = self.convolve(0, h, w, k)
        self.verboseprint('*** output ***')
        output_shape = self.output.shape
        self.verboseprint('ouput channels: {}, output height: {}, output weight: {}'.format(output_shape[0], output_shape[1], output_shape[2]))
        assert((self.out_c, self.out_h, self.out_w) == output_shape)
        self.verboseprint(self.output)
        self.verboseprint('\n')

In [6]:
# Input volume: channels, height, width
inp_c = 2
inp_h = 4
inp_w = 4

# Kernel volume: channels, height, width
ker_c = 2
ker_h = 2
ker_w = 2

num_ker = 3   # number of kernels
stride = 2    # optional
dilation = 1  # optional dilation factor
padding = 0   # optional zero padding

# Bundle everything into the dict that Conv2D expects.
params = dict(
    inp_c=inp_c, inp_h=inp_h, inp_w=inp_w,
    ker_c=ker_c, ker_h=ker_h, ker_w=ker_w,
    num_ker=num_ker,
    stride=stride, dilation=dilation, padding=padding,
)

# Build the layer, then run the pipeline: image -> kernels -> output.
conv2D = Conv2D(params)
conv2D.create_input_img()
conv2D.create_kernels()
conv2D.create_output()

*** parameters ***
input channels: 2, input height: 4, input weight: 4
kernel channels: 2, kernel height: 2, kernel weight: 2
# kernels: 3, stride: 2, dilation factor: 1, padding: 0


*** input image ***
input channels: 2, input height: 4, input weight: 4
[[[0.03189701 0.34447088 0.54593777 0.74324329]
  [0.83183355 0.15150011 0.75168779 0.46031177]
  [0.05140644 0.05870652 0.96159251 0.32584898]
  [0.25141481 0.72684279 0.15071914 0.86774232]]

 [[0.99155332 0.08678528 0.57005055 0.96235653]
  [0.70565862 0.66741014 0.28141347 0.26410709]
  [0.06253505 0.91285188 0.33291433 0.23013499]
  [0.19096874 0.30642884 0.56231748 0.08992256]]]


*** kernels ***
# kernels: 3, kernel channels: 2, kernel height: 2, kernel weight: 2
kernel 0
[[[0.85283317 0.25225186]
  [0.53047771 0.81661633]]

 [[0.26542467 0.9462057 ]
  [0.90153309 0.87352403]]]
kernel 1
[[[0.23465223 0.54466016]
  [0.25204889 0.27191496]]

 [[0.7738552  0.17475702]
  [0.36735516 0.96644052]]]
kernel 2
[[[0.05312018 0.91086587]
