# Verify the PyTorch Conv-related APIs and write transposed Conv by hand

## Use the class nn.Conv2d

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Minimal setup: a single 1-channel 4x4 input and one 3x3 kernel without bias,
# so the valid (no padding, stride 1) convolution yields a 2x2 output.
in_channels = 1
out_channels = 1
kernel_size = 3
batch_size = 1
bias = False
input_size = [batch_size, in_channels, 4, 4]
# using the class nn.Conv2d
conv_layer = nn.Conv2d(in_channels, out_channels, bias=bias, kernel_size=kernel_size)
input_feature_map = torch.randn(input_size)
output_feature_map = conv_layer(input_feature_map)
print(conv_layer.weight.data) # out_channels * in_channels * height * width
print(input_feature_map) 
print(output_feature_map)

tensor([[[[-0.2632, -0.3305,  0.2244],
          [ 0.0297, -0.0928, -0.1374],
          [ 0.1159,  0.1170,  0.1737]]]])
tensor([[[[ 2.7325e+00,  5.0864e-01,  7.5504e-01, -9.9844e-01],
          [ 1.4132e+00, -2.0177e+00, -2.4825e-01,  1.1625e+00],
          [-3.0812e-01,  4.5179e-01,  9.7272e-04,  1.2158e+00],
          [-1.5355e+00, -2.6443e-01,  1.9800e+00,  1.8255e-01]]]])
tensor([[[[-0.4373, -0.5404],
          [ 0.3233,  0.9529]]]], grad_fn=<ConvolutionBackward0>)


## Use the function F.conv2d

In [3]:
# Functional form: pass the layer's weights explicitly to F.conv2d so the
# result can be compared with the nn.Conv2d module call
output_feature_map_f = F.conv2d(input=input_feature_map, weight=conv_layer.weight.data)
print(output_feature_map_f)

tensor([[[[-0.4373, -0.5404],
          [ 0.3233,  0.9529]]]])


## 1. Use for-loops and matrix multiplications to implement conv operation

In [23]:
import math
def matrix_multiplication_for_conv2d(input_feature_map, kernel, bias=0, stride=1, padding=0):
    """Compute a single-channel 2-D convolution with explicit Python loops.

    As in ``F.conv2d``, the kernel is applied without flipping
    (cross-correlation): each output entry is the sum of the element-wise
    product between the kernel and the input window it covers.

    Args:
        input_feature_map: 2-D tensor of shape (height, width).
        kernel: 2-D tensor of shape (kernel_h, kernel_w).
        bias: value added to every output entry (number or 1-element tensor).
        stride: step between consecutive windows, used for both axes.
        padding: number of zero rows/columns added on every side of the input.

    Returns:
        2-D tensor of shape (output_h, output_w).
    """
    if padding > 0:
        # pad order is (left, right, top, bottom) for the last two dimensions
        input_feature_map = F.pad(input_feature_map, pad=(padding, padding, padding, padding))

    input_h, input_w = input_feature_map.shape[-2:]
    kernel_h, kernel_w = kernel.shape
    output_h = (input_h - kernel_h) // stride + 1
    output_w = (input_w - kernel_w) // stride + 1
    output_feature_map = torch.zeros(output_h, output_w)

    # Walk over output positions; the column range is derived from the
    # width (not the height) so non-square inputs and kernels are handled.
    for out_i in range(output_h):
        top = out_i * stride
        for out_j in range(output_w):
            left = out_j * stride
            region = input_feature_map[top:top + kernel_h, left:left + kernel_w]
            output_feature_map[out_i, out_j] = torch.sum(region * kernel) + bias

    return output_feature_map

# Random 5x5 input, 3x3 kernel and a single bias value
input_data = torch.randn(5, 5)
kernel = torch.randn(3, 3)
bias = torch.randn(1)

matmul_output = matrix_multiplication_for_conv2d(
    input_data, kernel, bias=bias, padding=1, stride=2
)
print(matmul_output)
# Reference result from the PyTorch API: F.conv2d works on 4-D tensors, so
# batch and channel axes are added for the call and dropped again for printing
pytorch_api_conv_output = F.conv2d(
    input_data[None, None],
    kernel[None, None],
    bias=bias,
    stride=2,
    padding=1,
)
print(pytorch_api_conv_output[0, 0])

tensor([[-0.8371,  0.0981, -2.8122],
        [ 0.5762, -0.6831, -2.5968],
        [-0.4870, -2.5456, -0.7429]])
tensor([[-0.8371,  0.0981, -2.8122],
        [ 0.5762, -0.6831, -2.5968],
        [-0.4870, -2.5456, -0.7429]])


## 2. store all the flattened regions in a matrix and perform a matrix multiplication with a vectorized kernel

In [29]:
def matrix_multiplication_for_conv2d_flatten(input_feature_map, kernel, bias=0, stride=1, padding=0):
    """Compute a single-channel 2-D convolution as one matrix product.

    Every input window covered by the kernel is flattened into one row of a
    region matrix; multiplying that matrix by the flattened kernel yields all
    output entries at once. The kernel is not flipped (cross-correlation),
    matching ``F.conv2d``.

    Args:
        input_feature_map: 2-D tensor of shape (height, width).
        kernel: 2-D tensor of shape (kernel_h, kernel_w).
        bias: value added to every output entry (number or 1-element tensor).
        stride: step between consecutive windows, used for both axes.
        padding: number of zero rows/columns added on every side of the input.

    Returns:
        2-D tensor of shape (output_h, output_w).
    """
    if padding > 0:
        # pad order is (left, right, top, bottom) for the last two dimensions
        input_feature_map = F.pad(input_feature_map, pad=(padding, padding, padding, padding))

    input_h, input_w = input_feature_map.shape[-2:]
    kernel_h, kernel_w = kernel.shape
    output_h = (input_h - kernel_h) // stride + 1
    output_w = (input_w - kernel_w) // stride + 1

    # One row per output entry, one column per kernel entry
    region_matrix = torch.zeros(output_h * output_w, kernel.numel())
    kernel_vector = kernel.reshape(-1, 1)  # or kernel.reshape(kernel.numel(), 1)
    row_index = 0
    for i in range(0, input_h - kernel_h + 1, stride):
        # The column range must come from the width, otherwise non-square
        # inputs leave rows of the region matrix unfilled or mis-shaped.
        for j in range(0, input_w - kernel_w + 1, stride):
            region_matrix[row_index] = input_feature_map[i:i + kernel_h, j:j + kernel_w].flatten()
            row_index += 1

    # (num_outputs, kernel_size) @ (kernel_size, 1) -> (num_outputs, 1)
    output_feature_map = region_matrix @ kernel_vector + bias
    return torch.reshape(output_feature_map, (output_h, output_w))

output_feature_map_flatten = matrix_multiplication_for_conv2d_flatten(
    input_data, kernel, bias=bias, padding=1, stride=2
)
print(output_feature_map_flatten)
# Reference result from the PyTorch API, reduced back to a 2-D tensor by
# indexing away the batch and channel axes
pytorch_api_conv_output = F.conv2d(
    input_data[None, None],
    kernel[None, None],
    bias=bias,
    stride=2,
    padding=1,
)[0, 0]
print(pytorch_api_conv_output)

torch.allclose(output_feature_map_flatten, pytorch_api_conv_output)

tensor([[-0.8371,  0.0981, -2.8122],
        [ 0.5762, -0.6831, -2.5968],
        [-0.4870, -2.5456, -0.7429]])
tensor([[-0.8371,  0.0981, -2.8122],
        [ 0.5762, -0.6831, -2.5968],
        [-0.4870, -2.5456, -0.7429]])


## 3. Full version of the loop-based implementation (step 1), taking batch size and channels into account

In [32]:
def matrix_multiplication_for_conv2d(input_feature_map, kernel, bias=0, stride=1, padding=0):
    """Compute a batched, multi-channel 2-D convolution with explicit loops.

    The kernel is applied without flipping (cross-correlation), matching
    ``F.conv2d``. For every sample and output channel, the contributions of
    all input channels are accumulated, then the channel's bias is added.

    Args:
        input_feature_map: tensor of shape (batch, in_channels, height, width).
        kernel: tensor of shape (out_channels, in_channels, kernel_h, kernel_w).
        bias: ``None``, a single number / 1-element tensor shared by all
            output channels, or one value per output channel.
        stride: step between consecutive windows, used for both axes.
        padding: number of zero rows/columns added on every side of the input.

    Returns:
        Tensor of shape (batch, out_channels, output_h, output_w).

    Raises:
        ValueError: if the kernel's in_channels differs from the input's.
    """
    if padding > 0:
        # pad order is (left, right, top, bottom) for the last two dimensions
        input_feature_map = F.pad(input_feature_map, pad=(padding, padding, padding, padding))

    batch_size, in_channels, input_h, input_w = input_feature_map.shape
    out_channels, kernel_in_channels, kernel_h, kernel_w = kernel.shape
    if kernel_in_channels != in_channels:
        raise ValueError(
            f"kernel expects {kernel_in_channels} input channels, "
            f"but the input has {in_channels}"
        )
    output_h = (input_h - kernel_h) // stride + 1
    output_w = (input_w - kernel_w) // stride + 1
    output_feature_map = torch.zeros(batch_size, out_channels, output_h, output_w)

    # Normalise the bias to one value per output channel so the default
    # scalar (or None) works as well as a per-channel tensor.
    bias_per_channel = torch.as_tensor(
        0 if bias is None else bias, dtype=output_feature_map.dtype
    ).reshape(-1)
    if bias_per_channel.numel() == 1:
        bias_per_channel = bias_per_channel.expand(out_channels)

    for ind in range(batch_size):
        for oc in range(out_channels):
            for ic in range(in_channels):
                for i in range(0, input_h - kernel_h + 1, stride):
                    # The column range must come from the width so that
                    # non-square inputs and kernels are fully covered.
                    for j in range(0, input_w - kernel_w + 1, stride):
                        region = input_feature_map[ind, ic, i:i + kernel_h, j:j + kernel_w]
                        output_feature_map[ind, oc, i // stride, j // stride] += torch.sum(region * kernel[oc, ic])
            # Bias is added once per output channel, after all input channels
            output_feature_map[ind, oc] += bias_per_channel[oc]

    return output_feature_map

# Batch of 2 samples with 2 channels each; 3 output channels with 3x3 kernels
input_data = torch.randn(2, 2, 5, 5)
kernel = torch.randn(3, 2, 3, 3)
bias = torch.randn(3)
# Run the PyTorch reference first, then the hand-written loops with the
# same arguments, and compare the two results
pytorch_api_conv2d_output = F.conv2d(
    input_data, kernel, bias=bias, stride=2, padding=1
)
matmul_for_conv2d_output = matrix_multiplication_for_conv2d(
    input_data, kernel, bias=bias, stride=2, padding=1
)
torch.allclose(pytorch_api_conv2d_output, matmul_for_conv2d_output)

True

## 4. Construct a kernel matrix, use it to compute conv2d, and finally use its transpose to get transposed conv2d

In [42]:
def get_kernel_matrix(kernel, input_size):
    """Build the matrix that performs a stride-1, unpadded conv2d as a product.

    Each row corresponds to one output entry: the kernel is zero-padded to
    the full input size at that entry's window position and then flattened.
    Multiplying the result by the flattened input gives the flattened
    convolution output.

    Args:
        kernel: 2-D tensor of shape (kernel_h, kernel_w).
        input_size: (input_h, input_w) of the feature map to be convolved.

    Returns:
        Tensor of shape (output_h * output_w, input_h * input_w).

    Raises:
        ValueError: if the kernel does not fit inside the input.
    """
    kernel_h, kernel_w = kernel.shape
    input_h, input_w = input_size
    if kernel_h > input_h or kernel_w > input_w:
        # Without this check two negative output sizes multiply to a positive
        # row count and a meaningless all-zero matrix would be returned.
        raise ValueError(
            f"kernel of size {kernel_h}x{kernel_w} does not fit into "
            f"input of size {input_h}x{input_w}"
        )
    output_h = input_h - kernel_h + 1
    output_w = input_w - kernel_w + 1
    # number of output entries * number of input entries
    result = torch.zeros(output_h * output_w, input_h * input_w)
    count = 0
    for i in range(output_h):
        for j in range(output_w):
            # pad order is (left, right, top, bottom): place the kernel at
            # row offset i and column offset j inside an input-sized canvas
            result[count] = torch.flatten(
                F.pad(kernel, (j, input_w - kernel_w - j, i, input_h - kernel_h - i))
            )
            count += 1

    return result
    
input_data = torch.randn(4, 4)
kernel = torch.randn(3, 3)
kernel_matrix = get_kernel_matrix(kernel, input_data.shape)

# Convolution as a single matrix-vector product; the result is a flat vector
# holding one value per output element
mm_conv_output = torch.matmul(kernel_matrix, input_data.reshape(-1))
# F.conv2d needs 4-D operands, so prepend the batch and channel axes to both
# the input and the kernel
pytorch_conv_output = F.conv2d(input_data[None, None], kernel[None, None])
print(mm_conv_output)
print(pytorch_conv_output)

tensor([-2.4418, -6.2556,  4.1600, -1.6744])
tensor([[[[-2.4418, -6.2556],
          [ 4.1600, -1.6744]]]])


### Transposed convolution is used for upsampling: it restores the shape (not the values) of the original input

In [46]:
## 2*2 -> 4*4
# Multiplying by the transposed kernel matrix maps the 4 output values back
# onto the 16 input positions
mm_transposed_conv_output = torch.matmul(kernel_matrix.T, mm_conv_output)
# Same operation through the PyTorch API, with the kernel given as a 4-D tensor
pytorch_transposed_conv_output = F.conv_transpose2d(pytorch_conv_output, kernel[None, None])
print(mm_transposed_conv_output.reshape(1, 1, 4, 4))
print(pytorch_transposed_conv_output)

tensor([[[[  1.8623,   5.8987,   2.6517,  -0.6089],
          [ -4.1699,   0.0925,  10.9981,   3.3901],
          [ -1.5203, -17.5114,  -6.6500,   3.6920],
          [  5.4847,   2.8560,  -3.8608,   0.7337]]]])
tensor([[[[  1.8623,   5.8987,   2.6516,  -0.6089],
          [ -4.1699,   0.0925,  10.9981,   3.3901],
          [ -1.5203, -17.5114,  -6.6500,   3.6920],
          [  5.4847,   2.8560,  -3.8608,   0.7337]]]])
