In [1]:
import numpy as np

In [2]:
image = np.random.randint(low=0, high=255, size=(28,28,3))
filters = np.random.rand(4,3,3,3)

In [3]:
image.shape, filters.shape

((28, 28, 3), (4, 3, 3, 3))

In [4]:
# normalize the image
image = (image - np.mean(image))/np.std(image)

# reshape image
image = image.reshape(3,28,28)

In [5]:
def correlate_2d(input_a, input_b):
    output = 0.0
    size_h, size_w = input_a.shape
    for h in range(size_h):
        for w in range(size_w):
            output+= input_a[h,w]*input_b[h,w]
    return output

def conv2d(image, filters, padding=0, stride=1):
    in_features, image_h, image_w = image.shape
    out_features, in_features, kernel_h, kernel_w = filters.shape

    out_h, out_w = ((image_h - kernel_h + 2*padding)//stride) + 1, \
                    ((image_w - kernel_w + 2*padding)//stride) + 1
    out_shape = (out_features, out_h, out_w)

    out_feature_maps = np.zeros(out_shape)

    for b in range(out_features):
        # working on feature map i
        for c in range(in_features):
            # working with kernels present in filter bank b starting with channel c
            kernels_channel = filters[b,c] # (3x3)
            image_channel = image[c] # (28x28)
            for h in range(out_h):
                for w in range(out_w):
                    sub_image = image_channel[h:h+kernel_h,w:w+kernel_w] # (3x3)
                    out_feature_maps[b,h,w] += correlate_2d(sub_image, kernels_channel) # add feature map from each channel
    
    return out_feature_maps


In [8]:
out_feature_map = conv2d(image=image, filters=filters)
print(out_feature_map.shape)

(4, 26, 26)


In [7]:
import torch
import torch.nn.functional as F
img_torch = torch.from_numpy(image).unsqueeze(0) #torch.Size([1, 3, 28, 28])
filter_torch = torch.from_numpy(filters).double() #torch.Size([1, 4, 3, 3])
print(filter_torch.dtype)
torch_out_map = F.conv2d(input=img_torch, weight=filter_torch ).squeeze(0) #torch.Size([4, 26, 26])

print(torch_out_map.shape)

torch.float64
torch.Size([4, 26, 26])


# Comparing my conv2d block result against torch.nn.function conv2d layer result

In [9]:
torch_out_map[0][0]

tensor([ 0.3750, -0.5946,  4.0895,  6.7314,  2.0902, -0.8388, -3.5884, -3.4262,
         3.4433, -0.2526,  3.1783,  3.5102,  2.4618,  0.2534, -0.8105, -3.4138,
        -1.7774, -2.9848, -2.5354, -2.0769,  2.5116,  0.6277, -1.1215, -6.1133,
        -2.2196, -1.1763], dtype=torch.float64)

In [10]:
out_feature_map[0,0]

array([ 0.37496551, -0.59459855,  4.08952516,  6.73139188,  2.09022551,
       -0.83877008, -3.58835423, -3.42622705,  3.44327755, -0.25255388,
        3.17825356,  3.51015505,  2.46181831,  0.25344319, -0.81046311,
       -3.41375309, -1.77743724, -2.98484678, -2.53541136, -2.07690163,
        2.5116195 ,  0.62768895, -1.12153036, -6.11330317, -2.21961838,
       -1.17625347])

In [11]:
torch_out_feature_map_numpy = torch_out_map.numpy()

In [13]:
np.average(torch_out_feature_map_numpy-out_feature_map) # error between the two feature maps is close to zero!

-1.129904361417684e-18