In [4]:
import numpy as np
import torch
import torch.nn.functional as F
np.random.seed(3)
torch.manual_seed(3)

<torch._C.Generator at 0x1148e9570>

In [5]:
image = np.random.randint(low=0, high=255, size=(28,28,3))
filters = np.random.rand(4,3,3,3)

In [6]:
image.shape, filters.shape

((28, 28, 3), (4, 3, 3, 3))

In [7]:
# normalize the image
image = (image - np.mean(image))/np.std(image)

# reshape image
image = image.reshape(3,28,28)

In [8]:
def correlate_2d(input_a, input_b):
    output = 0.0
    size_h, size_w = input_a.shape
    for h in range(size_h):
        for w in range(size_w):
            output+= input_a[h,w]*input_b[h,w]
    return output

def conv2d(image, filters, padding=0, stride=1):
    in_features, image_h, image_w = image.shape
    out_features, in_features, kernel_h, kernel_w = filters.shape

    out_h, out_w = ((image_h - kernel_h + 2*padding)//stride) + 1, \
                    ((image_w - kernel_w + 2*padding)//stride) + 1
    
    out_shape = (out_features, out_h, out_w)

    out_feature_maps = np.zeros(out_shape)

    for b in range(out_features):
        # working on feature map i
        for c in range(in_features):
            # working with kernels present in filter bank b starting with channel c
            kernels_channel = filters[b,c] # (3x3)
            image_channel = image[c] # (28x28)
            for h in range(out_h):
                for w in range(out_w):

                    sub_image = image_channel[h:h+kernel_h,w:w+kernel_w] # (3x3)

                    # add feature map from each channel
                    out_feature_maps[b,h,w] += correlate_2d(sub_image, kernels_channel) 
    
    return out_feature_maps

In [9]:
out_feature_map = conv2d(image=image, filters=filters)
print(out_feature_map.shape)

(4, 26, 26)


In [11]:

img_torch = torch.from_numpy(image).unsqueeze(0) #torch.Size([1, 3, 28, 28])
filter_torch = torch.from_numpy(filters).double() #torch.Size([1, 4, 3, 3])
print(filter_torch.dtype) # torch.float64

torch_out_map = F.conv2d(input=img_torch, weight=filter_torch ).squeeze(0) #torch.Size([4, 26, 26])

print(torch_out_map.shape)

torch.float64
torch.Size([4, 26, 26])


# Comparing my conv2d block result against torch.nn.function conv2d layer result

In [12]:
torch_out_map[0][0]

tensor([ 2.8961,  1.2250,  2.7879, -0.4398, -5.0349, -2.1486,  0.7116,  1.7559,
        -3.0286,  0.8784, -1.8647,  4.9315,  5.7320,  1.3102,  6.7025,  1.0541,
         0.6775,  4.3778,  1.2554, -0.4991, -1.7834, -4.2190, -4.8050, -6.1685,
        -2.3139, -0.1779], dtype=torch.float64)

In [13]:
out_feature_map[0,0]

array([ 2.89606706,  1.2250349 ,  2.78789163, -0.43984179, -5.03493435,
       -2.14861773,  0.71161341,  1.75591783, -3.02859738,  0.87843483,
       -1.86466551,  4.93153718,  5.73203131,  1.31024769,  6.70247558,
        1.05413358,  0.67753347,  4.37777351,  1.25540612, -0.49911672,
       -1.78337572, -4.2190363 , -4.80495791, -6.16850239, -2.31389806,
       -0.17792846])

In [14]:
torch_out_feature_map_numpy = torch_out_map.numpy()

In [15]:
np.average(torch_out_feature_map_numpy-out_feature_map) # error between the two feature maps is close to zero!

-1.030360949215773e-17