In [131]:
import numpy as np
import torch
import torch.nn.functional as F
import tensorflow as tf

In [132]:
# original image dimensions
# B * C * H * W
batch = 6
orig_img = np.arange(125*batch).reshape((batch, 5,5,5))
batch, C, H, W = orig_img.shape

In [133]:
# original kernels
# n_f * C * F1 *F2
F1 = 1  # Height of kernels
F2 = 2  # Widht of kernels
n_f = 5   # Number of kernels
orig_kernels = np.random.randn(n_f, C, F1, F2)
n_f, C, F1, F2 = orig_kernels.shape

In [134]:
# https://stackoverflow.com/a/40840048
# Refer above link for awesome answer on im2col

def im2col(image,kernel_shape,strides=(1,1)):
    A = image
    B = kernel_shape
    skip = strides

    # Parameters 
    batch, D,M,N = A.shape
    col_extent = N - B[1] + 1
    row_extent = M - B[0] + 1

    # Get batch block indices
    batch_idx = np.arange(batch)[:, None, None] * D * M * N

    # Get Starting block indices
    start_idx = np.arange(B[0])[None, :,None]*N + np.arange(B[1])

    # Generate Depth indeces
    didx=M*N*np.arange(D)
    start_idx=(didx[None, :, None]+start_idx.ravel()).reshape((-1,B[0],B[1]))

    # Get offsetted indices across the height and width of input array
    offset_idx = np.arange(row_extent)[None, :, None]*N + np.arange(col_extent)

    # Get all actual indices & index into input array for final output
    act_idx = (batch_idx + 
        start_idx.ravel()[None, :, None] + 
        offset_idx[:,::skip[0],::skip[1]].ravel())

    out = np.take (A, act_idx)
    
    return out

In [135]:
def conv_2D(input, kernel, stride=(1,1), padding=(0,0)):
    """
    Performs 2D convolution operation on image with tensorflow style padding.
    
    input: input image of shape batch * channels * height * width
    kernel: kernel of shape no_of_kernels * channels * height * width
    stride: Stride for height and width
    padding: Either the string 'same','valid'(case sensitive) or list of list/tuples
            list in the form of [[pad_top,pad_bottom],[pad_left,pad_right]]
    
    returns convoluted feature map
    """
    input = np.float32(input)
    S1 ,S2 = stride
    N_K, C, K1, K2 = kernel.shape
    B, C, H, W = input.shape

    if padding == "same": 
        if (H % S1 == 0):
            pad_along_height = np.max(K1 - S1, 0)
        else:
            pad_along_height = np.max(K1 - (H % S1), 0)
        
        if(W % S2 == 0):
            pad_along_width = np.max(K2 - S2, 0)
        else:
            pad_along_width = np.max(K2 - (W % S2), 0)
        
        pad_top = int(np.floor(pad_along_height / 2))
        pad_bottom = int(pad_along_height - pad_top)
        pad_left = int(np.floor(pad_along_width / 2))
        pad_right = int(pad_along_width - pad_left)
            
    elif padding == "valid":
        pad_top, pad_bottom = (0,0)
        pad_left, pad_right = (0,0)
        
    else:
        pad_top, pad_bottom = padding[0]
        pad_left, pad_right = padding[1]

    input = np.pad(input, [(0,0),(0,0),(pad_top,pad_bottom),(pad_left,pad_right)])    
    B, C, H, W = input.shape  
    
    # Output feature map height and width
    H_ = np.int(np.floor((H - K1) / S1) + 1)
    W_ = np.int(np.floor((W - K2) / S2) + 1)
    
    # im2col
    input = im2col(input, (K1,K2), stride)
    
    input = np.hstack((input))
    
    kernel = kernel.reshape(N_K,-1)
    
    input = np.matmul(kernel, input)
    
    input = np.split(np.array(input), B, axis=1)
    
    print(input)
    
    input = np.array(input).reshape(B,N_K,H_,W_)
    
    return input

In [136]:
out = conv_2D(orig_img, orig_kernels, stride=(2,1), padding='valid')
print(out)
print(out.shape)

[array([[-119.42873126, -121.34940117, -123.27007107, -125.19074097,
        -138.6354303 , -140.5561002 , -142.47677011, -144.39744001,
        -157.84212933, -159.76279924, -161.68346914, -163.60413904],
       [ 178.07558889,  178.44938615,  178.82318341,  179.19698067,
         181.8135615 ,  182.18735876,  182.56115602,  182.93495328,
         185.55153411,  185.92533137,  186.29912863,  186.6729259 ],
       [-207.34349011, -209.44561353, -211.54773695, -213.64986038,
        -228.36472434, -230.46684777, -232.56897119, -234.67109461,
        -249.38595858, -251.488082  , -253.59020543, -255.69232885],
       [-195.30562857, -197.13396799, -198.96230741, -200.79064683,
        -213.58902276, -215.41736218, -217.2457016 , -219.07404102,
        -231.87241695, -233.70075637, -235.52909579, -237.35743521],
       [-222.05802364, -226.76039252, -231.4627614 , -236.16513029,
        -269.08171247, -273.78408135, -278.48645024, -283.18881912,
        -316.1054013 , -320.80777019, -325.

In [137]:
# Comparing with pytorch conv2d 
torch_out = F.conv2d(torch.Tensor(orig_img), torch.Tensor(orig_kernels),stride=(2,1))
print(torch_conv.shape)
print(torch_conv)


torch.Size([6, 5, 3, 4])
tensor([[[[ -240.3738,  -241.7134,  -243.0529,  -244.3925],
          [ -253.7694,  -255.1090,  -256.4485,  -257.7881],
          [ -267.1650,  -268.5045,  -269.8441,  -271.1837]],

         [[ -314.8362,  -320.2026,  -325.5690,  -330.9354],
          [ -368.5002,  -373.8665,  -379.2329,  -384.5993],
          [ -422.1641,  -427.5305,  -432.8969,  -438.2632]],

         [[  329.5574,   333.7635,   337.9697,   342.1759],
          [  371.6189,   375.8251,   380.0312,   384.2374],
          [  413.6805,   417.8866,   422.0928,   426.2989]],

         [[   83.2656,    83.5242,    83.7828,    84.0415],
          [   85.8520,    86.1106,    86.3693,    86.6279],
          [   88.4384,    88.6971,    88.9557,    89.2144]],

         [[ -285.2768,  -287.7138,  -290.1509,  -292.5879],
          [ -309.6473,  -312.0844,  -314.5214,  -316.9585],
          [ -334.0178,  -336.4549,  -338.8919,  -341.3289]]],


        [[[ -407.8186,  -409.1581,  -410.4977,  -411.8373],
   

In [138]:
# converting from NCHW --> NHWC for testing with tensorflow
orig_img_ = np.transpose(orig_img, [0,2,3,1]).astype(np.float64)
orig_kernels_ = np.transpose(orig_kernels, [2,3,1,0])

In [139]:
tf_out = tf.nn.conv2d(tf.convert_to_tensor(orig_img_),tf.convert_to_tensor(orig_kernels_),(2,1),padding='VALID')
print(tf_out.shape)
print(tf_out)

(6, 3, 4, 5)
tf.Tensor(
[[[[ -119.42873126   178.07558889  -207.34349011  -195.30562857
     -222.05802364]
   [ -121.34940117   178.44938615  -209.44561353  -197.13396799
     -226.76039252]
   [ -123.27007107   178.82318341  -211.54773695  -198.96230741
     -231.4627614 ]
   [ -125.19074097   179.19698067  -213.64986038  -200.79064683
     -236.16513029]]

  [[ -138.6354303    181.8135615   -228.36472434  -213.58902276
     -269.08171247]
   [ -140.5561002    182.18735876  -230.46684777  -215.41736218
     -273.78408135]
   [ -142.47677011   182.56115602  -232.56897119  -217.2457016
     -278.48645024]
   [ -144.39744001   182.93495328  -234.67109461  -219.07404102
     -283.18881912]]

  [[ -157.84212933   185.55153411  -249.38595858  -231.87241695
     -316.1054013 ]
   [ -159.76279924   185.92533137  -251.488082    -233.70075637
     -320.80777019]
   [ -161.68346914   186.29912863  -253.59020543  -235.52909579
     -325.51013907]
   [ -163.60413904   186.6729259   -255.69232885 