In [1]:
import numpy as np
import torch
import torch.nn.functional as F
import tensorflow as tf

In [2]:
# original image dimensions
# B * C * H * W
batch = 6
orig_img = np.arange(125*batch).reshape((batch, 5,5,5))
batch, C, H, W = orig_img.shape

In [3]:
# original kernels
# n_f * C * F1 *F2
F1 = 1  # Height of kernels
F2 = 2  # Widht of kernels
n_f = 5   # Number of kernels
orig_kernels = np.random.randn(n_f, C, F1, F2)
n_f, C, F1, F2 = orig_kernels.shape

In [4]:
# https://stackoverflow.com/a/40840048
# Refer above link for awesome answer on im2col

def im2col(image,kernel_shape,strides=(1,1)):
    A = image
    B = kernel_shape
    skip = strides

    # Parameters 
    batch, D,M,N = A.shape
    col_extent = N - B[1] + 1
    row_extent = M - B[0] + 1

    # Get batch block indices
    batch_idx = np.arange(batch)[:, None, None] * D * M * N

    # Get Starting block indices
    start_idx = np.arange(B[0])[None, :,None]*N + np.arange(B[1])

    # Generate Depth indeces
    didx=M*N*np.arange(D)
    start_idx=(didx[None, :, None]+start_idx.ravel()).reshape((-1,B[0],B[1]))

    # Get offsetted indices across the height and width of input array
    offset_idx = np.arange(row_extent)[None, :, None]*N + np.arange(col_extent)

    # Get all actual indices & index into input array for final output
    act_idx = (batch_idx + 
        start_idx.ravel()[None, :, None] + 
        offset_idx[:,::skip[0],::skip[1]].ravel())

    out = np.take (A, act_idx)
    
    return out

In [5]:
def conv_2D(input, kernel, stride=(1,1), padding=(0,0)):
    """
    Performs 2D convolution operation on image with tensorflow style padding.
    
    input: input image of shape batch * channels * height * width
    kernel: kernel of shape no_of_kernels * channels * height * width
    stride: Stride for height and width
    padding: Either the string 'same','valid'(case sensitive) or list of list/tuples
            list in the form of [[pad_top,pad_bottom],[pad_left,pad_right]]
    
    returns convoluted feature map
    """
    input = np.float32(input)
    S1 ,S2 = stride
    N_K, C, K1, K2 = kernel.shape
    B, C, H, W = input.shape

    if padding == "same": 
        if (H % S1 == 0):
            pad_along_height = np.max(K1 - S1, 0)
        else:
            pad_along_height = np.max(K1 - (H % S1), 0)
        
        if(W % S2 == 0):
            pad_along_width = np.max(K2 - S2, 0)
        else:
            pad_along_width = np.max(K2 - (W % S2), 0)
        
        pad_top = int(np.floor(pad_along_height / 2))
        pad_bottom = int(pad_along_height - pad_top)
        pad_left = int(np.floor(pad_along_width / 2))
        pad_right = int(pad_along_width - pad_left)
            
    elif padding == "valid":
        pad_top, pad_bottom = (0,0)
        pad_left, pad_right = (0,0)
        
    else:
        pad_top, pad_bottom = padding[0]
        pad_left, pad_right = padding[1]

    input = np.pad(input, [(0,0),(0,0),(pad_top,pad_bottom),(pad_left,pad_right)])    
    B, C, H, W = input.shape  
    
    # Output feature map height and width
    H_ = np.int(np.floor((H - K1) / S1) + 1)
    W_ = np.int(np.floor((W - K2) / S2) + 1)
    
    # im2col
    input = im2col(input, (K1,K2), stride)
    
    input = np.hstack((input))
    
    kernel = kernel.reshape(N_K,-1)
    
    input = np.matmul(kernel, input)
    
    input = np.split(np.array(input), B, axis=1)
    
    print(input)
    
    input = np.array(input).reshape(B,N_K,H_,W_)
    
    return input

In [6]:
out = conv_2D(orig_img, orig_kernels, stride=(2,1), padding='valid')
print(out)
print(out.shape)

[array([[  50.43241712,   51.42448027,   52.41654343,   53.40860658,
          60.35304864,   61.3451118 ,   62.33717495,   63.3292381 ,
          70.27368016,   71.26574332,   72.25780647,   73.24986962],
       [ -77.08590822,  -78.14903683,  -79.21216544,  -80.27529405,
         -87.71719432,  -88.78032293,  -89.84345154,  -90.90658015,
         -98.34848042,  -99.41160903, -100.47473765, -101.53786626],
       [-137.01105448, -138.48600277, -139.96095107, -141.43589937,
        -151.76053746, -153.23548576, -154.71043406, -156.18538236,
        -166.51002045, -167.98496874, -169.45991704, -170.93486534],
       [-250.88725835, -253.47299276, -256.05872717, -258.64446157,
        -276.74460242, -279.33033682, -281.91607123, -284.50180563,
        -302.60194648, -305.18768088, -307.77341529, -310.3591497 ],
       [  50.29882583,   50.75882163,   51.21881742,   51.67881322,
          54.89878378,   55.35877957,   55.81877537,   56.27877116,
          59.49874173,   59.95873752,   60.

In [7]:
# Comparing with pytorch conv2d 
torch_out = F.conv2d(torch.Tensor(orig_img), torch.Tensor(orig_kernels),stride=(2,1))
print(torch_out.shape)
print(torch_out)


torch.Size([6, 5, 3, 4])
tensor([[[[   50.4324,    51.4245,    52.4165,    53.4086],
          [   60.3530,    61.3451,    62.3372,    63.3292],
          [   70.2737,    71.2657,    72.2578,    73.2499]],

         [[  -77.0859,   -78.1490,   -79.2122,   -80.2753],
          [  -87.7172,   -88.7803,   -89.8435,   -90.9066],
          [  -98.3485,   -99.4116,  -100.4747,  -101.5379]],

         [[ -137.0111,  -138.4860,  -139.9610,  -141.4359],
          [ -151.7605,  -153.2355,  -154.7104,  -156.1854],
          [ -166.5100,  -167.9850,  -169.4599,  -170.9349]],

         [[ -250.8873,  -253.4730,  -256.0587,  -258.6444],
          [ -276.7446,  -279.3304,  -281.9161,  -284.5018],
          [ -302.6020,  -305.1877,  -307.7734,  -310.3592]],

         [[   50.2988,    50.7588,    51.2188,    51.6788],
          [   54.8988,    55.3588,    55.8188,    56.2788],
          [   59.4987,    59.9587,    60.4187,    60.8787]]],


        [[[  174.4403,   175.4324,   176.4244,   177.4165],
   

In [8]:
# converting from NCHW --> NHWC for testing with tensorflow
orig_img_ = np.transpose(orig_img, [0,2,3,1]).astype(np.float64)
orig_kernels_ = np.transpose(orig_kernels, [2,3,1,0])

In [9]:
tf_out = tf.nn.conv2d(tf.convert_to_tensor(orig_img_),tf.convert_to_tensor(orig_kernels_),(2,1),padding='VALID')
print(tf_out.shape)
print(tf_out)

(6, 3, 4, 5)
tf.Tensor(
[[[[   50.43241712   -77.08590822  -137.01105448  -250.88725835
       50.29882583]
   [   51.42448027   -78.14903683  -138.48600277  -253.47299276
       50.75882163]
   [   52.41654343   -79.21216544  -139.96095107  -256.05872717
       51.21881742]
   [   53.40860658   -80.27529405  -141.43589937  -258.64446157
       51.67881322]]

  [[   60.35304864   -87.71719432  -151.76053746  -276.74460242
       54.89878378]
   [   61.3451118    -88.78032293  -153.23548576  -279.33033682
       55.35877957]
   [   62.33717495   -89.84345154  -154.71043406  -281.91607123
       55.81877537]
   [   63.3292381    -90.90658015  -156.18538236  -284.50180563
       56.27877116]]

  [[   70.27368016   -98.34848042  -166.51002045  -302.60194648
       59.49874173]
   [   71.26574332   -99.41160903  -167.98496874  -305.18768088
       59.95873752]
   [   72.25780647  -100.47473765  -169.45991704  -307.77341529
       60.41873331]
   [   73.24986962  -101.53786626  -170.93486534