In [1]:
import numpy as np

# 1D convolution (Batch, Width)

In [2]:
def conv1d(data, kernel, stride, padding, verbose=True):
    """
    Slide `kernel` over every row of the zero-padded `data` and return, for each
    window, the sum of the element-wise products (the kernel is not flipped).
        -`data`: np.ndarray of shape (batch, width)
        -`kernel`: np.ndarray of shape (kernel_len,); a 2D kernel is broadcast against
                   the windows, so in practice it has shape (1, kernel_len)
        -`stride`: int >= 1, distance between the starts of two consecutive windows
        -`padding`: int or tuple; `x` and `(x,)` add `x` zeros on both sides,
                    `(left, right)` pads each side separately
        -`verbose`: bool, print the step-by-step trace of the computation (default: True)

    Returns an np.ndarray of shape (batch, output_len), where
    output_len = (padded_width - kernel_len) // stride + 1.

    Raises AssertionError if the kernel is not 1 or 2 dimensional, if `stride` is
    smaller than 1, or if the kernel is longer than the zero-padded input.
    """
    assert 0 < kernel.ndim < 3, f"Expected kernel to be 1 or 2 dimensional, but got {kernel.ndim}."
    assert stride >= 1, f"Expected stride to be a positive integer, but got {stride}."

    # Normalise every accepted padding form to a (left, right) pair.
    if isinstance(padding, (int, np.integer)):
        padding = (padding, padding)
    else:
        padding = tuple(padding)
        if len(padding) == 1:  # change the (x,) padding into (x, x)
            padding = (padding[0], padding[0])

    data_zero_padded = np.pad(data, ((0, 0), padding))  # zero pad the width axis only
    # The window length lives on the LAST kernel axis, so a (1, kernel_len) kernel
    # is handled exactly like its 1D counterpart.
    kernel_len = kernel.shape[-1]
    data_len = data_zero_padded.shape[1]

    assert data_len >= kernel_len, f"Expected kernel length to be smaller or equal to the input lenght, but got kernel of length {kernel_len} and input of length {data_len}."

    # Number of windows that fit when consecutive windows start `stride` elements apart.
    output_len = (data_len - kernel_len) // stride + 1

    # Row r of `indices` lists the padded-data positions covered by window r.
    window_starts = np.arange(output_len) * stride
    indices = np.arange(kernel_len) + window_starts[..., None]

    # Gather the windows once, weight them with the kernel and sum each window.
    windows = data_zero_padded[:, indices]
    weighted = windows * kernel
    result = weighted.sum(axis=-1)

    if verbose:
        # Explanatory information
        print(f"Data: {data}")
        print(f"Zero-padded Data: {data_zero_padded}")
        print(f"Zero-padded Data length: {data_len}\n")
        print(f"Kernel: {kernel}")
        print(f"Kernel length: {kernel_len}\n")
        print(f"Output length: {output_len}\n")
        print(f"Indices:\n{indices}\n")
        print(f"Data at the indices:\n{windows}\n")
        print(f"Data after applying the kernel {kernel}:\n{weighted}\n")
        print("Sum the elements of each window. Output: ")

    return result

##### Example:

In [3]:
# Inputs for the 1D example: a batch with one 7-sample signal, a 3-tap kernel,
# unit stride and two zeros of padding on each side.
data = np.array([[1, 0, 2, 3, 0, 1, 1]])
kernel = np.array([2, 1, 3])
stride, padding = 1, (2, 2)

In [4]:
conv1d(data=data, kernel=kernel, stride=stride, padding=padding)

Data: [[1 0 2 3 0 1 1]]
Zero-padded Data: [[0 0 1 0 2 3 0 1 1 0 0]]
Zero-padded Data length: 11

Kernel: [2 1 3]
Kernel length: 3

Output length: 9

Indices:
[[ 0  1  2]
 [ 1  2  3]
 [ 2  3  4]
 [ 3  4  5]
 [ 4  5  6]
 [ 5  6  7]
 [ 6  7  8]
 [ 7  8  9]
 [ 8  9 10]]

Data at the indices:
[[[0 0 1]
  [0 1 0]
  [1 0 2]
  [0 2 3]
  [2 3 0]
  [3 0 1]
  [0 1 1]
  [1 1 0]
  [1 0 0]]]

Data after applying the kernel [2 1 3]:
[[[0 0 3]
  [0 1 0]
  [2 0 6]
  [0 2 9]
  [4 3 0]
  [6 0 3]
  [0 1 3]
  [2 1 0]
  [2 0 0]]]

Sum the elements of each window. Output: 


array([[ 3,  1,  8, 11,  7,  9,  4,  3,  2]])

# 2D convolution (Batch, Height, Width)

In [5]:
def conv2d(data, kernel, stride, padding, verbose=True):
    """
    Slide a square `kernel` over the two trailing axes of the zero-padded `data` and
    return, for each window, the sum of the element-wise products (the kernel is not flipped).
        -`data`: np.ndarray of shape (batch, height, width)
        -`kernel`: np.ndarray whose two trailing axes form a square (k, k) matrix
        -`stride`: int >= 1, distance between consecutive windows along both axes
        -`padding`: int or tuple
                    `x` or `(x,)`: `x` zeros on every side of both axes
                    `(x, y)`: `x` describes the height axis and `y` the width axis; each
                              is an int (same amount on both sides) or a (before, after) pair
                    anything else is handed to `np.pad` unchanged
        -`verbose`: bool, print the step-by-step trace of the computation (default: True)

    Returns an np.ndarray of shape (batch, out_height, out_width), where each output
    extent is (padded_extent - k) // stride + 1.

    Raises AssertionError if the kernel has fewer than 2 dimensions or is not square,
    if `stride` is smaller than 1, or if the kernel does not fit inside the padded input.
    """
    kernel_shape = kernel.shape
    assert kernel.ndim >= 2, f"Expected the kernel to be at least 2 dimensional, but got {kernel.ndim}."
    assert kernel_shape[-1] == kernel_shape[-2], f"Expected the kernel to be a square matrix, but got shape {kernel_shape}."
    assert stride >= 1, f"Expected stride to be a positive integer, but got {stride}."

    int_types = (int, np.integer)
    if isinstance(padding, int_types):  # change 'int' padding into ((0, 0), (x, x), (x, x))
        padding = ((0, 0), (padding, padding), (padding, padding))
    elif len(padding) == 1:  # change the (x,) padding into ((0, 0), (x, x), (x, x))
        padding = ((0, 0), (padding[0], padding[0]), (padding[0], padding[0]))
    elif len(padding) == 2:  # change the (x, y) padding into ((0, 0), (x, x), (y, y)) if x, y are 'int', else ((0, 0), x, y)
        padding_1, padding_2 = [(p, p) if isinstance(p, int_types) else p for p in padding]
        padding = ((0, 0), padding_1, padding_2)

    data_zero_padded = np.pad(data, padding)  # zero pad the data
    # The window size is the side of the square formed by the two trailing kernel axes.
    kernel_len = kernel_shape[-1]
    # Padded (height, width); read from `.shape` so that an empty batch is not indexed.
    data_shape = np.array(data_zero_padded.shape[1:])

    assert (data_shape >= kernel_len).all(), f"Expected the kernel to fit inside the zero-padded input, but got kernel of length {kernel_len} and input of shape {data_shape}."

    # Number of windows that fit along each axis when they start `stride` elements apart.
    output_shape = (data_shape - kernel_len) // stride + 1

    offsets = np.arange(kernel_len)
    row_starts = np.arange(output_shape[0]) * stride
    col_starts = np.arange(output_shape[1]) * stride

    # get indices of the kernel-sized windows
    # Both grids have shape (out_height, out_width, k, k): entry [r, c, a, b] is the
    # padded row (indices_i) / column (indices_j) read by kernel cell (a, b) of window (r, c).
    indices_i, indices_j = np.broadcast_arrays(
        row_starts[:, None, None, None] + offsets[:, None],
        col_starts[:, None, None] + offsets,
    )

    # apply the kernel on the data and sum the elements of each window to get the output (convolution result)
    windows = data_zero_padded[:, indices_i, indices_j]
    weighted = windows * kernel
    result = weighted.sum(axis=(-1, -2))

    if verbose:
        # Explanatory information
        print(f"Data:\n{data}")
        print(f"Zero-padded Data:\n{data_zero_padded}")
        print(f"Zero-padded Data shape: {data_shape}\n")
        print(f"Kernel:\n{kernel}")
        print(f"Kernel length: {kernel_len}\n")
        print(f"Output shape: {output_shape}\n")
        print(f"Indices i:\n{indices_i}\n\nIndices j:\n{indices_j}\n")
        print(f"Data at the indices:\n{windows}\n")
        print(f"Data after applying the kernel:\n{kernel}\n\n{weighted}\n")
        print("Sum the elements of each window. Output: ")

    return result

##### Example:

In [6]:
# Inputs for the 2D example: a batch with one 4x3 image and a 2x2 kernel,
# with unit stride and one ring of zero padding.
data = np.array([[
    [1, 1, 1],
    [2, 0, -1],
    [4, 7, 8],
    [3, -1, -1],
]])
kernel = np.array([[0, 0], [1, -1]])
stride = padding = 1

In [7]:
conv2d(data=data, kernel=kernel, stride=stride, padding=padding)

Data:
[[[ 1  1  1]
  [ 2  0 -1]
  [ 4  7  8]
  [ 3 -1 -1]]]
Zero-padded Data:
[[[ 0  0  0  0  0]
  [ 0  1  1  1  0]
  [ 0  2  0 -1  0]
  [ 0  4  7  8  0]
  [ 0  3 -1 -1  0]
  [ 0  0  0  0  0]]]
Zero-padded Data shape: [6 5]

Kernel:
[[ 0  0]
 [ 1 -1]]
Kernel length: 2

Output shape: [5 4]

Indices i:
[[[[0 0]
   [1 1]]

  [[0 0]
   [1 1]]

  [[0 0]
   [1 1]]

  [[0 0]
   [1 1]]]


 [[[1 1]
   [2 2]]

  [[1 1]
   [2 2]]

  [[1 1]
   [2 2]]

  [[1 1]
   [2 2]]]


 [[[2 2]
   [3 3]]

  [[2 2]
   [3 3]]

  [[2 2]
   [3 3]]

  [[2 2]
   [3 3]]]


 [[[3 3]
   [4 4]]

  [[3 3]
   [4 4]]

  [[3 3]
   [4 4]]

  [[3 3]
   [4 4]]]


 [[[4 4]
   [5 5]]

  [[4 4]
   [5 5]]

  [[4 4]
   [5 5]]

  [[4 4]
   [5 5]]]]

Indices j:
[[[[0 1]
   [0 1]]

  [[1 2]
   [1 2]]

  [[2 3]
   [2 3]]

  [[3 4]
   [3 4]]]


 [[[0 1]
   [0 1]]

  [[1 2]
   [1 2]]

  [[2 3]
   [2 3]]

  [[3 4]
   [3 4]]]


 [[[0 1]
   [0 1]]

  [[1 2]
   [1 2]]

  [[2 3]
   [2 3]]

  [[3 4]
   [3 4]]]


 [[[0 1]
   [0 1]]

  [[1 2]
  

array([[[-1,  0,  0,  1],
        [-2,  2,  1, -1],
        [-4, -3, -1,  8],
        [-3,  4,  0, -1],
        [ 0,  0,  0,  0]]])

# 3D convolution (Batch, Channel, Height, Width): the same 2D kernel is applied to every channel independently (unlike torch.nn.Conv2d, the channels are not summed)

In [8]:
def conv3d(data, kernel, stride, padding, verbose=True):
    """
    Slide a square `kernel` over the two trailing axes of the zero-padded `data` and
    return, for each window, the sum of the element-wise products (the kernel is not flipped).
    The same kernel is applied to every channel separately; channels are not summed,
    so the output keeps the channel axis of the input.
        -`data`: np.ndarray of shape (batch, channel, height, width)
        -`kernel`: np.ndarray whose two trailing axes form a square (k, k) matrix
        -`stride`: int >= 1, distance between consecutive windows along height and width
        -`padding`: int or tuple
                    `x` or `(x,)`: `x` zeros on every side of height and width
                    `(x, y)`: `x` describes the height axis and `y` the width axis; each
                              is an int (same amount on both sides) or a (before, after) pair
                    anything else is handed to `np.pad` unchanged
        -`verbose`: bool, print the step-by-step trace of the computation (default: True)

    Returns an np.ndarray of shape (batch, channel, out_height, out_width), where each
    spatial extent is (padded_extent - k) // stride + 1.

    Raises AssertionError if the kernel has fewer than 2 dimensions or is not square,
    if `stride` is smaller than 1, or if the kernel does not fit inside the padded input.
    """
    kernel_shape = kernel.shape
    assert kernel.ndim >= 2, f"Expected the kernel to be at least 2 dimensional, but got {kernel.ndim}."
    assert kernel_shape[-1] == kernel_shape[-2], f"Expected the kernel to be a square matrix, but got shape {kernel_shape}."
    assert stride >= 1, f"Expected stride to be a positive integer, but got {stride}."

    int_types = (int, np.integer)
    if isinstance(padding, int_types):  # change 'int' padding into ((0, 0), (0, 0), (x, x), (x, x))
        padding = ((0, 0), (0, 0), (padding, padding), (padding, padding))
    elif len(padding) == 1:  # change the (x,) padding into ((0, 0), (0, 0), (x, x), (x, x))
        padding = ((0, 0), (0, 0), (padding[0], padding[0]), (padding[0], padding[0]))
    elif len(padding) == 2:  # change the (x, y) padding into ((0, 0), (0, 0), (x, x), (y, y)) if x, y are 'int', else ((0, 0), (0, 0), x, y)
        padding_1, padding_2 = [(p, p) if isinstance(p, int_types) else p for p in padding]
        padding = ((0, 0), (0, 0), padding_1, padding_2)

    data_zero_padded = np.pad(data, padding)  # zero pad the data
    # The window size is the side of the square formed by the two trailing kernel axes.
    kernel_len = kernel_shape[-1]
    # Padded (channel, height, width); read from `.shape` so that an empty batch is not indexed.
    data_shape = np.array(data_zero_padded.shape[1:])

    assert (data_shape[1:] >= kernel_len).all(), f"Expected the kernel to fit inside the zero-padded input, but got kernel of length {kernel_len} and input of shape {data_shape}."

    # Channel count is unchanged; the spatial extents count the windows that fit when
    # they start `stride` elements apart. Plain ints keep the printed list readable.
    spatial_shape = (data_shape[1:] - kernel_len) // stride + 1
    output_shape = [int(data_shape[0])] + [int(extent) for extent in spatial_shape]

    offsets = np.arange(kernel_len)
    row_starts = np.arange(output_shape[1]) * stride
    col_starts = np.arange(output_shape[2]) * stride

    # get indices of the kernel-sized windows
    # Both grids have shape (out_height, out_width, k, k): entry [r, c, a, b] is the
    # padded row (indices_i) / column (indices_j) read by kernel cell (a, b) of window (r, c).
    indices_i, indices_j = np.broadcast_arrays(
        row_starts[:, None, None, None] + offsets[:, None],
        col_starts[:, None, None] + offsets,
    )

    # apply the kernel on the data and sum the elements of each window to get the output (convolution result)
    windows = data_zero_padded[:, :, indices_i, indices_j]
    weighted = windows * kernel
    result = weighted.sum(axis=(-1, -2))

    if verbose:
        # Explanatory information
        print(f"Data:\n{data}")
        print(f"Zero-padded Data:\n{data_zero_padded}")
        print(f"Zero-padded Data shape: {data_shape}\n")
        print(f"Kernel:\n{kernel}")
        print(f"Kernel length: {kernel_len}\n")
        print(f"Output shape: {output_shape}\n")
        print(f"Indices i:\n{indices_i}\n\nIndices j:\n{indices_j}\n")
        print(f"Data at the indices:\n{windows}\n")
        print(f"Data after applying the kernel:\n{kernel}\n\n{weighted}\n")
        print("Sum the elements of each window. Output: ")

    return result

##### Example:

In [9]:
# Inputs for the multi-channel example: a batch with one 3-channel 2x3 image and a
# 2x2 kernel, with unit stride and one ring of zero padding.
data = np.array([[
    [[12, 15, 27], [28, 14, 0]],
    [[13, 7, 17], [25, 9, 4]],
    [[18, 14, 2], [0, 5, 22]],
]])
kernel = np.array([[1, 0], [0, 1]])
stride = padding = 1

In [10]:
conv3d(data=data, kernel=kernel, stride=stride, padding=padding)

Data:
[[[[12 15 27]
   [28 14  0]]

  [[13  7 17]
   [25  9  4]]

  [[18 14  2]
   [ 0  5 22]]]]
Zero-padded Data:
[[[[ 0  0  0  0  0]
   [ 0 12 15 27  0]
   [ 0 28 14  0  0]
   [ 0  0  0  0  0]]

  [[ 0  0  0  0  0]
   [ 0 13  7 17  0]
   [ 0 25  9  4  0]
   [ 0  0  0  0  0]]

  [[ 0  0  0  0  0]
   [ 0 18 14  2  0]
   [ 0  0  5 22  0]
   [ 0  0  0  0  0]]]]
Zero-padded Data shape: [3 4 5]

Kernel:
[[1 0]
 [0 1]]
Kernel length: 2

Output shape: [3, 3, 4]

Indices i:
[[[[0 0]
   [1 1]]

  [[0 0]
   [1 1]]

  [[0 0]
   [1 1]]

  [[0 0]
   [1 1]]]


 [[[1 1]
   [2 2]]

  [[1 1]
   [2 2]]

  [[1 1]
   [2 2]]

  [[1 1]
   [2 2]]]


 [[[2 2]
   [3 3]]

  [[2 2]
   [3 3]]

  [[2 2]
   [3 3]]

  [[2 2]
   [3 3]]]]

Indices j:
[[[[0 1]
   [0 1]]

  [[1 2]
   [1 2]]

  [[2 3]
   [2 3]]

  [[3 4]
   [3 4]]]


 [[[0 1]
   [0 1]]

  [[1 2]
   [1 2]]

  [[2 3]
   [2 3]]

  [[3 4]
   [3 4]]]


 [[[0 1]
   [0 1]]

  [[1 2]
   [1 2]]

  [[2 3]
   [2 3]]

  [[3 4]
   [3 4]]]]

Data at the indices:
[[[[

array([[[[12, 15, 27,  0],
         [28, 26, 15, 27],
         [ 0, 28, 14,  0]],

        [[13,  7, 17,  0],
         [25, 22, 11, 17],
         [ 0, 25,  9,  4]],

        [[18, 14,  2,  0],
         [ 0, 23, 36,  2],
         [ 0,  0,  5, 22]]]])