In [2]:
import numpy as np

In [24]:
X = np.random.randint(low=0,high=255,size=(5,5))
X

array([[143, 218, 157,  12, 101],
       [189, 190, 222, 208, 146],
       [ 63, 126, 141, 187,  29],
       [218, 217,  31,  61, 174],
       [241,   2,  68, 123, 223]])

In [25]:
X[2, 0] # [row, col]

63

In [26]:
x = 0
y = 0
X[y: y+3, x: x+3]

array([[143, 218, 157],
       [189, 190, 222],
       [ 63, 126, 141]])

$x$ is the column pointer, $y$ is the row pointer. Let's consider row selection ($y$): start from row 0 and select up to 3. Now let's consider column selection ($x$): start from 0 and select up to 3.

It may be strange to see $x$ represent columns, but recall that images have their origin in the top left corner. From this point of view, we can think of moving to the right across columns as the $x$ direction and going down the rows as $y$ direction.

So how can we iterate over every pixel of the image?

In [27]:
x, y = X.shape
for x_i in range(x):
    for y_i in range(y):
        print(f"({x_i}, {y_i})")

(0, 0)
(0, 1)
(0, 2)
(0, 3)
(0, 4)
(1, 0)
(1, 1)
(1, 2)
(1, 3)
(1, 4)
(2, 0)
(2, 1)
(2, 2)
(2, 3)
(2, 4)
(3, 0)
(3, 1)
(3, 2)
(3, 3)
(3, 4)
(4, 0)
(4, 1)
(4, 2)
(4, 3)
(4, 4)


Great! Now let's iterate over a $2 \times 2$ subset of pixels.

In [31]:
X

array([[143, 218, 157,  12, 101],
       [189, 190, 222, 208, 146],
       [ 63, 126, 141, 187,  29],
       [218, 217,  31,  61, 174],
       [241,   2,  68, 123, 223]])

In [37]:
x, y = X.shape
for x_i in range(x):
    # print(X[x_i])
    for y_i in range(y):
        # print(X[:,y_i])
        # S = X[y_i: y_i+2, x_i: x_i+2]
        S = X[x_i: x_i+2, y_i: y_i+2]
        print(S)
        print()
        

[[143 218]
 [189 190]]

[[218 157]
 [190 222]]

[[157  12]
 [222 208]]

[[ 12 101]
 [208 146]]

[[101]
 [146]]

[[189 190]
 [ 63 126]]

[[190 222]
 [126 141]]

[[222 208]
 [141 187]]

[[208 146]
 [187  29]]

[[146]
 [ 29]]

[[ 63 126]
 [218 217]]

[[126 141]
 [217  31]]

[[141 187]
 [ 31  61]]

[[187  29]
 [ 61 174]]

[[ 29]
 [174]]

[[218 217]
 [241   2]]

[[217  31]
 [  2  68]]

[[ 31  61]
 [ 68 123]]

[[ 61 174]
 [123 223]]

[[174]
 [223]]

[[241   2]]

[[ 2 68]]

[[ 68 123]]

[[123 223]]

[[223]]



This scan works as expected but we can see that not all elements are $2 \times 2$ as we expect them to be. This always happens when we're at the edge of the matrix.

One way to fix this is to add zero-padding where we add a certain number of rows and columns to all sides of the matrix. In this case, because we're interested in a $2 \times 2$ view, let's pad $X$ with a single layer of 0's on all sides.

In [40]:
X_pad = np.pad(X, (1,1), mode='constant')
X_pad

array([[  0,   0,   0,   0,   0,   0,   0],
       [  0, 143, 218, 157,  12, 101,   0],
       [  0, 189, 190, 222, 208, 146,   0],
       [  0,  63, 126, 141, 187,  29,   0],
       [  0, 218, 217,  31,  61, 174,   0],
       [  0, 241,   2,  68, 123, 223,   0],
       [  0,   0,   0,   0,   0,   0,   0]])

Great, the padding worked! Now let's try that again.

In [49]:
x, y = X_pad.shape
for x_i in range(x):
    for y_i in range(y):
        if x_i >= (x-2):
            continue
        if y_i >= (y-2):
            continue
        S = X_pad[x_i: x_i+2, y_i: y_i+2]
        print(S)
        print()

[[  0   0]
 [  0 143]]

[[  0   0]
 [143 218]]

[[  0   0]
 [218 157]]

[[  0   0]
 [157  12]]

[[  0   0]
 [ 12 101]]

[[  0 143]
 [  0 189]]

[[143 218]
 [189 190]]

[[218 157]
 [190 222]]

[[157  12]
 [222 208]]

[[ 12 101]
 [208 146]]

[[  0 189]
 [  0  63]]

[[189 190]
 [ 63 126]]

[[190 222]
 [126 141]]

[[222 208]
 [141 187]]

[[208 146]
 [187  29]]

[[  0  63]
 [  0 218]]

[[ 63 126]
 [218 217]]

[[126 141]
 [217  31]]

[[141 187]
 [ 31  61]]

[[187  29]
 [ 61 174]]

[[  0 218]
 [  0 241]]

[[218 217]
 [241   2]]

[[217  31]
 [  2  68]]

[[ 31  61]
 [ 68 123]]

[[ 61 174]
 [123 223]]



Perfect! Now let's look at how we can element-wise multiply $S$ by another matrix representing the kernel which we call $K$.

In [54]:
K = np.random.randint(low=-5, high=5, size=(2,2))
print(K)
print()
print()
x, y = X_pad.shape
for x_i in range(x):
    for y_i in range(y):
        if x_i >= (x-2):
            continue
        if y_i >= (y-2):
            continue
        S = X_pad[x_i: x_i+2, y_i: y_i+2]
        print(K*S)
        print()

[[-3  3]
 [ 3 -1]]


[[   0    0]
 [   0 -143]]

[[   0    0]
 [ 429 -218]]

[[   0    0]
 [ 654 -157]]

[[  0   0]
 [471 -12]]

[[   0    0]
 [  36 -101]]

[[   0  429]
 [   0 -189]]

[[-429  654]
 [ 567 -190]]

[[-654  471]
 [ 570 -222]]

[[-471   36]
 [ 666 -208]]

[[ -36  303]
 [ 624 -146]]

[[  0 567]
 [  0 -63]]

[[-567  570]
 [ 189 -126]]

[[-570  666]
 [ 378 -141]]

[[-666  624]
 [ 423 -187]]

[[-624  438]
 [ 561  -29]]

[[   0  189]
 [   0 -218]]

[[-189  378]
 [ 654 -217]]

[[-378  423]
 [ 651  -31]]

[[-423  561]
 [  93  -61]]

[[-561   87]
 [ 183 -174]]

[[   0  654]
 [   0 -241]]

[[-654  651]
 [ 723   -2]]

[[-651   93]
 [   6  -68]]

[[ -93  183]
 [ 204 -123]]

[[-183  522]
 [ 369 -223]]



Awesome. Now let's sum those matrices.

In [58]:
K = np.random.randint(low=-5, high=5, size=(2,2))
print(K)
print()
print()
x, y = X_pad.shape
for x_i in range(x):
    for y_i in range(y):
        if x_i >= (x-2):
            continue
        if y_i >= (y-2):
            continue
        S = X_pad[x_i: x_i+2, y_i: y_i+2]
        print( (K*S).sum() )
        print()

[[-2 -2]
 [ 4 -2]]


-286

136

558

604

-154

-664

-346

-434

134

314

-504

-758

-602

-670

-18

-562

60

272

-654

-536

-918

90

-624

-158

-424



Let's use these sum values to construct a new matrix $Z$.

In [62]:
Z = np.zeros_like(X) # transformation should have input dim

K = np.random.randint(low=-5, high=5, size=(2,2))
print('Kernel:')
print(K)
print()
print()
x, y = X_pad.shape
for x_i in range(x):
    for y_i in range(y):
        if x_i >= (x-2):
            continue
        if y_i >= (y-2):
            continue
        S = X_pad[x_i: x_i+2, y_i: y_i+2]
        Z[x_i, y_i] = (K*S).sum()
print('Convolved:')
Z

Kernel:
[[ 0  1]
 [ 4 -1]]


Convolved:


array([[-143,  354,  715,  616,  -53],
       [ -46,  784,  695,  692,  787],
       [ 126,  316,  585,  585,  865],
       [-155,  781,  978,  250,   99],
       [ -23, 1179,  -29,  210,  443]])

Done.

Bugfixing.

In [32]:
def convolve(image: np.ndarray, kernel: np.ndarray) -> np.ndarray:
    """
    Convolve an image with a kernel assuming zero-padding of the image to handle the borders

    :param image: the image (either greyscale shape=(rows, cols) or colour shape=(rows, cols, channels))
    :type numpy.ndarray

    :param: kernel: the kernel (shape=(kheight, kwidth); both dimensions odd)
    :type: numpy.ndarray

    :returns the convolved image (of the same shape as the input image)
    :rtype numpy.ndarray
    """
    im_count_row, im_count_col = image.shape
    kern_count_row, kern_count_col = kernel.shape
    if (kern_count_row % 2) == 0:
        raise ValueError('Kernel cannot be an even dimension!')
    if (kern_count_col % 2) == 0:
        raise ValueError('Kernel cannot be an even dimension!')
    out = np.zeros_like(image)
    # Nixon: width of padding should be half the size of kernel
    padding_tb_thickness = np.floor((kern_count_row/2)).astype(int) # top & bottom padding thickness
    padding_lr_thickness = np.floor((kern_count_col/2)).astype(int) # left & right padding thickness
    image_padded = np.pad(image, (padding_tb_thickness, padding_lr_thickness), mode='constant')
    print(f'padded_image:\n{image_padded}')
    centre_pos_x = padding_lr_thickness
    centre_pos_y = padding_tb_thickness
    for y in range(im_count_row):
        centre_pos_y = padding_tb_thickness
        for x in range(im_count_col):
            print(f"x:{x}, y:{y}")
            # guard against going past padding
            # if centre_pos_x > im_count_col:
            #     continue
            # if centre_pos_y > im_count_row:
            #     continue
            # convolve
            image_section = image_padded[y: y+kern_count_row, x: x+kern_count_col]
            out[centre_pos_x,centre_pos_y] = (kernel * image_section).sum()
            print('---')
            print(f"im_sect:\n{image_section}\nctr_pos:{centre_pos_x},{centre_pos_y}")
            print('---')
            
            centre_pos_y += 1
        centre_pos_x += 1
    return out

In [26]:
np.random.seed(0)
image = np.random.randint(low=0, high=255, size=(22,6))
image

array([[172,  47, 117, 192,  67, 251],
       [195, 103,   9, 211,  21, 242],
       [ 36,  87,  70, 216,  88, 140],
       [ 58, 193, 230,  39,  87, 174],
       [ 88,  81, 165,  25,  77,  72],
       [  9, 148, 115, 208, 243, 197],
       [254,  79, 175, 192,  82,  99],
       [216, 177, 243,  29, 147, 147],
       [142, 167,  32, 193,   9, 185],
       [127,  32,  31, 202, 244, 151],
       [163, 254, 203, 114, 183,  28],
       [ 34, 128, 128, 164,  53, 133],
       [ 38, 232, 244,  17,  79, 132],
       [105,  42, 186,  31, 120,   1],
       [ 65, 231, 169,  57,  35, 102],
       [119,  11, 174,  82,  91, 128],
       [142,  99,  53, 140, 121, 170],
       [ 84, 203,  68,   6, 196,  47],
       [127, 244, 131, 204, 100, 180],
       [232,  78, 143, 148, 227, 186],
       [ 23, 207, 141, 117,  85,  48],
       [ 49,  69, 169, 163, 192,  95]])

In [33]:
kernel = (1/9) * np.ones((3,3))
conv_img = convolve(image, kernel)

padded_image:
[[  0   0   0   0   0   0   0   0]
 [  0 172  47 117 192  67 251   0]
 [  0 195 103   9 211  21 242   0]
 [  0  36  87  70 216  88 140   0]
 [  0  58 193 230  39  87 174   0]
 [  0  88  81 165  25  77  72   0]
 [  0   9 148 115 208 243 197   0]
 [  0 254  79 175 192  82  99   0]
 [  0 216 177 243  29 147 147   0]
 [  0 142 167  32 193   9 185   0]
 [  0 127  32  31 202 244 151   0]
 [  0 163 254 203 114 183  28   0]
 [  0  34 128 128 164  53 133   0]
 [  0  38 232 244  17  79 132   0]
 [  0 105  42 186  31 120   1   0]
 [  0  65 231 169  57  35 102   0]
 [  0 119  11 174  82  91 128   0]
 [  0 142  99  53 140 121 170   0]
 [  0  84 203  68   6 196  47   0]
 [  0 127 244 131 204 100 180   0]
 [  0 232  78 143 148 227 186   0]
 [  0  23 207 141 117  85  48   0]
 [  0  49  69 169 163 192  95   0]
 [  0   0   0   0   0   0   0   0]]
x:0, y:0
---
im_sect:
[[  0   0   0]
 [  0 172  47]
 [  0 195 103]]
ctr_pos:1,1
---
x:1, y:0
---
im_sect:
[[  0   0   0]
 [172  47 117]
 [195 103

IndexError: index 6 is out of bounds for axis 1 with size 6

In [146]:
image = np.random.randint(low=0, high=255, size=(8,8))
image

array([[165, 127, 129, 133, 198, 140,  90,  74],
       [251, 182,  78,  62,  72, 199,  45, 133],
       [ 47, 187, 170, 195, 138, 242,  57, 219],
       [ 89, 131, 125, 206,  82, 197, 186, 132],
       [ 17, 197, 191,  94, 152, 131,  69, 168],
       [164,  58, 177, 183, 152, 161, 146,  97],
       [206, 241, 135, 181, 235,  46, 240, 244],
       [127, 161,  81, 157,  12, 118,  46, 118]])

In [46]:
padded = np.pad(image, (1,1), mode='constant')
padded

array([[  0,   0,   0,   0,   0],
       [  0, 173,  87, 193,   0],
       [  0,  70, 234,  53,   0],
       [  0,  48,  94,  59,   0],
       [  0,   0,   0,   0,   0]])

In [106]:
start_lim = 1
end_lim = image.shape[1] - 1
# padded[start_row:end_row,start_col:end_col]
print(padded[0:3,0:3])
print()
print(padded[0:3,1:4])
print()
print(padded[0:3,2:5])
print()
print(padded[1:4,0:3])

[[  0   0   0]
 [  0 173  87]
 [  0  70 234]]

[[  0   0   0]
 [173  87 193]
 [ 70 234  53]]

[[  0   0   0]
 [ 87 193   0]
 [234  53   0]]

[[  0 173  87]
 [  0  70 234]
 [  0  48  94]]


In [151]:
x_start = 0
x_end = 3
y_start = 0
y_end = 3

# print(f"{padded[y_start:y_end,x_start:x_end]}")

for x in range(image.shape[0]):
    for y in range(image.shape[1]):
        print(f"{padded[y_start:y_end,x_start:x_end]}")

[[  0   0   0]
 [  0 173  87]
 [  0  70 234]]
[[  0   0   0]
 [  0 173  87]
 [  0  70 234]]
[[  0   0   0]
 [  0 173  87]
 [  0  70 234]]
[[  0   0   0]
 [  0 173  87]
 [  0  70 234]]
[[  0   0   0]
 [  0 173  87]
 [  0  70 234]]
[[  0   0   0]
 [  0 173  87]
 [  0  70 234]]
[[  0   0   0]
 [  0 173  87]
 [  0  70 234]]
[[  0   0   0]
 [  0 173  87]
 [  0  70 234]]
[[  0   0   0]
 [  0 173  87]
 [  0  70 234]]
[[  0   0   0]
 [  0 173  87]
 [  0  70 234]]
[[  0   0   0]
 [  0 173  87]
 [  0  70 234]]
[[  0   0   0]
 [  0 173  87]
 [  0  70 234]]
[[  0   0   0]
 [  0 173  87]
 [  0  70 234]]
[[  0   0   0]
 [  0 173  87]
 [  0  70 234]]
[[  0   0   0]
 [  0 173  87]
 [  0  70 234]]
[[  0   0   0]
 [  0 173  87]
 [  0  70 234]]
[[  0   0   0]
 [  0 173  87]
 [  0  70 234]]
[[  0   0   0]
 [  0 173  87]
 [  0  70 234]]
[[  0   0   0]
 [  0 173  87]
 [  0  70 234]]
[[  0   0   0]
 [  0 173  87]
 [  0  70 234]]
[[  0   0   0]
 [  0 173  87]
 [  0  70 234]]
[[  0   0   0]
 [  0 173  87]
 [  

In [148]:
# vectorisation tricks
print(image.shape)
image

(8, 8)


array([[165, 127, 129, 133, 198, 140,  90,  74],
       [251, 182,  78,  62,  72, 199,  45, 133],
       [ 47, 187, 170, 195, 138, 242,  57, 219],
       [ 89, 131, 125, 206,  82, 197, 186, 132],
       [ 17, 197, 191,  94, 152, 131,  69, 168],
       [164,  58, 177, 183, 152, 161, 146,  97],
       [206, 241, 135, 181, 235,  46, 240, 244],
       [127, 161,  81, 157,  12, 118,  46, 118]])

In [150]:
image[np.array(np.arange(0,3))]

array([[165, 127, 129, 133, 198, 140,  90,  74],
       [251, 182,  78,  62,  72, 199,  45, 133],
       [ 47, 187, 170, 195, 138, 242,  57, 219]])

In [136]:
def convolve2d(image, kernel):
    """
    This function which takes an image and a kernel and returns the convolution of them.

    :param image: a numpy array of size [image_height, image_width].
    :param kernel: a numpy array of size [kernel_height, kernel_width].
    :return: a numpy array of size [image_height, image_width] (convolution output).
    """
    # convolution output
    output = np.zeros_like(image)
    print(image.shape)
    print(image)
    im_count_row, im_count_col = image.shape
    kern_count_row, kern_count_col = kernel.shape

    pad_count_row = np.floor((kern_count_row/2)).astype(int)
    pad_count_col = np.floor((kern_count_col/2)).astype(int)

    # Add zero padding to the input image
    image_padded = np.pad(image, (pad_count_row, pad_count_col), mode='constant')
    print(image_padded.shape)
    print(image_padded)
    print(pad_count_row)
    print(pad_count_col)
    
    # print(f"{padded[y_start:y_end,x_start:x_end]}")

    image_padded[pad_count_row:-pad_count_col, pad_count_col:-pad_count_row] = image
    print(image_padded)

    # Loop over every pixel of the image
    print('image.shape', image.shape)
    for x in range(image.shape[0]):
        for y in range(image.shape[1]):
            # element-wise multiplication of the kernel and the image
            output[x, y]=(kernel * image_padded[x: x+kern_count_col, y: y+kern_count_row]).sum()

    return output

In [137]:
import numpy as np
X = np.random.randint(0,255,(8,10))
K = (1/9) * np.ones((3,3))
convolve2d(X, K)

(8, 10)
[[169 197  50 240 185  94 136 228 180  24]
 [206 233 145  56 108 221  70  45  73 187]
 [ 94  13  40 181 117  54   1  93  93  86]
 [169  14 159 211  65 169 107 114 157  85]
 [ 47 110  11  27 190   7 166 218 202 239]
 [162 147  96 104  28 115 245  82 224 188]
 [ 12 146 210  54 228  34 204 227 250  92]
 [ 98  86 156 236 167  28 147  59 186 116]]
(10, 12)
[[  0   0   0   0   0   0   0   0   0   0   0   0]
 [  0 169 197  50 240 185  94 136 228 180  24   0]
 [  0 206 233 145  56 108 221  70  45  73 187   0]
 [  0  94  13  40 181 117  54   1  93  93  86   0]
 [  0 169  14 159 211  65 169 107 114 157  85   0]
 [  0  47 110  11  27 190   7 166 218 202 239   0]
 [  0 162 147  96 104  28 115 245  82 224 188   0]
 [  0  12 146 210  54 228  34 204 227 250  92   0]
 [  0  98  86 156 236 167  28 147  59 186 116   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0]]
1
1
[[  0   0   0   0   0   0   0   0   0   0   0   0]
 [  0 169 197  50 240 185  94 136 228 180  24   0]
 [  0 206 233 145  56

array([[ 89, 111, 102,  87, 100,  90,  88,  81,  81,  51],
       [101, 127, 128, 124, 139, 109, 104, 102, 112,  71],
       [ 80, 119, 116, 120, 131, 101,  97,  83, 103,  75],
       [ 49,  73,  85, 111, 113,  97, 103, 127, 142,  95],
       [ 72, 101,  97,  99, 101, 121, 135, 168, 167, 121],
       [ 69, 104, 100, 105,  87, 135, 144, 202, 191, 132],
       [ 72, 123, 137, 142, 110, 132, 126, 180, 158, 117],
       [ 38,  78,  98, 116,  83,  89,  77, 119, 103,  71]])