In [6]:
import numpy as np


In [24]:
# Implementing convolution layer

class Conv3x3:
    """A convolution layer whose filters are all 3x3 (valid padding, stride 1)."""

    def __init__(self, num_filters):
        """Create `num_filters` randomly initialised 3x3 filters."""
        self.num_filters = num_filters

        # Shape is (num_filters, 3, 3). The standard-normal draws are divided
        # by 9 so the starting weights have a smaller variance.
        self.filters = np.random.randn(num_filters, 3, 3) / 9

    def iterate_regions(self, image):
        """
        Lazily yield every 3x3 window of `image` (valid padding).

        - image is a 2d numpy array

        Yields (window, row, col) tuples in row-major order, where (row, col)
        is the top-left corner of the window and `window` is the slice
        image[row:row + 3, col:col + 3].
        """
        n_rows, n_cols = image.shape

        for top in range(n_rows - 2):
            for left in range(n_cols - 2):
                yield image[top:top + 3, left:left + 3], top, left

    def forward(self, input):
        """
        Run the forward pass of the conv layer on `input`.

        - input is a 2d numpy array of shape (h, w)

        Returns a 3d numpy array with dimensions (h - 2, w - 2, num_filters):
        one value per 3x3 window and per filter.
        """
        n_rows, n_cols = input.shape
        feature_maps = np.zeros((n_rows - 2, n_cols - 2, self.num_filters))

        for window, row, col in self.iterate_regions(input):
            # Broadcasting multiplies the single 3x3 window with every filter
            # at once; summing over the two spatial axes then leaves one
            # number per filter.
            products = window * self.filters
            feature_maps[row, col] = np.sum(products, axis=(1, 2))

        return feature_maps

# iterate_regions(self, image)
Given a 2D input image (like a single grayscale image), this method yields every possible 3×3 region in the image (sliding window).

It does not apply the filter; it just extracts the regions for later use.

**How `forward` computes each output value** (for every region yielded by `iterate_regions`):

- `im_region` is a 3x3 array containing the relevant image region.
- `self.filters` is a 3d array of shape `(num_filters, 3, 3)`.
- `im_region * self.filters` uses numpy's broadcasting feature to element-wise multiply the region with every filter. The result is a 3d array with the same shape as `self.filters`.
- `np.sum(..., axis=(1, 2))` sums the result of the previous step over the two 3x3 axes, which produces a 1d array of length `num_filters` where each element contains the convolution result for the corresponding filter.
- That array is assigned to `output[i, j]`, which holds the convolution results for pixel `(i, j)` in the output.

In [14]:
import numpy as np

# 5x5 toy grayscale "image" used by the cells below to demonstrate the layer.
image = np.array(
    [
        [1, 2, 3, 0, 1],
        [0, 1, 2, 3, 1],
        [1, 0, 1, 2, 2],
        [2, 1, 0, 1, 1],
        [0, 1, 2, 1, 0],
    ]
)


In [17]:
# iterate_regions is a generator function: calling it only builds a lazy
# generator object (shown as the cell output); no region is extracted until
# the generator is iterated.
conv = Conv3x3(num_filters=1)
conv.iterate_regions(image=image)

<generator object Conv3x3.iterate_regions at 0x0000027F7F623BC0>

In [20]:
conv = Conv3x3(num_filters=1)

# Iterating the generator is what actually extracts the regions. Each item is
# (region, i, j): a 3x3 slice of `image` plus the row/column of its top-left
# corner.
# NOTE(review): the saved output of this cell also shows lines such as `5 5`,
# `i :0`, `j :0` and `region [[...]]` that nothing in the current code prints;
# presumably left over from an earlier Conv3x3 with debug prints. Re-run the
# cell to refresh the output.
for region, i, j in conv.iterate_regions(image=image):
    print(f"Region at ({i}, {j}):")
    print(region)

5 5
i :0
j :0
region [[1 2 3]
 [0 1 2]
 [1 0 1]]
Region at (0, 0):
[[1 2 3]
 [0 1 2]
 [1 0 1]]
j :1
region [[2 3 0]
 [1 2 3]
 [0 1 2]]
Region at (0, 1):
[[2 3 0]
 [1 2 3]
 [0 1 2]]
j :2
region [[3 0 1]
 [2 3 1]
 [1 2 2]]
Region at (0, 2):
[[3 0 1]
 [2 3 1]
 [1 2 2]]
i :1
j :0
region [[0 1 2]
 [1 0 1]
 [2 1 0]]
Region at (1, 0):
[[0 1 2]
 [1 0 1]
 [2 1 0]]
j :1
region [[1 2 3]
 [0 1 2]
 [1 0 1]]
Region at (1, 1):
[[1 2 3]
 [0 1 2]
 [1 0 1]]
j :2
region [[2 3 1]
 [1 2 2]
 [0 1 1]]
Region at (1, 2):
[[2 3 1]
 [1 2 2]
 [0 1 1]]
i :2
j :0
region [[1 0 1]
 [2 1 0]
 [0 1 2]]
Region at (2, 0):
[[1 0 1]
 [2 1 0]
 [0 1 2]]
j :1
region [[0 1 2]
 [1 0 1]
 [1 2 1]]
Region at (2, 1):
[[0 1 2]
 [1 0 1]
 [1 2 1]]
j :2
region [[1 2 2]
 [0 1 1]
 [2 1 0]]
Region at (2, 2):
[[1 2 2]
 [0 1 1]
 [2 1 0]]


In [26]:
# Fresh single-filter layer. Its weights are random and unseeded, so the
# numbers displayed below differ from run to run.
conv = Conv3x3(num_filters=1)
output = conv.forward(input=image)
output  # shape (3, 3, 1): one value per 3x3 window of the 5x5 image, per filter

array([[[-0.32132096],
        [-0.49659488],
        [-0.15307762]],

       [[ 0.20196325],
        [-0.32132096],
        [-0.39668205]],

       [[-0.12157863],
        [-0.10261442],
        [-0.05254872]]])

# Implementing backprop

In [27]:
class Conv3x3:
  '''
  A convolution layer using 3x3 filters (valid padding, stride 1).

  forward() caches its input so that a later backprop() call can compute the
  filter gradients and apply one gradient-descent step to the filters.
  '''

  def __init__(self, num_filters):
    '''
    - num_filters is the number of 3x3 filters in the layer.
    '''
    self.num_filters = num_filters

    # filters is a 3d array with dimensions (num_filters, 3, 3)
    # We divide by 9 to reduce the variance of our initial values
    self.filters = np.random.randn(num_filters, 3, 3) / 9

    # Input of the most recent forward() call. It stays None until forward()
    # has run, which lets backprop() report a clear error instead of failing
    # on a missing attribute.
    self.last_input = None

  def iterate_regions(self, image):
    '''
    Generates all possible 3x3 image regions using valid padding.
    - image is a 2d numpy array.

    Yields (im_region, i, j) in row-major order, where (i, j) is the top-left
    corner of the region and im_region is the slice image[i:i+3, j:j+3].
    '''
    h, w = image.shape

    for i in range(h - 2):
      for j in range(w - 2):
        im_region = image[i:(i + 3), j:(j + 3)]
        yield im_region, i, j

  def forward(self, input):
    '''
    Performs a forward pass of the conv layer using the given input.
    Returns a 3d numpy array with dimensions (h - 2, w - 2, num_filters),
    where (h, w) is the shape of the input.
    - input is a 2d numpy array
    '''
    # Cached for backprop(), which needs the same regions again.
    self.last_input = input

    h, w = input.shape
    output = np.zeros((h - 2, w - 2, self.num_filters))

    for im_region, i, j in self.iterate_regions(input):
      # Broadcasting multiplies the region with every filter at once; summing
      # over the two 3x3 axes leaves one value per filter.
      output[i, j] = np.sum(im_region * self.filters, axis=(1, 2))

    return output

  def backprop(self, d_L_d_out, learn_rate):
    '''
    Performs a backward pass of the conv layer and updates the filters in place.
    - d_L_d_out is the loss gradient for this layer's outputs. It must have
      the same shape as the array returned by the last forward() call:
      (h - 2, w - 2, num_filters).
    - learn_rate is a float.

    Returns None.
    Raises RuntimeError if forward() has not been called yet.
    Raises ValueError if d_L_d_out does not have the expected shape.
    '''
    if self.last_input is None:
      raise RuntimeError(
        'backprop() called before forward(): there is no cached input to '
        'compute gradients from.'
      )

    h, w = self.last_input.shape
    expected_shape = (h - 2, w - 2, self.num_filters)
    # Checked explicitly because the broadcast below could otherwise accept a
    # mis-shaped gradient silently (e.g. a trailing axis of length 1).
    if np.shape(d_L_d_out) != expected_shape:
      raise ValueError(
        'd_L_d_out has shape %s, expected %s'
        % (np.shape(d_L_d_out), expected_shape)
      )

    d_L_d_filters = np.zeros(self.filters.shape)

    for im_region, i, j in self.iterate_regions(self.last_input):
      # d_L_d_out[i, j] holds one gradient value per filter. Reshaped to
      # (num_filters, 1, 1) it broadcasts against the 3x3 region, so every
      # filter's gradient is accumulated in one step.
      d_L_d_filters += d_L_d_out[i, j][:, np.newaxis, np.newaxis] * im_region

    # Update filters
    self.filters -= learn_rate * d_L_d_filters

    # We aren't returning anything here since we use Conv3x3 as the first layer in our CNN.
    # Otherwise, we'd need to return the loss gradient for this layer's inputs, just like every
    # other layer in our CNN.
    return None