In [1]:
import numpy as np

In [3]:
# define image
image=np.array([[1,2,3,0,1],
              [0,1,2,3,2],
              [3,0,1,2,1],
              [2,1,3,0,0],
              [1,2,0,1,2]])
image.shape

(5, 5)

In [4]:
# define kernel
kernel=np.array([[0,1,0],
                 [1,-4,1],
                 [0,1,0]])
kernel.shape

(3, 3)

In [6]:
# define com=nvolution operation
def convolve(image,kernel):
  image_h,image_w=image.shape
  kernel_h,kernel_w=kernel.shape
  output_h=image_h-kernel_h+1
  output_w=image_w-kernel_w+1
  output=np.zeros((output_h,output_w))

  for i in range(output_h):
    for j in range(output_w):
      output[i, j] = np.sum(image[i:i+kernel_h, j:j+kernel_w] * kernel)
  return output
# appply the  convolution
output=convolve(image,kernel)
print(output)


[[  0.   0.  -6.]
 [  6.   3.  -3.]
 [  3. -10.   6.]]


# Padding with stride = 1

1. Padding: Zero-padding is added to the image to control the spatial dimensions of the output.
2. Stride: Controls how much the filter moves at each step. A stride of 1 means the filter moves one pixel at a time.
3. Convolution with Padding and Stride: The output is now controlled by the padding and stride, allowing us to maintain or reduce the spatial dimensions of the output.

In [8]:
# adding padding
def pad_image(image,pad):
  return np.pad(image,pad,mode="constant",constant_values=0)

pad_image(image,1)

array([[0, 0, 0, 0, 0, 0, 0],
       [0, 1, 2, 3, 0, 1, 0],
       [0, 0, 1, 2, 3, 2, 0],
       [0, 3, 0, 1, 2, 1, 0],
       [0, 2, 1, 3, 0, 0, 0],
       [0, 1, 2, 0, 1, 2, 0],
       [0, 0, 0, 0, 0, 0, 0]])

In [11]:
# convolution with padding  and stride
def convolve_with_padding_and_stride(image, kernel, stride=1, padding=0):
    if padding > 0:
        image = pad_image(image, padding)

    image_h, image_w = image.shape
    kernel_h, kernel_w = kernel.shape
    output_h = (image_h - kernel_h) // stride + 1
    output_w = (image_w - kernel_w) // stride + 1
    output = np.zeros((output_h, output_w))

    for i in range(0, output_h * stride, stride):
        for j in range(0, output_w * stride, stride):
            output[i // stride, j // stride] = np.sum(
                image[i:i+kernel_h, j:j+kernel_w] * kernel
            )

    return output
convolve_with_padding_and_stride(image,kernel,stride=1,padding=1)

array([[ -2.,  -3.,  -8.,   7.,  -2.],
       [  5.,   0.,   0.,  -6.,  -3.],
       [-10.,   6.,   3.,  -3.,   0.],
       [ -3.,   3., -10.,   6.,   3.],
       [  0.,  -6.,   6.,  -2.,  -7.]])

# Max Pooling

In [14]:
def max_pooling(image,pool_size,stride):
    image_h, image_w = image.shape
    output_h = (image_h - pool_size) // stride + 1
    output_w = (image_w - pool_size) // stride + 1
    output = np.zeros((output_h, output_w))

    for i in range(0, output_h * stride, stride):
        for j in range(0, output_w * stride, stride):
            output[i // stride, j // stride] = np.max(
                image[i:i+pool_size, j:j+pool_size]
            )

    return output
max_pooling(image,pool_size=2,stride=2)

array([[2., 3.],
       [3., 3.]])

# Convolution on RGB Images

In [15]:
image = np.array([
    [[1, 0, 2], [2, 1, 1], [3, 2, 0], [0, 1, 1], [1, 0, 2]],
    [[0, 1, 0], [1, 0, 1], [2, 2, 2], [3, 1, 3], [2, 0, 1]],
    [[3, 0, 2], [0, 1, 0], [1, 0, 1], [2, 2, 2], [1, 0, 0]],
    [[2, 1, 1], [1, 0, 2], [3, 3, 1], [0, 1, 0], [0, 2, 1]],
    [[1, 2, 2], [2, 1, 0], [0, 0, 1], [1, 2, 2], [2, 1, 1]]
])
image.shape

(5, 5, 3)

In [16]:
kernel = np.array([
    [[0, 1, 0], [1, -1, 1], [0, 1, 0]],
    [[1, 0, 1], [0, -1, 0], [1, 0, 1]],
    [[0, 1, 0], [1, 1, 1], [0, 1, 0]]
])
kernel.shape

(3, 3, 3)

In [17]:
image_h, image_w, image_c = image.shape

print(image_h, image_w, image_c)
kernel_h, kernel_w, kernel_c = kernel.shape
print(kernel_h, kernel_w, kernel_c )
output_h = image_h - kernel_h + 1
output_w = image_w - kernel_w + 1
output = np.zeros((output_h, output_w, 1))
output.shape

5 5 3
3 3 3


(3, 3, 1)

In [25]:
    for k in range(image_c):  # Apply the convolution for each channel
            for i in range(output_h):
                for j in range(output_w):
                    # print(np.sum(image[i:i+kernel_h, j:j+kernel_w, k] * kernel))
                    output[i, j] = np.sum(image[i:i+kernel_h, j:j+kernel_w, k] * kernel)
                    # break
                # break
            # break
    output

array([[[10.],
        [12.],
        [12.]],

       [[16.],
        [15.],
        [11.]],

       [[ 8.],
        [11.],
        [15.]]])

In [26]:
# Convolution operation
def convolve_rgb(image, kernel):
    image_h, image_w, image_c = image.shape
    kernel_h, kernel_w, kernel_c = kernel.shape
    output_h = image_h - kernel_h + 1
    output_w = image_w - kernel_w + 1
    output = np.zeros((output_h, output_w, 1))

    for k in range(image_c):  # Apply the convolution for each channel
        for i in range(output_h):
            for j in range(output_w):
                output[i, j] = np.sum(image[i:i+kernel_h, j:j+kernel_w, k] * kernel)
    return output
print(convolve_rgb(image,kernel))

[[[10.]
  [12.]
  [12.]]

 [[16.]
  [15.]
  [11.]]

 [[ 8.]
  [11.]
  [15.]]]


# Implementing the same with Keras

In [28]:
import tensorflow as tf
from  tensorflow.keras import layers,models

In [32]:
input_shape=(100,100,3)
model=models.Sequential()
model.add(layers.Input(shape=input_shape)) # input layer

model.add(layers.Conv2D(32, (3, 3), padding='same', strides=1, activation='relu')) # add covolution layer

model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2)) # add max pooling layer

# add more con and max pooling layer
model.add(layers.Conv2D(32, (3, 3), padding='same', strides=1, activation='relu'))
model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))

model.summary()