In [1]:
import requests

# URLs of the files
sample_image_cat = 'https://www.raphaelcousin.com/modules/data-science-practice/module7/exercise/cat.jpg'
sample_image_dog = 'https://www.raphaelcousin.com/modules/data-science-practice/module7/exercise/dog.jpg'

# Function to download a file
def download_file(url, file_name, timeout=30):
    """
    Download a file over HTTP and save it locally
    Args:
        url: Address of the file to fetch
        file_name: Local path the response body is written to
        timeout: Seconds to wait for the server; without a timeout
            requests.get can block indefinitely on an unresponsive host
    Raises:
        requests.HTTPError: if the server answers with an error status
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # Ensure we notice bad responses
    with open(file_name, 'wb') as file:
        file.write(response.content)
    print(f'Downloaded {file_name} from {url}')

# Downloading the files
download_file(sample_image_cat, 'cat.jpg')
download_file(sample_image_dog, 'dog.jpg')

Downloaded cat.jpg from https://www.raphaelcousin.com/modules/data-science-practice/module7/exercise/cat.jpg
Downloaded dog.jpg from https://www.raphaelcousin.com/modules/data-science-practice/module7/exercise/dog.jpg


# Part A: Image Processing Warm-up

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import cv2 # !pip install opencv-python
from PIL import Image

In [4]:
# Utility function to display images side by side
def display_images(images, titles, figsize=(15, 5)):
    """
    Display multiple images in a row
    Args:
        images: List of images to display. 2-D arrays are drawn with a gray
            colormap; any other array is treated as an OpenCV BGR image and
            converted to RGB before drawing
        titles: List of titles for each image (same length as `images`)
        figsize: Figure size (width, height)
    Raises:
        ValueError: if `images` is empty or `titles` has a different length
    """
    if len(images) == 0:
        raise ValueError("display_images needs at least one image")
    if len(images) != len(titles):
        # zip() below would otherwise silently drop the unmatched entries
        raise ValueError(
            f"got {len(images)} images but {len(titles)} titles"
        )

    # squeeze=False always returns a 2-D array of Axes, so a single image
    # does not need to be special-cased
    fig, axes = plt.subplots(1, len(images), figsize=figsize, squeeze=False)

    for ax, img, title in zip(axes[0], images, titles):
        if len(img.shape) == 2:  # Grayscale image
            ax.imshow(img, cmap='gray')
        else:  # Color image
            ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        ax.set_title(title)
        ax.axis('off')
    plt.tight_layout()
    plt.show()

In [None]:
# Choose your team (cat or dog) and process one of the two images.
# Both assignments run in order, so the last one wins: as written, `path`
# ends up as "dog.jpg". Comment out the line you do not want.
path = "cat.jpg"
path = "dog.jpg"

# 1. Reading Images

In [None]:
# Read image using OpenCV (BGR format)
img_cv = cv2.imread(path)
# cv2.imread reports failure by returning None instead of raising, which
# would only surface later as an unrelated-looking error on img_cv
if img_cv is None:
    raise FileNotFoundError(f"Could not read image file: {path}")

# Read image using PIL (RGB format)
img_pil = Image.open(path)
img_pil_array = np.array(img_pil)


In [None]:
# What is the size of the image (how many channels)? On how many bits is each pixel encoded?
# Unpack the shape here so this cell does not rely on a later cell having
# already defined height / width / channels
height, width, channels = img_cv.shape
print(f"\nImage Dimensions:")
print(f"Height: {height} pixels")
print(f"Width: {width} pixels")
print(f"Channels: {channels}")
print(f"Total pixels: {height * width}")
print(f"bits depth:", img_cv.dtype)

In [None]:
# What is the average of Red, of Green, of Blue?
# Exercise: fill in `channel_data` below. img_cv was read with OpenCV, whose
# loading cell notes the BGR format, so the channel index does not follow
# the R, G, B order used in the question.

for channel in range(3):
    channel_data = 
    print(f"  Mean: {channel_data.mean():.2f}")

In [None]:
# Show the loaded image, using its file name as the title
display_images(images=[img_cv], titles=[path])

# 2. Basic Image Manipulations

In [None]:
# Exercise template: the three assignments with an empty right-hand side
# (img_gray, img_rotated, img_flipped) must be completed before this cell runs.
# The image shape is unpacked as (height, width, channels).
height, width, channels = img_cv.shape

# Convert to grayscale
# display_images draws 2-D arrays with a gray colormap, so the result should
# be a single-channel 2-D array
img_gray = 

# Rotate image 45deg
# Get the image center and create the rotation matrix
img_rotated = 

# Flip image horizontally
img_flipped = 

# Display original and manipulated images
display_images(
    [img_cv, img_gray, img_rotated, img_flipped],
    ['Original', 'Grayscale', 'Rotated 45°', 'Flipped']
)



# 3. Channel Splitting and Histograms

In [None]:
# Separate the BGR image into its three single-channel planes
b, g, r = cv2.split(img_cv)

# Each plane is 2-D, so display_images renders it as a gray intensity map
display_images(
    images=[b, g, r],
    titles=['Blue Channel', 'Green Channel', 'Red Channel'],
)


In [None]:
# Plot histograms for each channel
# The per-channel arrays get their own name so the integer `channels` count
# unpacked from img_cv.shape is not overwritten by a list.
channel_arrays = [b, g, r]
colors = ['b', 'g', 'r']
titles = ['Blue', 'Green', 'Red']

fig, axes = plt.subplots(1, 3, figsize=(15, 5))
for ax, channel, color, title in zip(axes, channel_arrays, colors, titles):
    # range=(0, 256) pins one bin per intensity value; without it the 256 bins
    # are stretched over the observed min..max and no longer line up with
    # integer pixel values (assumes 8-bit pixels, as printed by the dtype check)
    ax.hist(channel.ravel(), bins=256, range=(0, 256), color=color, alpha=0.7)
    ax.set_title(f'{title} Channel Histogram')
    ax.set_xlabel('Pixel Value')
    ax.set_ylabel('Frequency')
fig.tight_layout()
plt.show()

# 4. Basic Image Transformations

In [None]:
# Halve both dimensions (cv2.resize takes the target size as (width, height))
img_resized = cv2.resize(img_cv, dsize=(width // 2, height // 2))

# Smooth with a 5x5 Gaussian; sigmaX=0 is passed through unchanged
img_blurred = cv2.GaussianBlur(img_cv, ksize=(5, 5), sigmaX=0)

# Canny edge map with lower / upper thresholds of 100 and 200
img_edges = cv2.Canny(img_cv, threshold1=100, threshold2=200)

# Show the three results next to each other
display_images(
    images=[img_resized, img_blurred, img_edges],
    titles=['Resized (50%)', 'Gaussian Blur', 'Edge Detection'],
)

# Part B: Convolution Warm-up

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import convolve2d
import cv2

In [2]:
def apply_kernel(image, kernel):
    """
    Convolve `image` with `kernel` and hand back the filtered array.

    The output keeps the input's shape (mode='same') and the image is treated
    as periodic at its borders (boundary='wrap').
    """
    filtered = convolve2d(image, kernel, boundary='wrap', mode='same')
    return filtered

### Simple geometric image forms

In [3]:
size = 20

# Cross: the middle row and the middle column are set to 1
cross = np.zeros((size, size))
cross[size // 2] = 1
cross[:, size // 2] = 1

# Ones on the main diagonal
diagonal = np.identity(size)

# Filled square spanning the central half of each axis
box = np.zeros((size, size))
box[size // 4:3 * size // 4, size // 4:3 * size // 4] = 1

# Show the three untouched patterns.
# display_images is defined in Part A; that cell must have been run in this
# kernel, otherwise the call raises NameError.
display_images(
    images=[cross, diagonal, box],
    titles=['Cross Pattern', 'Diagonal Pattern', 'Box Pattern'],
)

NameError: name 'display_images' is not defined

### Common convolution kernels

In [None]:
# Edge detection kernels
sobel_x = np.array([[-1, 0, 1],
                    [-2, 0, 2],
                    [-1, 0, 1]])

sobel_y = np.array([[-1, -2, -1],
                    [0, 0, 0],
                    [1, 2, 1]])

# Sharpening kernel
sharpen = np.array([[0, -1, 0],
                    [-1, 5, -1],
                    [0, -1, 0]])

# Gaussian blur kernel (3x3): integer weights divided by 16 so they sum to 1
gaussian = np.array([[1, 2, 1],
                     [2, 4, 2],
                     [1, 2, 1]]) / 16

# Render the kernels themselves as small grayscale images
display_images(
    images=[sobel_x, sobel_y, sharpen, gaussian],
    titles=['Sobel X', 'Sobel Y', 'Sharpen', 'Gaussian'],
    figsize=(20, 5),
)

In [None]:
# Cross pattern: run it through every kernel, in the same order as the
# titles passed to display_images below
cross_sobel_x, cross_sobel_y, cross_sharpen, cross_gaussian = (
    apply_kernel(cross, kernel)
    for kernel in (sobel_x, sobel_y, sharpen, gaussian)
)

display_images(
    images=[cross, cross_sobel_x, cross_sobel_y, cross_sharpen, cross_gaussian],
    titles=['Original Cross', 'Sobel X', 'Sobel Y', 'Sharpened', 'Gaussian Blur'],
    figsize=(25, 5),
)

In [None]:
# Diagonal pattern: run it through every kernel, in the same order as the
# titles passed to display_images below
diag_sobel_x, diag_sobel_y, diag_sharpen, diag_gaussian = (
    apply_kernel(diagonal, kernel)
    for kernel in (sobel_x, sobel_y, sharpen, gaussian)
)

display_images(
    images=[diagonal, diag_sobel_x, diag_sobel_y, diag_sharpen, diag_gaussian],
    titles=['Original Diagonal', 'Sobel X', 'Sobel Y', 'Sharpened', 'Gaussian Blur'],
    figsize=(25, 5),
)

In [None]:
# Box pattern: run it through every kernel, in the same order as the
# titles passed to display_images below
box_sobel_x, box_sobel_y, box_sharpen, box_gaussian = (
    apply_kernel(box, kernel)
    for kernel in (sobel_x, sobel_y, sharpen, gaussian)
)

display_images(
    images=[box, box_sobel_x, box_sobel_y, box_sharpen, box_gaussian],
    titles=['Original Box', 'Sobel X', 'Sobel Y', 'Sharpened', 'Gaussian Blur'],
    figsize=(25, 5),
)

In [None]:
# Now apply the same convolutions to your gray image (cat or dog).
# img_gray comes from the "Basic Image Manipulations" cell in Part A.

catodog_sobel_x = apply_kernel(img_gray, sobel_x)
catodog_sobel_y = apply_kernel(img_gray, sobel_y)
catodog_sharpen = apply_kernel(img_gray, sharpen)
catodog_gaussian = apply_kernel(img_gray, gaussian)

# The first title used to read 'Original Box' (copied from the box cell);
# the image shown here is the grayscale photo
display_images(
    [img_gray, catodog_sobel_x, catodog_sobel_y, catodog_sharpen, catodog_gaussian],
    ['Original Image', 'Sobel X', 'Sobel Y', 'Sharpened', 'Gaussian Blur'],
    figsize=(25, 5)
)

# Part C: Torch CNN Warm-up

In [3]:
# get another image
import requests

# URLs of the files
sample_image_catanddog = 'https://www.raphaelcousin.com/modules/data-science-practice/module7/exercise/catanddog.jpg'

# Function to download a file
def download_file(url, file_name, timeout=30):
    """
    Download a file over HTTP and save it locally
    Args:
        url: Address of the file to fetch
        file_name: Local path the response body is written to
        timeout: Seconds to wait for the server; without a timeout
            requests.get can block indefinitely on an unresponsive host
    Raises:
        requests.HTTPError: if the server answers with an error status
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # Ensure we notice bad responses
    with open(file_name, 'wb') as file:
        file.write(response.content)
    print(f'Downloaded {file_name} from {url}')

# Downloading the files
download_file(sample_image_catanddog, 'catanddog.jpg')

Downloaded catanddog.jpg from https://www.raphaelcousin.com/modules/data-science-practice/module7/exercise/catanddog.jpg


In [None]:
# read image
image = cv2.imread("catanddog.jpg")
# cv2.imread returns None (no exception) when the file is missing or
# unreadable; fail here with a clear message instead of inside cvtColor
if image is None:
    raise FileNotFoundError("Could not read image file: catanddog.jpg")
gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Dimensions of the colour image
height, width, channels = image.shape

# Print the dimensions
print(f"Width: {width}, Height: {height}, Channels: {channels}")

# Dimensions of the grayscale image (2-D, so there is no channel axis to unpack)
height, width = gray_image.shape

# Print the dimensions
print(f"Width: {width}, Height: {height}, Channels: 1")

In [None]:
# Colour and grayscale versions side by side
display_images(images=[image, gray_image], titles=['image', 'gray_image'])

### Complete the functions that compute the output size after a conv layer and after a pool layer, then define the same layers in torch to validate the computed sizes

In [None]:
from torch import nn
import torch

In [None]:
# Make the image tensors
# nn.Conv2d expects (batch, channels, height, width), and the flatten step
# later on uses size(0) as the batch dimension. The OpenCV array is
# (height, width, channels), so move channels first and add a batch axis.
x_rgb = torch.tensor(image, dtype=torch.float32).permute(2, 0, 1).unsqueeze(0)
# The grayscale image is 2-D (height, width): add the channel and batch axes
x_gray = torch.tensor(gray_image, dtype=torch.float32).unsqueeze(0).unsqueeze(0)

### Conv Layers

In [None]:
def calculate_conv_output_size(input_size, kernel_size, stride, padding):
    """Calculate output size of convolution layer

    Exercise placeholder: the body still returns the undefined name `TODO`
    and must be replaced with the actual formula before the function is called.

    Args:
        input_size: Size of the input fed to the convolution
        kernel_size: Size of the convolution kernel
        stride: Stride of the convolution
        padding: Padding applied to the input
    """
    return TODO



In [None]:
# A. Configuration RGB : Small kernel (3), no padding, stride 1, 1 filter
# Exercise template: every keyword argument on the next two lines is left
# blank on purpose and must be filled in before this cell can run.
conv1_rgb = nn.Conv2d(in_channels=, out_channels=, kernel_size=, stride=, padding=)
out_size_a = calculate_conv_output_size(input_size=, kernel_size=, stride=, padding=)
print(f"\nConfig A - Small kernel (3x3), no padding, stride 1:")
print(f"Output size: {out_size_a}")
print(f"Verification with torch:")
# Run the layer on the RGB tensor to compare the real shape with out_size_a
y_a = conv1_rgb(x_rgb)
print(f"Actual output shape: {y_a.shape}")

In [None]:
# B. Configuration RGB : Larger kernel, padding, stride 2
# Exercise template: every keyword argument on the next two lines is left
# blank on purpose; the values to use are spelled out in the Config B
# message printed below.
conv2_rgb = nn.Conv2d(in_channels=, out_channels=, kernel_size=, stride=, padding=)
out_size_b = calculate_conv_output_size(input_size=, kernel_size=, stride=, padding=)
print(f"\nConfig B - Larger kernel (5x5), padding 2, stride 2:")
print(f"Output size: {out_size_b}")
print(f"Verification with torch:")
# Run the layer on the RGB tensor to compare the real shape with out_size_b
y_b = conv2_rgb(x_rgb)
print(f"Actual output shape: {y_b.shape}")


In [None]:
# C. Configuration gray : Medium kernel, small padding, stride 1
# Exercise template: every keyword argument on the next two lines is left
# blank on purpose; the values to use are spelled out in the Config C
# message printed below.
conv1_gray = nn.Conv2d(in_channels=, out_channels=, kernel_size=, stride=, padding=)
out_size_c = calculate_conv_output_size(input_size=, kernel_size=, stride=, padding=)
print(f"\nConfig C - Medium kernel (4x4), padding 1, stride 1:")
print(f"Output size: {out_size_c}")
print(f"Verification with torch:")
# Run the layer on the grayscale tensor to compare the real shape with out_size_c
y_c = conv1_gray(x_gray)
print(f"Actual output shape: {y_c.shape}")

### Pool Layers

In [None]:
def calculate_pool_output_size(input_size, kernel_size, stride):
    """Calculate output size of pooling layer

    Exercise placeholder: the body still returns the undefined name `TODO`
    and must be replaced with the actual formula before the function is called.

    Args:
        input_size: Size of the input fed to the pooling layer
        kernel_size: Size of the pooling window
        stride: Stride of the pooling window
    """
    return TODO

In [None]:
# Max Pooling after Conv Layer A (RGB)
# Exercise template: the keyword arguments of nn.MaxPool2d and
# calculate_pool_output_size are left blank on purpose and must be filled in.
pool_a = nn.MaxPool2d(kernel_size=, stride=)
pool_size_a = calculate_pool_output_size(input_size=, kernel_size=, stride=)
print(f"\nPooling after Config A:")
print(f"Output size: {pool_size_a}")
# Pool the output of conv layer A to compare the real shape with pool_size_a
y_pool_a = pool_a(y_a)
print(f"Actual output shape: {y_pool_a.shape}")

# Max Pooling after Conv Layer C (Grayscale)
# Same template as above, applied to the output of conv layer C
pool_c = nn.MaxPool2d(kernel_size=, stride=)
pool_size_c = calculate_pool_output_size(input_size=, kernel_size=, stride=)
print(f"\nPooling after Config C:")
print(f"Output size: {pool_size_c}")
y_pool_c = pool_c(y_c)
print(f"Actual output shape: {y_pool_c.shape}")


### Now compute the flattened size after pooling, which is needed to add the fully connected layer

In [None]:

# 4. Flattening Layer
print("\n=== Flattening Layer Outputs ===")

# Flatten pooled output from RGB configuration
# Exercise: assign the expected number of features per sample. It has to
# match the second dimension produced by the .view() call below, because the
# fully connected cell passes it to nn.Linear as the input feature count.
flat_size_rgb = 
print(f"\nFlattened size after RGB conv+pool:")
print(f"Output size: {flat_size_rgb}")
# Keep dimension 0 and merge every remaining dimension into one
y_flat_rgb = y_pool_a.view(y_pool_a.size(0), -1)
print(f"Actual output shape: {y_flat_rgb.shape}")

# Flatten pooled output from Grayscale configuration
# Exercise: same as above, for the grayscale path
flat_size_gray =
print(f"\nFlattened size after Grayscale conv+pool:")
print(f"Output size: {flat_size_gray}")
# Keep dimension 0 and merge every remaining dimension into one
y_flat_gray = y_pool_c.view(y_pool_c.size(0), -1)
print(f"Actual output shape: {y_flat_gray.shape}")



In [None]:
# 5. Fully Connected Layer with 1 Output
print("\n=== Final Fully Connected Layer ===")

# RGB path: linear head mapping the flattened features to a single value
fc_rgb = nn.Linear(in_features=flat_size_rgb, out_features=1)
y_final_rgb = fc_rgb(y_flat_rgb)
print(f"\nFinal output shape (RGB path): {y_final_rgb.shape}")

# Grayscale path: same head, sized for the grayscale feature count
fc_gray = nn.Linear(in_features=flat_size_gray, out_features=1)
y_final_gray = fc_gray(y_flat_gray)
print(f"Final output shape (Grayscale path): {y_final_gray.shape}")



In [None]:
# Complete network architectures
class RGBNet(nn.Module):
    """Conv -> max-pool -> flatten -> linear head for the RGB input.

    No layer is created here: each instance reuses the module-level
    `conv1_rgb`, `pool_a` and `fc_rgb` objects built in the earlier cells.
    """

    def __init__(self):
        super().__init__()
        self.conv, self.pool, self.fc = conv1_rgb, pool_a, fc_rgb

    def forward(self, x):
        """Run `x` through conv, pool and the linear head; return the result."""
        pooled = self.pool(self.conv(x))
        # Keep dimension 0 and merge everything else into one feature axis
        flattened = pooled.view(pooled.size(0), -1)
        return self.fc(flattened)

class GrayscaleNet(nn.Module):
    """Conv -> max-pool -> flatten -> linear head for the grayscale input.

    No layer is created here: each instance reuses the module-level
    `conv1_gray`, `pool_c` and `fc_gray` objects built in the earlier cells.
    """

    def __init__(self):
        super().__init__()
        self.conv, self.pool, self.fc = conv1_gray, pool_c, fc_gray

    def forward(self, x):
        """Run `x` through conv, pool and the linear head; return the result."""
        pooled = self.pool(self.conv(x))
        # Keep dimension 0 and merge everything else into one feature axis
        flattened = pooled.view(pooled.size(0), -1)
        return self.fc(flattened)

# Part D: Training a CNN on MNIST

Now let's apply what we learned to train a CNN on the MNIST dataset for digit classification.