In [None]:
import numpy as np

# --- CONV2D FORWARD PASS --- 

def conv2d_forward(input_data, kernel, stride=1, padding=0):
    """
    Performs a 2D convolution forward pass (single channel input).
    Note: We are implementing the loops manually for low-level understanding.

    The kernel is applied as-is (it is not flipped), so strictly speaking this
    is a cross-correlation.

    Args:
        input_data: 2D array-like of shape (H, W).
        kernel: 2D array-like of shape (F_h, F_w).
        stride: Positive integer step of the sliding window, used for both axes.
        padding: Number of zero rows/columns added on every side of the input
            before sliding the kernel. Either a non-negative int (same amount
            on both axes) or a (pad_h, pad_w) pair.

    Returns:
        A float64 array of shape (O_h, O_w) where
        O = (I + 2 * P - F) // S + 1 along each axis. If the kernel does not
        fit inside the (padded) input along an axis, that axis has length 0.

    Raises:
        ValueError: If stride is smaller than 1 or padding is negative.
    """

    input_data = np.asarray(input_data)
    kernel = np.asarray(kernel)

    if stride < 1:
        raise ValueError(f"stride must be a positive integer, got {stride}")

    # Accept one amount for both axes or a separate amount per axis.
    if isinstance(padding, (tuple, list)):
        pad_h, pad_w = padding
    else:
        pad_h = pad_w = padding
    if pad_h < 0 or pad_w < 0:
        raise ValueError(f"padding must be non-negative, got {padding}")

    # 1. ZERO-PAD THE INPUT
    # Surround the image with a border of zeros so the kernel can also be
    # centred on pixels near the edge.
    if pad_h or pad_w:
        input_data = np.pad(input_data, ((pad_h, pad_h), (pad_w, pad_w)),
                            mode="constant", constant_values=0)

    # 2. SETUP SHAPES
    # Input Data: (H, W) - Height, Width (after padding)
    (i_h, i_w) = input_data.shape
    # Kernel: (F_h, F_w) - Filter Height, Filter Width (e.g., 3x3 filter)
    (f_h, f_w) = kernel.shape

    # Calculate Output Dimensions: O = (I - F) // S + 1 on the padded input.
    # Floor division (not int(x / s), which truncates toward zero) plus the
    # clamp keeps the size at 0 whenever the kernel is larger than the input.
    o_h = max(0, (i_h - f_h) // stride + 1)
    o_w = max(0, (i_w - f_w) // stride + 1)

    # Initialize the output Feature Map
    output = np.zeros((o_h, o_w))

    # 3. THE SLIDING WINDOW (The Core of CNNs)
    # Slide the filter vertically (i) and horizontally (j)
    for i in range(o_h):
        for j in range(o_w):

            # Top-left corner of the current window; the stride skips pixels
            i_start = i * stride
            j_start = j * stride

            # Get the slice matching the kernel size
            input_slice = input_data[i_start : i_start + f_h,
                                     j_start : j_start + f_w]

            # 4. THE DOT PRODUCT (Multiplication and Summation)
            # Element-wise multiply the slice and the kernel, then sum
            output[i, j] = np.sum(input_slice * kernel)

    return output

# --- DEMO ---
# A small 5x5 binary image, laid out as (H, W)
X = np.array([
    [1, 1, 1, 0, 0],
    [0, 1, 1, 1, 0],
    [0, 0, 1, 1, 1],
    [0, 0, 1, 1, 0],
    [0, 1, 1, 0, 0],
])

# 3x3 edge detection filter: every row is [1, 0, -1], i.e. left column minus
# right column of each window.
K = np.array([[1, 0, -1]] * 3)

# Convolve with the defaults (stride 1, no padding): 5 - 3 + 1 = 3, so the
# feature map is 3x3.
output_feature_map = conv2d_forward(X, K)

# Show the input, the filter and the resulting feature map, in that order.
for label, values in (
    ("Input Image (X):\n", X),
    ("\nFilter (K):\n", K),
    ("\nOutput Feature Map (5-3+1 = 3x3):\n", output_feature_map),
):
    print(label, values)

In [None]:
# --- MAXPOOL FORWARD PASS ---
def maxpool_forward(feature_map, pool_size=2, stride=2):
    """
    Performs Max Pooling (downsampling).
    Finds the maximum value within each pool_size x pool_size window.

    Args:
        feature_map: 2D array-like of shape (H, W).
        pool_size: Positive integer side length of the square pooling window.
        stride: Positive integer step between consecutive windows.

    Returns:
        A float64 array of shape (O_h, O_w) with
        O = (I - pool_size) // stride + 1 along each axis. Only windows that
        lie completely inside the input are evaluated; an axis shorter than
        pool_size gives an output length of 0.

    Raises:
        ValueError: If pool_size or stride is smaller than 1.
    """
    feature_map = np.asarray(feature_map)

    if pool_size < 1:
        raise ValueError(f"pool_size must be a positive integer, got {pool_size}")
    if stride < 1:
        raise ValueError(f"stride must be a positive integer, got {stride}")

    (i_h, i_w) = feature_map.shape

    # Calculate Output Dimensions
    # O = (I - P) // S + 1. For the common P == S case this equals I // S,
    # but it also stays correct for overlapping (P > S) and sparse (P < S)
    # windows, where I // S would count truncated or missing windows.
    o_h = max(0, (i_h - pool_size) // stride + 1)
    o_w = max(0, (i_w - pool_size) // stride + 1)

    output = np.zeros((o_h, o_w))

    for i in range(o_h):
        for j in range(o_w):

            # Top-left corner of the current pooling window
            i_start = i * stride
            j_start = j * stride

            # Get the slice matching the pool size
            input_slice = feature_map[i_start : i_start + pool_size,
                                      j_start : j_start + pool_size]

            # The Max Pool operation: Find the single largest value
            output[i, j] = np.max(input_slice)

    return output

# --- DEMO ---
# Use the output from the convolution above (3x3)
# 3x3 does not split evenly into 2x2 windows, so extend it to 4x4 by adding
# one row at the bottom and one column on the right.
# Pad with -inf, not 0: this feature map has negative entries, and a 0 pad
# would win the max in any window whose real values are all negative
# (the bottom-left window here), reporting a value that is not in the data.
padded_map = np.pad(output_feature_map, ((0, 1), (0, 1)), 'constant',
                    constant_values=-np.inf)
pooled_output = maxpool_forward(padded_map, pool_size=2, stride=2)

print("\nPadded Feature Map (4x4):\n", padded_map)
print("\nMax Pooled Output (2x2):\n", pooled_output)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

# --- CONV2D WITH PyTorch ---
def conv2d_pytorch_forward(input_data, kernel, stride=1, padding=0):
    """
    PyTorch version - No manual loops needed!

    Args:
        input_data: 2D NumPy array of shape (H, W).
        kernel: 2D NumPy array of shape (F_h, F_w).
        stride: Forwarded unchanged to F.conv2d.
        padding: Forwarded unchanged to F.conv2d.

    Returns:
        A 2D float32 NumPy array of shape (O_h, O_w). The result is always
        2D, even when O_h or O_w is 1.

    Side effects:
        Prints the 4D shapes of the input and kernel tensors.
    """

    # Convert numpy arrays to PyTorch tensors
    # Add batch and channel dimensions: (batch_size, channels, height, width)
    # Our input is (H,W) -> needs to be (1, 1, H, W) for PyTorch conv2d
    # np.ascontiguousarray lets views with negative strides (e.g. a flipped
    # kernel K[::-1, ::-1]) through; torch.from_numpy rejects those.
    input_tensor = torch.from_numpy(np.ascontiguousarray(input_data)).float().unsqueeze(0).unsqueeze(0)

    # Kernel also needs same dimensions: (out_channels, in_channels, H, W)
    kernel_tensor = torch.from_numpy(np.ascontiguousarray(kernel)).float().unsqueeze(0).unsqueeze(0)

    print(f"Input shape: {input_tensor.shape}")
    print(f"Kernel shape: {kernel_tensor.shape}")

    # METHOD 1: Using torch.nn.functional.conv2d (most common)
    output = F.conv2d(input_tensor, kernel_tensor,
                      stride=stride, padding=padding)

    # Remove ONLY the batch and channel dimensions for comparison.
    # A bare .squeeze() would also drop a spatial dimension of size 1 and
    # turn a 1x1 or 1xN feature map into a scalar / 1D array.
    output = output[0, 0]

    return output.numpy()

# --- DEMO ---
print("=== PyTorch CONVOLUTION ===")
# Same 5x5 image and 3x3 filter as the NumPy demo, redefined here so this
# cell does not rely on the earlier one having been run.
X = np.array([
    [1, 1, 1, 0, 0],
    [0, 1, 1, 1, 0],
    [0, 0, 1, 1, 1],
    [0, 0, 1, 1, 0],
    [0, 1, 1, 0, 0]
])

K = np.array([
    [1, 0, -1],
    [1, 0, -1],
    [1, 0, -1]
])

# One line of code does all the work we manually coded!
output_pytorch = conv2d_pytorch_forward(X, K)
print("PyTorch Output:\n", output_pytorch)

# --- EVEN SIMPLER: Using nn.Conv2d layer ---
print("\n" + "="*50)
print("Using nn.Conv2d Layer (Even Easier!)")

# Create a convolution layer
conv_layer = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3,
                       stride=1, padding=0, bias=False)

# Set the weights to our custom kernel.
# copy_ under no_grad overwrites the values of the existing parameter in
# place, without going through the discouraged `.data` attribute.
with torch.no_grad():
    conv_layer.weight.copy_(torch.from_numpy(K).float().unsqueeze(0).unsqueeze(0))

# Forward pass
input_tensor = torch.from_numpy(X).float().unsqueeze(0).unsqueeze(0)
output_layer = conv_layer(input_tensor)

# The layer's weight requires grad, so output_layer is part of the autograd
# graph and calling .numpy() on it directly raises a RuntimeError.
# Detach it first.
print("nn.Conv2d Output:\n", output_layer.detach().squeeze().numpy())

In [None]:
# --- MAX POOLING WITH PyTorch ---
def maxpool_pytorch_forward(feature_map, pool_size=2, stride=2):
    """
    PyTorch max pooling - super simple!

    Args:
        feature_map: 2D NumPy array of shape (H, W).
        pool_size: Forwarded to F.max_pool2d as kernel_size.
        stride: Forwarded to F.max_pool2d as stride.

    Returns:
        A 2D float32 NumPy array with the pooled values. The result is always
        2D, even when one of its dimensions is 1.

    Side effects:
        Prints the 4D shape of the tensor handed to F.max_pool2d.
    """

    # Convert to tensor and add batch + channel dimensions: (1, 1, H, W).
    # np.ascontiguousarray lets negative-stride views (e.g. flipped arrays)
    # through; torch.from_numpy rejects those.
    input_tensor = torch.from_numpy(np.ascontiguousarray(feature_map)).float().unsqueeze(0).unsqueeze(0)

    print(f"Pool input shape: {input_tensor.shape}")

    # METHOD 1: Using torch.nn.functional.max_pool2d
    output = F.max_pool2d(input_tensor, kernel_size=pool_size,
                          stride=stride)

    # Remove ONLY the batch and channel dimensions.
    # A bare .squeeze() would also drop a spatial dimension of size 1 and
    # turn a 1x1 or 1xN pooled map into a scalar / 1D array.
    output = output[0, 0]

    return output.numpy()

# --- DEMO ---
print("\n=== PyTorch MAX POOLING ===")

# Use the convolution output and pad it from 3x3 to 4x4 (one extra row at the
# bottom, one extra column on the right).
# Pad with -inf, not 0: the feature map has negative entries, and a 0 pad
# would win the max in any window whose real values are all negative.
# PyTorch's own MaxPool2d `padding` option uses -inf for the same reason.
padded_map = np.pad(output_pytorch, ((0,1), (0,1)), 'constant',
                    constant_values=-np.inf)

# One line does all the max pooling!
pooled_pytorch = maxpool_pytorch_forward(padded_map)
print("Padded Input:\n", padded_map)
print("PyTorch Max Pool Output:\n", pooled_pytorch)

# --- USING nn.MaxPool2d LAYER ---
print("\n" + "="*50)
print("Using nn.MaxPool2d Layer")

# Create max pool layer
maxpool_layer = nn.MaxPool2d(kernel_size=2, stride=2)

# Forward pass
# MaxPool2d has no trainable parameters, so the result does not require grad
# and can be converted to NumPy directly.
input_tensor = torch.from_numpy(padded_map).float().unsqueeze(0).unsqueeze(0)
pooled_layer = maxpool_layer(input_tensor)

print("nn.MaxPool2d Output:\n", pooled_layer.squeeze().numpy())