In [1]:
import numpy as np

#Convolutional Layer
class ConvLayer:
    """Naive (loop-based) 2-D convolution layer trained with plain SGD.

    Inputs are 4-D arrays indexed as (N, in_channels, H, W). The layer owns
    a filter bank ``W`` of shape
    (out_channels, in_channels, filter_size, filter_size) and one bias per
    output channel stored in ``b`` with shape (out_channels, 1).
    """

    def __init__(self, in_channels, out_channels, filter_size, stride=1, padding=0):
        """Store the hyper-parameters and initialise weights and biases.

        Args:
            in_channels: Number of channels in the input.
            out_channels: Number of filters (channels in the output).
            filter_size: Side length of the square filters.
            stride: Step between consecutive filter positions.
            padding: Number of zero rows/columns added on every spatial side.
        """
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.filter_size = filter_size
        self.stride = stride
        self.padding = padding
        # Small random weights, zero biases.
        self.W = np.random.randn(out_channels, in_channels, filter_size, filter_size) * 0.1
        self.b = np.zeros((out_channels, 1))

    def forward(self, X):
        """Convolve ``X`` with the filters and add the per-filter bias.

        Args:
            X: Input array of shape (N, C, H, W).

        Returns:
            Array of shape (N, out_channels, H_out, W_out).
        """
        self.X = X  # Cache for backward pass
        N, C, H, W = X.shape
        # Calculate output dimensions
        H_out = int((H - self.filter_size + 2*self.padding) / self.stride + 1)
        W_out = int((W - self.filter_size + 2*self.padding) / self.stride + 1)
        # Zero-pad the two spatial axes only.
        pad = self.padding
        X_padded = np.pad(X, ((0, 0), (0, 0), (pad, pad), (pad, pad)), mode='constant')
        self.X_padded = X_padded  # Cache for backward
        out = np.zeros((N, self.out_channels, H_out, W_out))

        # Naively slide each filter over each sample.
        for n in range(N):
            for f in range(self.out_channels):
                # self.b[f] is a length-1 array; take the scalar explicitly so
                # the element assignment below does not rely on NumPy's
                # deprecated implicit array-to-scalar conversion.
                bias = self.b[f, 0]
                for i in range(H_out):
                    for j in range(W_out):
                        h_start = i * self.stride
                        h_end = h_start + self.filter_size
                        w_start = j * self.stride
                        w_end = w_start + self.filter_size
                        window = X_padded[n, :, h_start:h_end, w_start:w_end]
                        out[n, f, i, j] = np.sum(window * self.W[f]) + bias
        return out

    def backward(self, d_out, learning_rate):
        """Back-propagate through the layer and apply one SGD step.

        Must be called after ``forward``, which caches the (padded) input.

        Args:
            d_out: Gradient of the loss with respect to the layer output,
                shape (N, out_channels, H_out, W_out).
            learning_rate: Step size used to update ``W`` and ``b`` in place.

        Returns:
            dX: Gradient with respect to the input ``X`` (padding removed).
        """
        X = self.X
        X_padded = self.X_padded
        N, C, H, W = X.shape
        _, _, H_out, W_out = d_out.shape

        dW = np.zeros_like(self.W)
        db = np.zeros_like(self.b)
        dX_padded = np.zeros_like(X_padded)

        for n in range(N):
            for f in range(self.out_channels):
                for i in range(H_out):
                    for j in range(W_out):
                        h_start = i * self.stride
                        h_end = h_start + self.filter_size
                        w_start = j * self.stride
                        w_end = w_start + self.filter_size
                        grad = d_out[n, f, i, j]
                        # Gradient with respect to weights
                        dW[f] += grad * X_padded[n, :, h_start:h_end, w_start:w_end]
                        # Gradient with respect to bias
                        db[f] += grad
                        # Gradient with respect to the (padded) input; uses the
                        # weights from before this call's update.
                        dX_padded[n, :, h_start:h_end, w_start:w_end] += grad * self.W[f]

        # Remove padding from gradient if necessary
        if self.padding > 0:
            dX = dX_padded[:, :, self.padding:-self.padding, self.padding:-self.padding]
        else:
            dX = dX_padded

        # Update parameters
        self.W -= learning_rate * dW
        self.b -= learning_rate * db

        return dX

# ReLU Activation
class ReLU:
    """Element-wise rectified linear unit: max(0, x)."""

    def forward(self, X):
        """Return ``X`` with negative entries replaced by zero.

        The input is remembered so ``backward`` knows which entries were
        clipped.
        """
        self.X = X
        activated = np.maximum(0, X)
        return activated

    def backward(self, d_out):
        """Pass ``d_out`` through where the cached input was positive.

        Entries whose cached input was <= 0 receive a zero gradient. The
        caller's ``d_out`` array is left untouched.
        """
        blocked = self.X <= 0
        grad_in = d_out.copy()
        np.copyto(grad_in, 0, where=blocked)
        return grad_in

# Fully Connected (Dense) Layer
class FullyConnected:
    """Affine layer ``X . W + b`` whose backward pass also applies SGD."""

    def __init__(self, input_dim, output_dim):
        """Create small random weights (input_dim, output_dim) and a zero bias row."""
        initial_weights = np.random.randn(input_dim, output_dim)
        self.W = initial_weights * 0.1
        self.b = np.zeros((1, output_dim))

    def forward(self, X):
        """Return the affine transform of ``X`` and cache ``X`` for backward."""
        self.X = X
        projected = np.dot(X, self.W)
        return projected + self.b

    def backward(self, d_out, learning_rate):
        """Compute gradients, update ``W`` and ``b`` in place, return dX.

        Args:
            d_out: Gradient of the loss with respect to this layer's output.
            learning_rate: Step size for the gradient-descent update.

        Returns:
            Gradient of the loss with respect to the cached input.
        """
        # All three gradients are taken against the pre-update weights.
        grad_input = np.dot(d_out, self.W.T)
        grad_weights = np.dot(self.X.T, d_out)
        grad_bias = d_out.sum(axis=0, keepdims=True)

        # Gradient-descent step, in place.
        self.W -= grad_weights * learning_rate
        self.b -= grad_bias * learning_rate

        return grad_input

# Softmax Loss
def softmax_loss(scores, y):
    """Compute the mean softmax cross-entropy loss and its gradient.

    The loss is evaluated from log-probabilities obtained with the
    log-sum-exp trick, so a correct-class probability that underflows to
    zero yields a large finite loss instead of ``inf``.

    Args:
        scores: 2-D array of raw class scores, one row per sample.
        y: Integer class index of the correct class for each row.

    Returns:
        A tuple ``(loss, dscores)`` where ``loss`` is the average negative
        log-likelihood over the rows and ``dscores`` is the gradient of that
        loss with respect to ``scores`` (same shape as ``scores``).
    """
    N = scores.shape[0]
    rows = np.arange(N)

    # Subtracting the row maximum keeps exp() from overflowing and guarantees
    # the row sum below is at least 1, so its log is finite.
    shifted_scores = scores - np.max(scores, axis=1, keepdims=True)
    log_norm = np.log(np.sum(np.exp(shifted_scores), axis=1, keepdims=True))
    log_probs = shifted_scores - log_norm

    loss = -np.sum(log_probs[rows, y]) / N

    # d(loss)/d(scores) = (softmax - one_hot(y)) / N
    dscores = np.exp(log_probs)
    dscores[rows, y] -= 1
    dscores /= N

    return loss, dscores

# Simple CNN Model
class SimpleCNN:
    """Small classifier: 3x3 conv -> ReLU -> flatten -> fully connected."""

    def __init__(self, num_classes=10, input_shape=(1, 28, 28), num_filters=2):
        """Build the three layers.

        Args:
            num_classes: Number of output scores per sample.
            input_shape: ``(channels, height, width)`` of one input image.
                Defaults to a single-channel 28x28 image (e.g., MNIST).
            num_filters: Number of filters in the convolutional layer.
        """
        in_channels, height, width = input_shape
        # A 3x3 filter with stride 1 and padding 1 preserves the spatial
        # dimensions, so the conv output is (N, num_filters, height, width).
        self.conv = ConvLayer(in_channels=in_channels, out_channels=num_filters,
                              filter_size=3, stride=1, padding=1)
        self.relu = ReLU()
        # The conv output is flattened to num_filters*height*width features.
        self.fc = FullyConnected(input_dim=num_filters * height * width,
                                 output_dim=num_classes)

    def forward(self, X):
        """Return class scores for a batch ``X`` of shape (N, C, H, W)."""
        out = self.conv.forward(X)
        out = self.relu.forward(out)
        self.shape_cache = out.shape  # Cache shape for backward
        out_flat = out.reshape(out.shape[0], -1)
        scores = self.fc.forward(out_flat)
        return scores

    def backward(self, dscores, learning_rate):
        """Back-propagate ``dscores`` and update every layer's parameters.

        Must be called after ``forward``, which caches the pre-flatten shape.

        Args:
            dscores: Gradient of the loss with respect to the scores
                returned by ``forward``.
            learning_rate: Step size forwarded to each trainable layer.

        Returns:
            Gradient of the loss with respect to the input batch, as
            returned by the convolutional layer.
        """
        d_out_flat = self.fc.backward(dscores, learning_rate)
        # Undo the flatten performed in forward().
        d_out = d_out_flat.reshape(self.shape_cache)
        d_out = self.relu.backward(d_out)
        return self.conv.backward(d_out, learning_rate)

#Training Loop for the CNN
def train_cnn():
    """Fit a SimpleCNN on random dummy data and print the loss periodically.

    Seeds NumPy's global RNG with 0, builds the model, generates 20 random
    1x28x28 images with random labels in [0, 10), then runs 100 mini-batch
    SGD steps of 5 samples each, printing the batch loss every 10th step.
    """
    np.random.seed(0)
    # The model is created before the data so its weights draw from the RNG first.
    model = SimpleCNN(num_classes=10)

    step_size = 0.01
    total_steps = 100
    samples_per_batch = 5
    dataset_size = 20

    # Dummy dataset: random grayscale images and random labels (0 to 9).
    images = np.random.randn(dataset_size, 1, 28, 28)
    labels = np.random.randint(0, 10, size=dataset_size)

    for step in range(total_steps):
        # Draw a mini-batch without replacement.
        picked = np.random.choice(dataset_size, samples_per_batch, replace=False)

        # Forward pass and loss.
        batch_scores = model.forward(images[picked])
        batch_loss, grad_scores = softmax_loss(batch_scores, labels[picked])

        # Backward pass; the layers update their own parameters.
        model.backward(grad_scores, step_size)

        if step % 10 == 0:
            print(f"Iteration {step}, CNN loss: {batch_loss:.4f}")

# Run the CNN training loop only when this file is executed directly (as a
# script or notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    train_cnn()


  out[n, f, i, j] = np.sum(X_padded[n, :, h_start:h_end, w_start:w_end] * self.W[f]) + self.b[f]


Iteration 0, CNN loss: 2.9075
Iteration 10, CNN loss: 1.8861
Iteration 20, CNN loss: 1.2991
Iteration 30, CNN loss: 1.0878
Iteration 40, CNN loss: 0.4380
Iteration 50, CNN loss: 0.3618
Iteration 60, CNN loss: 0.2631
Iteration 70, CNN loss: 0.0980
Iteration 80, CNN loss: 0.0748
Iteration 90, CNN loss: 0.1113
