### Homework 04: Optical Character Recognition

Now that you have the segmented letters from the previous task, we need a way to actually convert the letters to text! You can't be bothered to just transcribe the images yourself, but you remember your professor droning on about something called MNIST and you think that these letters might be kind of similar to handwritten digits.

Unfortunately, because your professor hates you, he's making you write an FFN using only numpy for the first part of this assignment. Use the dataset available from the following link for training, testing, and validation on this assignment. [Alphabet Cuttings Dataset](https://drive.google.com/drive/folders/1xK3Mp9BhXWpae-ZicfGtTqkVRW-x8ntI?usp=sharing)

The code immediately below is for loading and formatting the dataset. You don't have to do anything here yourself.

In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

# Optional environment bootstrap: if the google.colab package can be imported
# (presumably only when the notebook runs on Google Colab), clone the course
# repository and change into this homework's folder. The `!` and `%` lines are
# IPython magics, so this cell only runs inside a notebook kernel.
try:
    import google.colab
    REPO_URL = "https://github.com/nd-cse-30124-fa25/cse-30124-homeworks.git"
    REPO_NAME = "cse-30124-homeworks"
    HW_FOLDER = "homework04"

    # Clone repo if not already present
    if not os.path.exists(REPO_NAME):
        !git clone {REPO_URL}

    # cd into the homework folder
    %cd {REPO_NAME}/{HW_FOLDER}

except ImportError:
    # google.colab is not installed: skip the clone/cd step and keep the
    # current working directory as it is.
    pass
    
def detect_rgb_contours(input_path, display=False):
    """
    Convert a letter cut-out PNG into a 28x28 MNIST-style grayscale image.

    The image is read, reduced to grayscale, run through a Canny edge filter,
    and its contours are filled into a blank canvas which is then resized to
    28x28.

    Args:
        input_path (str): Path to the input PNG image.
        display (bool): If True, show the intermediate grayscale/edge images,
            print contour debugging information and plot the final result.

    Returns:
        numpy.ndarray: uint8 array of shape (28, 28).

    Raises:
        OSError: If OpenCV cannot read an image from ``input_path``.
    """
    if display:
        # The boolean flag `display` shadows IPython's display() helper inside
        # this function, so the helper is imported under a different name.
        # (Calling `display(...)` directly would call `True(...)` and fail.)
        from IPython.display import display as show_image

    # Read the image with alpha channel
    img = cv2.imread(input_path, cv2.IMREAD_UNCHANGED)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError(f"Could not read image: {input_path!r}")

    # Extract the colour channels (drops the alpha channel when present)
    rgb_img = img[:, :, :3]

    # Convert to grayscale for contour detection
    gray = cv2.cvtColor(rgb_img, cv2.COLOR_BGR2GRAY)
    if display:
        show_image(Image.fromarray(gray))

    # Setting parameter values
    t_lower = 50  # Lower Threshold
    t_upper = 150  # Upper threshold

    # Applying the Canny Edge filter
    edge = cv2.Canny(gray, t_lower, t_upper)
    if display:
        show_image(Image.fromarray(edge))

    # Find contours recursively
    contours, hierarchy = cv2.findContours(edge, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    # Blank single-channel canvas with the same height/width as the input
    mnist_img = np.zeros((rgb_img.shape[0], rgb_img.shape[1]), dtype=np.uint8)
    if display:
        print(mnist_img.shape)
        print(len(hierarchy))

        print(hierarchy)

    # Fill every odd-indexed contour in white, except contour 1 which is
    # filled in black. Even-indexed contours are not drawn.
    # NOTE(review): presumably contour 1 is the outer border of the cut-out
    # and is blanked for that reason -- confirm against the dataset.
    for i, contour in enumerate(contours):
        if i == 1:
            cv2.drawContours(mnist_img, [contour], -1, 0, thickness=cv2.FILLED)
        elif i % 2 == 1:
            cv2.drawContours(mnist_img, [contour], -1, 255, thickness=cv2.FILLED)

    mnist_img = cv2.resize(mnist_img, (28, 28))
    # Display the final 28x28 result
    if display:
        plt.figure(figsize=(10, 10))
        plt.imshow(mnist_img, cmap='gray')
        plt.axis('off')
        plt.title(f"All {len(contours)} contours with unique colors")
        plt.show()

    return mnist_img

In [None]:
import numpy as np
from PIL import Image
import os
from collections import defaultdict
import random
from sklearn.preprocessing import LabelEncoder
from typing import Dict, Tuple
from IPython.display import display

def load_letter_dataset(data_dir: str, train_size: int = 7, test_size: int = 2, holdout_size: int = 1) -> Dict:
    """
    Load the letter images in ``data_dir`` and split them into train, test and
    holdout sets.

    Only ``.png`` files whose name does not start with a digit are used; the
    first character of the file name is taken as the letter label. For each
    letter the instances are shuffled, the first ``train_size`` go to train,
    the next ``test_size`` to test, and everything that remains to holdout.
    Every image is converted with ``detect_rgb_contours``. The first training
    image is printed and plotted as a sanity check.

    Returns:
        Dict with keys 'train', 'test' and 'holdout'; each value is a dict
        holding numpy arrays under 'images' and 'labels'.
    """
    # Verify split sizes
    assert train_size + test_size + holdout_size == 10, "Split sizes must sum to 10"

    # Group image paths by their letter (first character of the file name)
    paths_by_letter = defaultdict(list)
    for name in os.listdir(data_dir):
        if not name.endswith('.png') or name[0].isdigit():
            continue
        paths_by_letter[name[0]].append(os.path.join(data_dir, name))

    splits = {
        'train': {'images': [], 'labels': []},
        'test': {'images': [], 'labels': []},
        'holdout': {'images': [], 'labels': []},
    }
    train_end = train_size
    test_end = train_size + test_size

    for letter, paths in paths_by_letter.items():
        # Shuffle in place so the split is random per letter
        random.shuffle(paths)

        partition = (
            ('train', paths[:train_end]),
            ('test', paths[train_end:test_end]),
            ('holdout', paths[test_end:]),
        )
        for split_name, split_paths in partition:
            bucket = splits[split_name]
            for path in split_paths:
                bucket['images'].append(detect_rgb_contours(path))
                bucket['labels'].append(letter)

    # Sanity check: show the first training example
    first_label = splits['train']['labels'][0]
    first_image = splits['train']['images'][0]
    print(first_label, first_image.shape)
    plt.figure(figsize=(10, 10))
    plt.imshow(first_image, cmap='gray')
    plt.axis('off')
    plt.show()

    # Convert the accumulated lists to numpy arrays
    for bucket in splits.values():
        bucket['images'] = np.array(bucket['images'])
        bucket['labels'] = np.array(bucket['labels'])

    return splits

def prepare_data(data_dict: Dict) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """
    Turn the train/test splits into arrays ready for FFN training.

    Each image is flattened to a 1-D vector and divided by 255; the letter
    labels are encoded as integers with a LabelEncoder fitted on the training
    labels. The resulting label mapping is printed for reference.

    Returns:
        Tuple (X_train, X_test, y_train, y_test) with one row per example in
        the X arrays.
    """
    def _flatten_and_scale(images):
        # One flattened, 0-1 scaled row per image
        return np.array([picture.reshape(-1) / 255 for picture in images])

    train_split = data_dict['train']
    test_split = data_dict['test']

    X_train = _flatten_and_scale(train_split['images'])
    X_test = _flatten_and_scale(test_split['images'])

    # Encode letters as integers; the encoder is fitted on the training labels only
    encoder = LabelEncoder()
    y_train = encoder.fit_transform(train_split['labels'])
    y_test = encoder.transform(test_split['labels'])

    # Print the letter -> integer mapping for reference
    class_names = encoder.classes_
    label_mapping = dict(zip(class_names, encoder.transform(class_names)))
    print("Label mapping:", label_mapping)

    return X_train, X_test, y_train, y_test

**Neural Network from Scratch**

Your task is to implement a simple neural network from scratch in numpy to classify the letters in the dataset following the architecture shown below.

In order to actually implement a training regime for our network, we'll need to specify a loss function that we can use to measure how well our network is doing. We'll use the cross entropy loss function as we're attempting a multiclass classification task.

![Cross Entropy Loss](https://pbs.twimg.com/media/FBmVmdHWQAAU7gq.png)

Training our network will consist of two steps primarily, forward propagation and back propagation.

Forward propagation is the process of taking our input data, and passing it through the network to get a prediction.

Back propagation is the process of taking the derivative of the loss function with respect to the weights and biases, and using gradient descent to update the weights and biases.

![NN Training](https://raw.githubusercontent.com/SkalskiP/ILearnDeepLearning.py/e300c61fc39e480bad8d4d83616e763334b74ec7/01_mysteries_of_neural_networks/03_numpy_neural_net/supporting_visualizations/blueprint.gif)

In this gif we can see a brief outline of the forward and backward propagation steps.

Broadly speaking, forward is what gives us our prediction, and backward is what gives us the gradient of the loss function with respect to the weights and biases, and is how we update the weights to get closer to the right answer (done by minimizing the loss function).

We'll also need to implement a couple activation functions and their derivatives.

We're going to be using the ReLU activation function for our hidden layers, and a softmax function for our output layer. The softmax will allow us to map our output to a probability between 0 and 1 and from there to a class based on an argmax operation.

![Activation Functions](https://raw.githubusercontent.com/SkalskiP/ILearnDeepLearning.py/e300c61fc39e480bad8d4d83616e763334b74ec7/01_mysteries_of_neural_networks/03_numpy_neural_net/supporting_visualizations/activations.gif)

Here we can see both activation functions and their derivatives.

The part that most people find tricky about this is the backpropagation step.

As we've seen in class for "single layer" examples, to optimize the weights of a model using gradient descent, we can rewrite the loss function in terms of the weights and then take partial derivatives with respect to each weight.

![Gradient Descent](https://global.discourse-cdn.com/dlai/original/3X/f/5/f58df86a4c92695569d9536d7e752161cd0f98fb.jpeg)

With multilayer networks, how do we take the derivative of the loss function with respect to the weights, if the weights in the previous layer are reliant on the weights in the layer before them?

Backpropagation is the solution to this and revolves around using the chain rule to take essentially a series of partial derivatives backwards through the network to get the gradient of the loss function with respect to the weights at each layer. We can then redistribute these gradients to update the weights of the network.

![Backprop](https://miro.medium.com/v2/resize:fit:1200/0*9lo2ux8ASvt6YJkH.gif)

**BETTER TEACHING**

To be honest, your best bet is to watch the youtube videos by 3Blue1Brown. He's an incredible teacher and will do a better job than I can, along with better visualizations.

[![Introduction to Neural Networks](https://img.youtube.com/vi/aircAruvnKk/0.jpg)](https://www.youtube.com/watch?v=aircAruvnKk)

This is an introduction to neural networks using the MNIST dataset!

Then we have a great video on gradient descent.

[![Gradient Descent](https://img.youtube.com/vi/IHZwWFHWa-w/0.jpg)](https://www.youtube.com/watch?v=IHZwWFHWa-w)

Finally I'd recommend at least his first video on backpropagation, though you should probably watch the second too.

[![Backprop](https://img.youtube.com/vi/Ilg3gGewQ5U/0.jpg)](https://www.youtube.com/watch?v=Ilg3gGewQ5U)


In [None]:
import numpy as np

class LinearLayer:
    """
    Fully connected (dense) layer performing a linear transformation.

    Attributes:
        W (numpy.ndarray): Weight matrix, shape (output_dim, input_dim).
        b (numpy.ndarray): Bias vector, shape (output_dim, 1).
        X (numpy.ndarray): Input cached by the forward pass (to be set in forward()).
        dW (numpy.ndarray): Gradient w.r.t. the weights (to be set in backward()).
        db (numpy.ndarray): Gradient w.r.t. the biases (to be set in backward()).
    """
    def __init__(self, input_dim, output_dim):
        """
        Create the layer with randomly initialized parameters (He initialization).

        Args:
            input_dim (int): Number of input features.
            output_dim (int): Number of neurons (output features).

        Both W and b are drawn from a standard normal distribution and scaled
        by sqrt(2 / input_dim). W is drawn before b.
        """
        he_scale = np.sqrt(2.0 / input_dim)
        self.W = he_scale * np.random.randn(output_dim, input_dim)
        self.b = he_scale * np.random.randn(output_dim, 1)

    def forward(self, X):
        """
        Forward pass of the linear layer (exercise: not implemented yet).

        Args:
            X (numpy.ndarray): Input of shape (input_dim, m), m being the number of examples.

        Returns:
            numpy.ndarray: Linear output of shape (output_dim, m).

        Notes:
            X should be stored on the layer for use during backpropagation.
        """

        # TODO: Store the input and calculate the output of the linear layer
        pass

    def backward(self, dA):
        """
        Backward pass of the linear layer (exercise: not implemented yet).

        Args:
            dA (numpy.ndarray): Gradient of the loss w.r.t. this layer's output,
                                shape (output_dim, m).

        Returns:
            numpy.ndarray: Gradient of the loss w.r.t. the input X,
                           shape (input_dim, m).

        Updates:
            self.dW: gradient w.r.t. W, shape (output_dim, input_dim).
            self.db: gradient w.r.t. b, shape (output_dim, 1).
        """

        # TODO: Calculate the gradient of the loss with respect to the weights and biases
        # TODO: Return the gradient of the loss with respect to the input
        pass

    def update(self, lr):
        """
        Gradient-descent parameter update (exercise: not implemented yet).

        Args:
            lr (float): Learning rate for the parameter update.

        Returns:
            None
        """

        # TODO: Update the weights and biases of the layer using the learning rate
        pass

class ReLU:
    """
    Rectified Linear Unit (ReLU) activation layer.

    All methods below are exercise placeholders: until they are filled in,
    each one simply returns None.
    """
    def forward(self, X):
        """
        Forward pass: apply ReLU element-wise (to be implemented).

        Args:
            X (numpy.ndarray): Input data of any shape.

        Returns:
            numpy.ndarray: max(0, X) element-wise, same shape as X.
        """

        # TODO: Store the input (needed by backward) and calculate the output of the ReLU layer
        pass

    def backward(self, dA):
        """
        Backward pass for the ReLU activation (to be implemented).

        Args:
            dA (numpy.ndarray): Gradient of the loss with respect to the ReLU output,
                                having the same shape as the input X.

        Returns:
            numpy.ndarray: Gradient of the loss with respect to the input X.
        """

        # TODO: Calculate the gradient of the loss with respect to the input
        # TODO: Return the gradient of the loss with respect to the input
        pass

    def update(self, lr):
        """
        Parameter update hook. ReLU has no parameters, so no update is performed.

        Args:
            lr (float): Learning rate (unused).

        Returns:
            None
        """
        # Nothing to update: ReLU has no weights or biases. The method exists so
        # every layer exposes the same update(lr) interface.
        pass

class Softmax:
    """
    Softmax activation layer, typically used at the output for multi-class classification.

    All methods below are exercise placeholders: until they are filled in,
    each one simply returns None.
    """
    def forward(self, X):
        """
        Forward pass: apply softmax over the class axis (to be implemented).

        Args:
            X (numpy.ndarray): Input data with shape (n_classes, m), where n_classes is the number of classes
                               and m is the number of examples.

        Returns:
            numpy.ndarray: Softmax probabilities with shape (n_classes, m).
        """
        # TODO: Store the input and calculate the output of the softmax layer
        pass

    def backward(self, dA):
        """
        Backward pass for the softmax activation (to be implemented).

        Args:
            dA (numpy.ndarray): Gradient of the loss with respect to the softmax output,
                                having shape (n_classes, m).

        Returns:
            numpy.ndarray: Passed-through gradient

        Note:
            The softmax derivative is often combined with the cross-entropy loss,
            which simplifies the gradient; in that setup this layer just passes
            the incoming gradient through.
        """

        # TODO: Calculate the gradient of the loss with respect to the input
        # TODO: Return the gradient of the loss with respect to the input
        pass

    def update(self, lr):
        """
        Parameter update hook. Softmax has no trainable parameters, so no update is performed.

        Args:
            lr (float): Learning rate (unused).

        Returns:
            None
        """

        # Nothing to update: softmax has no weights or biases. The method exists
        # so every layer exposes the same update(lr) interface.
        pass

class NumpyNeuralNetwork:
    """
    Numpy-only neural network for classifying MNIST-like letter images.

    Assumed Input:
        - X: shape (784, m); each column is one flattened 28x28 image and m is
          the number of examples.

    Example Architecture:
        - Layer 1: Linear layer mapping from 784 to 26 features.
        - Output Activation: Softmax.

    The class provides forward propagation, backpropagation (with the
    cross-entropy loss derivative), conversion of probabilities to class
    labels, and mini-batch gradient descent training. Methods marked TODO are
    exercise placeholders.
    """
    def __init__(self, seed=42):
        """
        Build the network and its layers.

        Args:
            seed (int): Seed passed to np.random.seed for reproducibility. Default is 42.

        Notes:
            Each layer initializes its own weights and biases (He initialization).
        """
        np.random.seed(seed)

        self.L1 = LinearLayer(784, 26)
        self.softmax = Softmax()

        # Layers in forward order
        self.layers = [self.L1, self.softmax]

    def forward(self, X):
        """
        Forward pass through the entire network.

        Args:
            X (numpy.ndarray): Input data with shape (784, m), where m is the number of examples.

        Returns:
            numpy.ndarray: Output probabilities with shape (n_classes, m); n_classes is 26.
            Until the TODO is implemented, X is returned unchanged.
        """

        # TODO: Calculate the output of the network

        return X

    def cross_entropy(self, Y_hat, Y):
        """
        Cross-entropy loss (exercise: not implemented yet).

        Args:
            Y_hat (numpy.ndarray): Predicted probability matrix of shape (n_classes, m).
            Y (numpy.ndarray): One-hot encoded true labels of shape (n_classes, m).

        Returns:
            float: The average cross-entropy loss over all m examples.

        Notes:
            Add a small constant epsilon to Y_hat to avoid computing log(0).
        """

        # TODO: Calculate the cross-entropy loss
        pass

    def convert_prob_into_class(self, probs):
        """
        Convert predicted probability distributions into class labels
        (exercise: not implemented yet).

        Args:
            probs (numpy.ndarray): Predicted probabilities with shape (n_classes, m).

        Returns:
            numpy.ndarray: Array of predicted class labels with shape (m,).
        """

        # TODO: Convert the probabilities into a class
        pass

    def get_accuracy(self, Y_hat, Y):
        """
        Classification accuracy (exercise: not implemented yet).

        Args:
            Y_hat (numpy.ndarray): Predicted probability matrix from the network, shape (n_classes, m).
            Y (numpy.ndarray): One-hot encoded true labels, shape (n_classes, m).

        Returns:
            float: Accuracy as a fraction between 0 and 1.
        """

        # TODO: Calculate the accuracy of the network
        pass

    def backprop(self, Y_hat, Y):
        """
        Backpropagation over the entire network (exercise: not implemented yet).

        Args:
            Y_hat (numpy.ndarray): Predicted output probabilities, shape (n_classes, m).
            Y (numpy.ndarray): One-hot encoded true labels, shape (n_classes, m).

        Process:
            Start from the derivative of the cross-entropy loss with respect to
            the final layer, then propagate the gradients backward through all layers.
        """

        # TODO: Calculate the gradient of the loss with respect to the input
        pass

    def train(self, X, Y, epochs, learning_rate, batch_size=32, verbose=False):
        """
        Train the network with mini-batch gradient descent.

        Args:
            X (numpy.ndarray): Input data with shape (784, m), where each column is a flattened MNIST style image.
            Y (numpy.ndarray): One-hot encoded labels with shape (n_classes, m), where n_classes is 26
            epochs (int): Number of epochs for training.
            learning_rate (float): Learning rate for the parameter updates.
            batch_size (int, optional): Number of examples per mini-batch. Default is 32.
            verbose (bool, optional): If True, prints progress on every 500th epoch
                (starting with the first). Default is False.

        Returns:
            dict: A dictionary containing:
                - 'loss_history': List of loss values, one per epoch.
                - 'accuracy_history': List of accuracy values, one per epoch.

        Process:
            - Draws a fresh random column permutation each epoch.
            - Walks over the shuffled data in mini-batches (forward pass,
              backpropagation and parameter update are left as TODOs).
            - Computes loss and accuracy on the entire dataset after each epoch.
        """
        history = {'loss_history': [], 'accuracy_history': []}
        num_examples = X.shape[1]

        for epoch in range(epochs):
            # Shuffle examples (columns) for this epoch
            order = np.random.permutation(num_examples)
            X_epoch, Y_epoch = X[:, order], Y[:, order]

            for start in range(0, num_examples, batch_size):
                stop = start + batch_size
                X_batch = X_epoch[:, start:stop]
                Y_batch = Y_epoch[:, start:stop]

                # Forward propagation
                # TODO: Calculate the output of the network

                # Backward propagation
                # TODO: Calculate the gradients of the loss with respect to the input

                # Update parameters
                # TODO: Update the weights and biases of the layer using the learning rate

            # Metrics over the full dataset for this epoch
            predictions = self.forward(X)
            loss = self.cross_entropy(predictions, Y)
            accuracy = self.get_accuracy(predictions, Y)

            history['loss_history'].append(loss)
            history['accuracy_history'].append(accuracy)

            if verbose and epoch % 500 == 0:
                print(f"Epoch {epoch+1}/{epochs}")
                print(f"loss: {loss:.5f}")
                print(f"accuracy: {accuracy:.5f}")
                print("-" * 30)

        return history

### FFN Evaluation

The cell below will allow you to evaluate the performance of your FFN on the holdout set.

In [None]:
import numpy as np

def evaluate_on_holdout(data_dict, model):
    """
    Evaluate the trained model on the holdout set

    Args:
        data_dict: Dictionary containing the dataset splits ('train' and
            'holdout' are used, each with 'images' and 'labels')
        model: Trained NumpyNeuralNetwork model; its ``forward`` and
            ``get_accuracy`` methods are called

    Returns:
        float: Accuracy on holdout set
        np.ndarray: Confusion matrix
    """
    # Imported locally so this cell does not depend on imports made in other cells
    from sklearn.metrics import confusion_matrix
    from sklearn.preprocessing import LabelEncoder

    holdout = data_dict['holdout']

    # Preprocess holdout data: flatten each image and scale to 0-1
    X_holdout = np.array([img.reshape(-1) / 255 for img in holdout['images']])

    # Get labels and convert to numerical format using the same encoder
    label_encoder = LabelEncoder()
    label_encoder.fit(data_dict['train']['labels'])  # Fit on training data to maintain same mapping
    y_holdout = label_encoder.transform(holdout['labels'])

    # Convert to format needed by model: one column per example
    X_holdout = X_holdout.T
    y_holdout_onehot = np.eye(26)[y_holdout].T

    # Get predictions. NumpyNeuralNetwork exposes forward() (returning only
    # the probability matrix) and get_accuracy().
    y_pred = model.forward(X_holdout)
    accuracy = model.get_accuracy(y_pred, y_holdout_onehot)

    # Get predicted classes
    predicted_classes = np.argmax(y_pred, axis=0)

    # Create confusion matrix
    conf_matrix = confusion_matrix(y_holdout, predicted_classes)

    # Print detailed results
    print("\nHoldout Set Evaluation:")
    print(f"Accuracy: {accuracy:.4f}")

    return accuracy, conf_matrix

### Running our FFN

Let's use all of our data to train and evaluate our FFN!

In [None]:
data = load_letter_dataset("alphabet")
X_train, X_test, y_train, y_test = prepare_data(data)

# Convert to proper format: one column per example, one-hot labels
X_train = X_train.T
X_test = X_test.T
y_train_onehot = np.eye(26)[y_train].T
y_test_onehot = np.eye(26)[y_test].T

# Starting-point hyperparameters. train() requires both epochs and
# learning_rate; tune these values to reach the target accuracy.
EPOCHS = 1000
LEARNING_RATE = 0.01

# Initialize and train model
model = NumpyNeuralNetwork()
history = model.train(
    X_train,
    y_train_onehot,
    epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    batch_size=32,
    verbose=True,
)

# Evaluate on holdout set
holdout_accuracy, conf_matrix = evaluate_on_holdout(data, model)

# Visualize results
import matplotlib.pyplot as plt
import seaborn as sns

# Plot training history
plt.figure(figsize=(15, 5))

plt.subplot(1, 2, 1)
# train() stores the per-epoch loss under 'loss_history'
plt.plot(history['loss_history'])
plt.title('Training Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')

plt.subplot(1, 2, 2)
plt.plot(history['accuracy_history'])
plt.title('Training Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')

plt.tight_layout()
plt.show()

# Plot confusion matrix
plt.figure(figsize=(12, 8))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix on Holdout Set')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()

### Target Accuracy: 70% on Holdout Set

Instead of giving hard values, which is basically impossible in deep learning, I'll be giving you a target output accuracy instead. Your goal is to reach 70% accuracy on the holdout set. You'll almost certainly have to test a number of different combinations of architectures and hyperparameters.

### CNN Experiment

While the FFN is okay, it's really not that well suited to image classification tasks such as this. Fighting through the hangover, you recall something about the news channel CNN? Implement a CNN (using pytorch) below and see if you can get a better result than the FFN.

In [None]:
# Cell 1: Imports for both experiments
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from PIL import Image
from sklearn.preprocessing import LabelEncoder

class LetterDataset(Dataset):
    """
    Torch dataset wrapping letter images and their labels.

    Labels are converted to integers by a LabelEncoder fitted on the labels
    passed to this instance, so two datasets only share the same label
    indices when they are built from the same set of labels.
    """
    def __init__(self, images, labels, transform=None):
        self.images = images
        self.transform = transform

        # Fit the encoder on this dataset's labels and keep the integer codes
        encoder = LabelEncoder()
        self.label_encoder = encoder
        self.labels = encoder.fit_transform(labels)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        # Wrap the stored array as a PIL image in 8-bit grayscale mode
        sample = Image.fromarray(self.images[idx], mode='L')
        target = self.labels[idx]

        # Optional preprocessing of the image
        if self.transform:
            sample = self.transform(sample)

        return sample, target

#TODO: Define the neural network architecture
class BasicCNN(nn.Module):
    def __init__(self, num_classes=26):  # 26 classes by default, one per letter A-Z
      """
        Basic CNN for letter classification (exercise: not implemented yet).

        Placeholder: no layers are defined and nn.Module's own __init__ is not
        called yet. When implementing, call super().__init__() first and then
        create the layers.

        Inputs:
          - num_classes: Number of output classes (default: 26 for A-Z).

        Output:
          - The finished network should produce logits (before softmax)
            representing class predictions.
      """
      pass

    def forward(self, x):
      """
        Forward pass of the CNN (exercise: not implemented yet, returns None).

        Inputs:
          - x: Input image tensor of shape (batch_size, 1, 28, 28).

        Output:
          - Logits for classification.
      """
      pass

#TODO: Training function for the CNN
def train_model(model, train_loader, val_loader, device, num_epochs=100, learning_rate=1e-3):
    """
    Trains a CNN model using mini-batch gradient descent and evaluates it on a validation set.

    Inputs:
      - model: The neural network model to be trained
      - train_loader: DataLoader for the training dataset
      - val_loader: DataLoader for the validation dataset
      - device: The device (CPU or GPU) to run training on
      - num_epochs: Number of epochs for training
      - learning_rate: Learning rate, intended for the optimizer you create
        as part of the TODOs below

    Outputs:
      - Dictionary with the per-epoch histories under the keys
        'train_losses', 'train_accs' and 'val_accs'
    """
    train_losses = []
    train_accs = []
    val_accs = []

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            # TODO: Train the model
            # The code written here must define `outputs` (the model's
            # predictions for this batch) and `loss`; both are used below.
            pass

            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

        epoch_loss = running_loss / len(train_loader)
        train_acc = 100. * correct / total

        # TODO: Validate model on validation set
        # Recompute `correct` and `total` over val_loader here; otherwise the
        # line below just repeats the training accuracy.
        pass

        val_acc = 100. * correct / total

        train_losses.append(epoch_loss)
        train_accs.append(train_acc)
        val_accs.append(val_acc)

        if (epoch + 1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, '
                f'Train Acc: {train_acc:.2f}%, Val Acc: {val_acc:.2f}%')

    return {
        'train_losses': train_losses,
        'train_accs': train_accs,
        'val_accs': val_accs,
    }

In [None]:
# Load data (using your existing load_letter_dataset function)
data_dict = load_letter_dataset("homework_datasets/alphabet")

# Image preprocessing applied by every dataset: convert the PIL image to a
# tensor, then normalize the single channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Create datasets (the 'test' split serves as the validation set here)
train_dataset = LetterDataset(data_dict['train']['images'],
                                data_dict['train']['labels'],
                                transform=transform)
val_dataset = LetterDataset(data_dict['test']['images'],
                            data_dict['test']['labels'],
                            transform=transform)
holdout_dataset = LetterDataset(data_dict['holdout']['images'],
                                data_dict['holdout']['labels'],
                                transform=transform)

# Create dataloaders; only the training loader shuffles
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)
holdout_loader = DataLoader(holdout_dataset, batch_size=32)

# Initialize model and training components (use the GPU when one is available)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = BasicCNN().to(device)

# Train model
train_model(model, train_loader, val_loader, device)

# Evaluate on holdout set: count predictions that match the labels,
# with gradient tracking disabled
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in holdout_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # Index of the largest output per example is the predicted class
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

# Accuracy as a percentage
holdout_acc = 100. * correct / total
print(f'Holdout Accuracy: {holdout_acc:.2f}%')

### Target Accuracy: 80% on Holdout Set

Unfortunately, despite having the text, you still can't read it. It appears to be encoded with some kind of cipher. If only there were seq2seq models that you maybe could use to decode it...