In [None]:
"""
Step 1: Extract and Load Pre-collected Data

Since we don't have access to the OpenTouch Interface on Mac, we'll use
pre-collected touch sensor data that has already been captured.

* The coin_data.zip file contains organized PNG images from different coin types.
* Each folder represents a different class/label (e.g., one_euro, two_euro, etc.).
* We'll extract this data and inspect its structure.

Note: This data was originally collected using DIGIT touch sensors on Ubuntu.
"""

import os
import zipfile
from pathlib import Path

zip_path = "coin_data.zip"

# The extraction folder is named after the archive: "<name without extension>_extracted".
archive_stem = os.path.splitext(zip_path)[0]
extract_dir = Path(f"{archive_stem}_extracted")

# TODO: Extract the dataset into the extraction directory and inspect its contents


In [None]:
"""
Step 2: Load Images for One Label

Load the images from one coin type folder into a camera_data list.
This simulates what would happen when loading data from a single .touch file.

* Choose which coin type folder to process (e.g., "two_euro", "one_euro").
* Load all images from that folder into a list of numpy arrays.
* This creates the same `camera_data` variable as in the sensor data workflow.
"""

import os
import numpy as np
from PIL import Image
from pathlib import Path

# TODO: Load all images from one chosen coin type folder into camera_data
# `label_name` is reused by the save cell (Step 4) as the output folder name and file prefix.
label_name = "two_euro"

# Placeholder for the loading code.
...

# Placeholder value: a list holding a single Ellipsis. Replace it with the loaded image arrays.
camera_data: list = [...]


In [None]:
"""
Step 3: Filter the Frames

Each dataset should only contain images of its respective label.
Remove frames that don't match (e.g., "no touch" images in a "coin" dataset).

Why?
The raw data also includes unwanted frames (like empty touches or noise).
Filtering ensures that each dataset is clean and only contains the intended label.

* Exception: If you are creating a "no touch" dataset, keep the empty frames.
* Hint: You can do this both programmatically and using your file explorer.
"""

import numpy as np

# TODO: Implement a filtering strategy to remove "no touch" or irrelevant frames
#       (e.g., by comparing with an average empty frame or by manual inspection)

# Placeholder value: a list holding a single Ellipsis. The save cell (Step 4) calls
# `frame.astype(np.uint8)` on every element, so the entries must be numpy arrays.
with_touch: list = [...]


In [None]:
"""
Step 4: Save the Cleaned Dataset

Now save the filtered frames to disk.

* Saving them as `.png` files makes it easy to inspect the images in your file explorer.
* Each dataset (per label) will be stored in its own folder.

DON'T MODIFY THIS CELL. SIMPLY RUN.
"""

import os
from PIL import Image

# Each label gets its own folder under coin_data/, named after the label
directory = os.path.join('coin_data', label_name)
os.makedirs(directory, exist_ok=True)

print(f'Saving {len(with_touch)} images to {directory}/')
for i, frame in enumerate(with_touch):
    # Zero-padded index so that sorting by file name reproduces the list order
    out_path = os.path.join(directory, f'{label_name}_{i:04d}.png')
    img = Image.fromarray(frame.astype(np.uint8))
    img.save(out_path)

In [None]:
"""
Step 5: Create a simple filter to convert RGB images to grayscale.

* The filter converts RGB images to grayscale using the standard luminosity method.
* Then display the original and grayscale images side by side.
"""

import numpy as np
import matplotlib.pyplot as plt

# TODO: Implement a function to convert an RGB image to grayscale

# TODO: Pick one image from the dataset and apply your grayscale function

# TODO: Display the original and grayscale images side by side


In [None]:
"""
Step 6: Define the CNN Model

We now build a Convolutional Neural Network (CNN) to classify the coins.
"""

import torch.nn as nn
from torch import Tensor


class CoinClassifier(nn.Module):
    """Coin classifier scaffold; the network in `self.model` is still a placeholder."""

    def __init__(self, num_classes: int) -> None:
        super().__init__()

        # TODO: Define the CNN architecture here
        self.model = ...

        # Number of output classes, stored for later reference
        self.num_classes = num_classes

    def forward(self, x: Tensor) -> Tensor:
        """Run `x` through `self.model` and return the result."""
        output = self.model(x)
        return output

    @staticmethod
    def preprocess(x: Tensor) -> Tensor:
        """
        Convert a batch of images from [N, H, W, C] to [N, C, H, W]
        and rescale pixel values from [0, 255] to [0, 1] as floats.
        """
        channels_first = x.permute(0, 3, 1, 2)
        return channels_first.float() / 255.0


In [None]:
"""
Alternative to Step 6: use a pre-trained EfficientNet-B4 model from torchvision instead of a custom CNN.
"""

import torch.nn as nn
from torch import Tensor


class CoinClassifierEfficientNet(nn.Module):
    """
    Coin classifier scaffold built around a pre-trained EfficientNet-B4.

    The backbone in `self.model` is still a placeholder to be filled in;
    `preprocess` is complete and prepares raw image batches for the network.
    """

    def __init__(self, num_classes: int) -> None:
        super().__init__()

        from torchvision.models import efficientnet_b4, EfficientNet_B4_Weights

        # TODO: Load a pretrained EfficientNet-B4 backbone
        # TODO: Freeze its parameters
        # TODO: Replace the classifier head to match num_classes

        self.model = ...
        self.num_classes = num_classes

    def forward(self, x: Tensor) -> Tensor:
        """Run `x` through `self.model` and return the result."""
        return self.model(x)

    @staticmethod
    def preprocess(x: Tensor) -> Tensor:
        """
        Preprocess input images from [N, H, W, C] uint8 [0, 255].
        Converts to [N, C, 380, 380] float32 normalized for EfficientNet.

        The image is resized so that its longer side becomes 380 pixels while
        the aspect ratio is preserved, then padded symmetrically to a square.
        The input size is read from `x` itself, so any H x W works.
        """
        x = x.float() / 255.0
        x = x.permute(0, 3, 1, 2)  # [N, C, H, W]

        # Read the spatial size from the batch instead of assuming 240x320,
        # otherwise other frame sizes would be resized with a wrong aspect ratio.
        input_h, input_w = x.shape[-2:]
        target_size = 380

        # Scale while preserving aspect ratio
        scale = min(target_size / input_h, target_size / input_w)
        # Clamp so that rounding can never exceed the target or collapse to zero
        new_h = max(1, min(target_size, round(input_h * scale)))
        new_w = max(1, min(target_size, round(input_w * scale)))

        # `nn` is the module this cell imports; `torch` itself is not imported here
        x = nn.functional.interpolate(
            x, size=(new_h, new_w), mode="bilinear", align_corners=False
        )

        # Pad to square (any odd remainder goes to the bottom/right)
        pad_h = target_size - new_h
        pad_w = target_size - new_w
        pad_top = pad_h // 2
        pad_left = pad_w // 2
        pad_bottom = pad_h - pad_top
        pad_right = pad_w - pad_left

        # Padding is filled with 0 *before* normalization, so padded pixels
        # end up at -mean/std after the next step.
        x = nn.functional.pad(x, (pad_left, pad_right, pad_top, pad_bottom))

        # ImageNet normalization; new_tensor() matches the dtype and device of x
        mean = x.new_tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
        std = x.new_tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)
        x = (x - mean) / std

        return x


In [None]:
"""
Step 7: Load the Coin Datasets

Now we load the saved images back into memory.

* Each coin type should be in its own subdirectory under `coin_data/`.
* A label mapping is created automatically from the folder names.
* Optionally, datasets are balanced so all classes have the same number of images.

DON'T MODIFY THIS CELL. SIMPLY RUN.
"""
import os
from PIL import Image

def load_coin_datasets(dset_path: str, balance_datasets = True, max_size=2000) -> tuple[np.ndarray, np.ndarray, dict[int, str]]:
    """
    Load coin images from the class subdirectories of `dset_path`.

    Expected structure:
    coin_data/
    ├── two_euro/
    │   ├── two_euro_0001.png
    │   └── ...

    Every subdirectory is treated as one class. Classes are numbered in
    sorted order of their folder names, and only `.png` files
    (case-insensitive) are read, in sorted file name order.

    Args:
        dset_path: Directory containing one subdirectory per coin type.
        balance_datasets: If True, keep the same number of images for every
            class: the size of the smallest class, capped at `max_size`.
        max_size: Per-class cap. Only applied when `balance_datasets` is True.

    Returns:
        X: All selected images stacked along a new first axis.
        Y: Integer class label for each image, shape (N,).
        labels: Mapping from integer label to class folder name.

    Raises:
        FileNotFoundError: If `dset_path` does not exist.
        ValueError: If `dset_path` has no subdirectories, if no images were
            selected (e.g. a class folder without PNG files while balancing),
            or if the images cannot be stacked because their shapes differ.
    """
    if not os.path.exists(dset_path):
        raise FileNotFoundError(f"Directory '{dset_path}' does not exist.")

    coin_dirs = [d for d in os.listdir(dset_path) if os.path.isdir(os.path.join(dset_path, d))]
    coin_dirs.sort()

    if not coin_dirs:
        raise ValueError(f"No subdirectories found in the directory '{dset_path}'.")

    # Create label mapping
    labels = {j: coin_name for j, coin_name in enumerate(coin_dirs)}
    print(f"Found {len(coin_dirs)} coin types:")
    for label, name in labels.items():
        print(f"  Label {label}: {name}")

    # Load all images
    coin_images = {}  # label -> list of images
    for label, coin_name in labels.items():
        coin_path = os.path.join(dset_path, coin_name)
        image_files = sorted(f for f in os.listdir(coin_path) if f.lower().endswith('.png'))

        images = []
        for img_file in image_files:
            # The context manager closes the file handle as soon as the pixels
            # are copied into the array, instead of leaving it to the GC.
            with Image.open(os.path.join(coin_path, img_file)) as img:
                images.append(np.array(img))
        coin_images[label] = images

    # Balance datasets
    all_images, all_labels = [], []
    if balance_datasets:
        smallest_dset = min(min(len(images) for images in coin_images.values()), max_size)
        for label, images in coin_images.items():
            all_images.extend(images[:smallest_dset])
            all_labels.extend([label] * smallest_dset)
    else:
        for label, images in coin_images.items():
            all_images.extend(images)
            all_labels.extend([label] * len(images))

    # np.stack on an empty list fails with an unhelpful message; report the
    # likely cause (class folders without PNG files) instead.
    if not all_images:
        empty_classes = [name for label, name in labels.items() if not coin_images[label]]
        detail = f" Class folders without PNG files: {', '.join(empty_classes)}." if empty_classes else ""
        raise ValueError(f"No images were selected from '{dset_path}'.{detail}")

    # Convert to numpy arrays
    X = np.stack(all_images, axis=0)  # Shape: (N, H, W, C)
    Y = np.array(all_labels)  # Shape: (N,)

    return X, Y, labels

In [None]:
"""
Step 8: Train the Model

Now we bring everything together:

* Load the datasets and convert them to PyTorch tensors.
* Create a DataLoader for batching and shuffling.
* Initialize and train the CNN.
"""

import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
import torch.optim as optim

# TODO: Load datasets with `load_coin_datasets("coin_data_extracted")`
# `load_coin_datasets` returns (stacked images, integer labels, label -> name mapping).
X, y, label_mapping = ...

# TODO: Convert the images and labels to PyTorch tensors
# NOTE: both `preprocess` methods document their input as [N, H, W, C] with values in [0, 255].
X_tensor = ...
y_tensor = ...

# TODO: Wrap the tensors in a TensorDataset and create a DataLoader
dataset = ...
dataloader = ...

# Device selection: first available backend wins (XPU, then CUDA, then MPS, else CPU).
# `torch.xpu` and `torch.backends.mps` do not exist in every PyTorch build, so both
# are checked for presence before use; a missing backend just falls through.
if hasattr(torch, "xpu") and torch.xpu.is_available():
    device = torch.device("xpu")
    print("Using XPU (Intel Arc / oneAPI)")
elif torch.cuda.is_available():
    device = torch.device("cuda")
    print("Using CUDA (NVIDIA)")
elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
    device = torch.device("mps")
    print("Using MPS (Apple Silicon / macOS GPU)")
else:
    device = torch.device("cpu")
    print("Using CPU")

# TODO: Create the model (choose CoinClassifier or CoinClassifierEfficientNet) and move it to the device
# One output class per entry in the label mapping.
num_classes = len(label_mapping)
model = ...

# TODO: Define the loss function and optimizer
criterion = ...
optimizer = ...

# TODO: Implement the training loop
# NOTE: `predict_coin` (Step 9) runs `model.preprocess` on its input before calling the model;
# presumably training batches need the same preprocessing — confirm when implementing.
num_epochs = ...
for epoch in range(num_epochs):
    ...


In [None]:
"""
Step 9: Predict from a Single Image

We now define a helper function to classify one image with the trained PyTorch model.

* The image is expanded with a batch dimension before inference.
* The model returns class scores (logits).
* The highest-scoring class is mapped back to its label.
"""

def predict_coin(image: np.ndarray, model: torch.nn.Module, label_mapping: dict[int, str]) -> str:
    """
    Classify a single image and return the name of the predicted class.

    The model is switched to eval mode, the image gets a leading batch axis,
    is cast to uint8 and moved to the model's device, then passed through
    `model.preprocess` and the model itself without gradient tracking.
    """
    model.eval()

    # Run inference on whatever device the model's parameters live on
    target_device = next(model.parameters()).device
    batch = torch.from_numpy(np.asarray(image)).unsqueeze(0)
    batch = batch.to(torch.uint8).to(target_device)

    with torch.no_grad():
        logits = model(model.preprocess(batch))

    # Highest score wins; map the class index back to its label
    best_index = logits.argmax(dim=1).item()
    return label_mapping[best_index]


In [None]:
"""
Step 10: Test the Model

Finally, let's check the trained model on a few random images.

* Pick random samples from the dataset.
* Run predictions with `predict_coin`.
* Compare predicted vs. true labels.
"""

import random

for i in range(20):
    # Uniform draw over all sample indices (same range as randint(0, len(X) - 1))
    idx = random.randrange(len(X))
    test_image, true_label = X[idx], label_mapping[y[idx]]

    predicted_label = predict_coin(test_image, model, label_mapping)
    print(f"True label: {true_label} | Predicted label: {predicted_label} | {true_label == predicted_label}")


In [None]:
"""
Step 11: Evaluate Accuracy

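Instead of just printing individual results,
we can calculate overall accuracy across 100 random images.

Note: the samples are drawn (with replacement) from `X`, the same data that was
loaded for training in Step 8, so this is not a held-out test accuracy.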
"""

import random

num_tests = 100
correct = 0

for i in range(num_tests):
    # Uniform draw over all sample indices; the same index may come up more than once
    idx = random.randrange(len(X))
    test_image, true_label = X[idx], label_mapping[y[idx]]
    predicted_label = predict_coin(test_image, model, label_mapping)

    # A bool adds as 0 or 1
    correct += int(predicted_label == true_label)

accuracy = correct / num_tests
print(f"Accuracy over {num_tests} random samples: {accuracy:.2%}")
