In [None]:
"""
Step 1: Extract and Load Pre-collected Data

Since we don't have access to the OpenTouch Interface on Mac, we'll use
pre-collected touch sensor data that has already been captured.

* The coin_data.zip file contains organized PNG images from different coin types.
* Each folder represents a different class/label (e.g., one_euro, two_euro, etc.).
* We'll extract this data and inspect its structure.

Note: This data was originally collected using DIGIT touch sensors on Ubuntu.
"""

import os
import zipfile
from pathlib import Path

zip_path = "coin_data.zip"
extract_dir = Path(os.path.splitext(zip_path)[0] + "_extracted")

# Fail early, with an actionable message, when the archive is missing.
# Checking for the extraction directory afterwards cannot detect this case:
# the directory is created by this very cell, so it always exists by then.
if not os.path.isfile(zip_path):
    raise FileNotFoundError(
        f"ERROR: {zip_path} not found. "
        f"Make sure {zip_path} is in your working directory."
    )

# Create extraction directory
os.makedirs(extract_dir, exist_ok=True)

print(f"Extracting {zip_path} into {extract_dir}...")
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)

print(f"\nSuccessfully extracted dataset to: {extract_dir}")

# List the coin types (one subdirectory per class/label)
coin_types = sorted(d.name for d in extract_dir.iterdir() if d.is_dir())

print(f"Found {len(coin_types)} coin types:")
for coin_type in coin_types:
    coin_dir = extract_dir / coin_type
    num_images = len(list(coin_dir.glob("*.png")))
    print(f"  - {coin_type}: {num_images} images")

if not coin_types:
    # Nothing usable was extracted; later steps expect one folder per label.
    print(f"WARNING: no class folders found directly inside {extract_dir}.")

In [None]:
"""
Step 2: Load Images for One Label

Load the images from one coin type folder into a camera_data list.
This simulates what would happen when loading data from a single .touch file.

* Choose which coin type folder to process (e.g., "two_euro", "one_euro").
* Load all images from that folder into a list of numpy arrays.
* This creates the same `camera_data` variable as in the sensor data workflow.
"""

import os
import numpy as np
from PIL import Image
from pathlib import Path

# TODO: Choose which label to process
label_name = "two_euro"  # Change this to: "one_euro", "ten_cent", "no_touch", etc.

# Folder of the chosen label inside the extracted dataset
dataset_dir = Path("coin_data_extracted")
coin_path = dataset_dir / label_name

# Stop with a helpful message (listing the valid choices) on an unknown label
if not coin_path.exists():
    available = [d.name for d in dataset_dir.iterdir() if d.is_dir()]
    raise FileNotFoundError(f"Directory '{coin_path}' does not exist. Available folders: {available}")

# Collect the PNG file names in a stable, sorted order
image_files = [f for f in os.listdir(coin_path) if f.lower().endswith('.png')]
image_files.sort()

# Decode every image into a numpy array; the list order follows image_files
camera_data = [np.array(Image.open(coin_path / file_name)) for file_name in image_files]

print(f"Loaded {len(camera_data)} images from '{label_name}' folder")
if len(camera_data) > 0:
    print(f"Image shape: {camera_data[0].shape}")


In [None]:
"""
Step 3: Filter the Frames

Each dataset should only contain images of its respective label.
Remove frames that don't match (e.g., "no touch" images in a "coin" dataset).

Why?
The raw data also includes unwanted frames (like empty touches or noise).
Filtering ensures that each dataset is clean and only contains the intended label.

* Exception: If you are creating a "no touch" dataset, keep the empty frames.
* Hint: You can do this both programmatically and using your file explorer.
"""

import numpy as np

# Reference "empty sensor" image: the pixel-wise mean over the first 20 frames,
# which are assumed not to show any touch. Adjust the slice as needed.
no_touch = camera_data[:20]
avg_empty_image = np.stack(no_touch, axis=0).mean(axis=0)

def mean_square_error(image_a: np.ndarray, image_b: np.ndarray) -> float:
    """
    Return the mean of the squared element-wise difference of two images.

    Both inputs are promoted to float64 before subtracting. Without the
    promotion, two uint8 images would be subtracted and squared in uint8
    arithmetic, where negative differences wrap around (0 - 20 becomes 236)
    and the squares overflow, yielding a meaningless error value.

    Args:
        image_a: First image (any numeric dtype).
        image_b: Second image; must be broadcastable against ``image_a``.

    Returns:
        The mean squared error as a Python float.
    """
    diff = np.asarray(image_a, dtype=np.float64) - np.asarray(image_b, dtype=np.float64)
    return float(np.mean(diff ** 2))

threshold = 40.0  # <-- TODO: Adjust as needed
# To calibrate the threshold, inspect the error of a frame you know shows a touch:
# print(mean_square_error(avg_empty_image, camera_data[100]))

# Keep only the frames that differ enough from the averaged empty image
with_touch = []
for frame in camera_data:
    if mean_square_error(frame, avg_empty_image) > threshold:
        with_touch.append(frame)
# with_touch = camera_data  # Use this when having a dataset with no touch
print(f'There are {len(with_touch)} images with recognized touch')

In [None]:
"""
Step 4: Save the Cleaned Dataset

Now save the filtered frames to disk.

* Saving them as `.png` files makes it easy to inspect the images in your file explorer.
* Each dataset (per label) will be stored in its own folder.

DON'T MODIFY THIS CELL. SIMPLY RUN.
"""

import os
from PIL import Image

# The label doubles as the name of the output folder: coin_data/<label_name>/
directory = os.path.join('coin_data', label_name)
os.makedirs(directory, exist_ok=True)

print(f'Saving {len(with_touch)} images to {directory}/')
# Note: files already present in the folder are not removed, so an earlier run
# that saved more frames leaves its higher-numbered images behind.
for index, frame in enumerate(with_touch):
    target_file = os.path.join(directory, f'{label_name}_{index:04d}.png')
    Image.fromarray(frame.astype(np.uint8)).save(target_file)

In [None]:
"""
Step 5: Convert RGB images to grayscale.

* `rgb_to_grayscale` converts an RGB image to grayscale using the standard luminosity method.
* The first loaded image is converted and displayed next to the original for comparison.
* Note: unlike a `BaseFilter`-based model with `forward` and `onnx_export`, this cell defines a plain function and does not save anything to disk.
"""

import numpy as np
import matplotlib.pyplot as plt

def rgb_to_grayscale(image: np.ndarray) -> np.ndarray:
    """
    Convert an RGB image (H, W, 3) uint8 to grayscale (H, W) uint8.

    Uses the luminosity weights 0.2989 (R), 0.5870 (G) and 0.1140 (B).
    The weighted sum is rounded to the nearest integer and clipped to
    [0, 255] before the cast. A bare ``astype(np.uint8)`` truncates, and
    because the weights sum to 0.9999 a pure white pixel would come out
    as 254 instead of 255.

    Args:
        image: Array whose last axis holds the R, G, B channels in that order.
            Any channels after the first three are ignored.

    Returns:
        Array with the last axis removed, dtype uint8.
    """
    r, g, b = image[..., 0], image[..., 1], image[..., 2]
    grayscale = 0.2989 * r + 0.5870 * g + 0.1140 * b
    return np.clip(np.rint(grayscale), 0, 255).astype(np.uint8)

# Use the first loaded frame as the example (expected to be (H, W, 3) uint8)
original = camera_data[0]

# Grayscale version of the same frame
gray = rgb_to_grayscale(original)

# Side-by-side comparison: original on the left, grayscale on the right
fig, (ax_original, ax_gray) = plt.subplots(1, 2, figsize=(8, 4))

ax_original.imshow(original)
ax_original.set_title("Original")
ax_original.axis("off")

ax_gray.imshow(gray, cmap="gray")
ax_gray.set_title("Grayscale")
ax_gray.axis("off")

fig.tight_layout()
plt.show()


In [None]:
"""
Step 6: Define the CNN Model

We now build a Convolutional Neural Network (CNN) to classify the coins.
"""

import torch.nn as nn
from torch import Tensor

class CoinClassifier(nn.Module):
    """
    Small CNN for coin classification.

    Three conv/ReLU/max-pool stages (3 -> 32 -> 64 -> 128 channels) are
    followed by global average pooling and a two-layer classifier head with
    dropout. All layers live in one flat ``nn.Sequential`` stored as
    ``self.model``.
    """

    def __init__(self, num_classes: int) -> None:
        super().__init__()
        self.num_classes = num_classes

        layers = []

        # Feature extractor: each stage halves the spatial size via max-pooling
        # (e.g. a 320x240 input becomes 160x120, then 80x60, then 40x30).
        for in_channels, out_channels in ((3, 32), (32, 64), (64, 128)):
            layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))
            layers.append(nn.ReLU())
            layers.append(nn.MaxPool2d(2, 2))

        # Head: collapse each feature map to 1x1, then classify
        layers.extend([
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Dropout(0.5),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, num_classes),
        ])

        self.model = nn.Sequential(*layers)

    def forward(self, x: Tensor) -> Tensor:
        """Run a preprocessed [N, C, H, W] batch through the network and return the logits."""
        logits = self.model(x)
        return logits

    @staticmethod
    def preprocess(x: Tensor) -> Tensor:
        """
        Turn a channels-last batch [N, H, W, C] into channels-first [N, C, H, W].
        Pixel values are cast to float and scaled from [0, 255] to [0, 1].
        """
        scaled = x.float() / 255.0
        channels_first = scaled.permute(0, 3, 1, 2)
        return channels_first


In [None]:
"""
This is an alternative to Step 6 that uses a pre-trained EfficientNet-B4 model from torchvision instead of the custom CNN.
"""

import torch.nn as nn
from torch import Tensor

class CoinClassifierEfficientNet(nn.Module):
    """
    Coin classifier built on a pretrained torchvision EfficientNet-B4.

    The backbone parameters are frozen and only the replaced final linear
    layer (``classifier[1]``) is trainable.

    NOTE: ``requires_grad = False`` only stops gradient updates. Layers such
    as BatchNorm and Dropout inside the backbone still follow the module's
    train/eval mode.
    """

    def __init__(self, num_classes: int) -> None:
        super().__init__()

        from torchvision.models import efficientnet_b4, EfficientNet_B4_Weights

        # Load pretrained EfficientNet-B4 backbone
        weights = EfficientNet_B4_Weights.DEFAULT
        backbone = efficientnet_b4(weights=weights)

        # Freeze backbone parameters
        for param in backbone.parameters():
            param.requires_grad = False

        # Replace classifier head; the new layer is created after the freeze,
        # so its parameters remain trainable.
        backbone.classifier[1] = nn.Linear(
            backbone.classifier[1].in_features,
            num_classes,
            bias=True
        )

        self.model = backbone
        self.num_classes = num_classes

    def forward(self, x: Tensor) -> Tensor:
        """Run a preprocessed [N, 3, H, W] batch through the network and return the logits."""
        return self.model(x)

    @staticmethod
    def preprocess(x: Tensor, target_size: int = 380) -> Tensor:
        """
        Preprocess input images from [N, H, W, C] uint8 [0, 255].
        Converts to [N, C, target_size, target_size] float32 normalized for EfficientNet.

        The image is resized so that its longer side equals ``target_size``
        while preserving the aspect ratio, then zero-padded symmetrically to a
        square, and finally normalized with the ImageNet mean/std.

        The input height and width are read from the tensor itself, so frames
        of any size are handled; a 240x320 input still yields a 285x380 resize
        padded to 380x380.

        Args:
            x: Batch of channels-last RGB images, shape [N, H, W, 3].
            target_size: Side length of the square output (default 380).

        Returns:
            Float tensor of shape [N, 3, target_size, target_size] on the same
            device as ``x``.
        """
        # Imported here because this cell only imports `torch.nn` and `Tensor`;
        # the method must not depend on another cell having imported `torch`.
        import torch
        import torch.nn.functional as F

        x = x.float() / 255.0
        x = x.permute(0, 3, 1, 2)  # [N, C, H, W]

        input_h, input_w = x.shape[-2], x.shape[-1]

        # Scale while preserving aspect ratio. round() plus clamping guards
        # against float error pushing a side to target_size +/- 1.
        scale = min(target_size / input_h, target_size / input_w)
        new_h = min(target_size, max(1, round(input_h * scale)))
        new_w = min(target_size, max(1, round(input_w * scale)))

        x = F.interpolate(x, size=(new_h, new_w), mode='bilinear', align_corners=False)

        # Pad to square, splitting the padding as evenly as possible
        pad_h = target_size - new_h
        pad_w = target_size - new_w
        pad_top = pad_h // 2
        pad_left = pad_w // 2
        pad_bottom = pad_h - pad_top
        pad_right = pad_w - pad_left

        # F.pad orders the last dimension first: (left, right, top, bottom)
        x = F.pad(x, (pad_left, pad_right, pad_top, pad_bottom))

        # ImageNet normalization
        mean = torch.tensor([0.485, 0.456, 0.406], dtype=x.dtype, device=x.device).view(1, 3, 1, 1)
        std = torch.tensor([0.229, 0.224, 0.225], dtype=x.dtype, device=x.device).view(1, 3, 1, 1)
        x = (x - mean) / std

        return x


In [None]:
"""
Step 7: Load the Coin Datasets

Now we load the saved images back into memory.

* Each coin type should be in its own subdirectory under `coin_data/`.
* A label mapping is created automatically from the folder names.
* Optionally, datasets are balanced so all classes have the same number of images.

DON'T MODIFY THIS CELL. SIMPLY RUN.
"""
import os
from PIL import Image

def load_coin_datasets(dset_path: str, balance_datasets: bool = True, max_size: int = 2000) -> tuple[np.ndarray, np.ndarray, dict[int, str]]:
    """
    Load coin images from subdirectories

    Expected structure:
    coin_data/
    ├── two_euro/
    │   ├── two_euro_0001.png
    │   └── ...

    Every subdirectory of ``dset_path`` is one class; integer labels are
    assigned in alphabetical order of the folder names. Each PNG is decoded
    as 3-channel RGB so that all arrays share the same channel layout.

    Args:
        dset_path: Root directory containing one folder per class.
        balance_datasets: If True, keep the same number of images per class:
            the size of the smallest class, capped at ``max_size``. Images are
            taken from the start of the sorted file list.
        max_size: Upper bound on images per class. Only used when balancing.

    Returns:
        ``(X, Y, labels)`` where ``X`` stacks the images as (N, H, W, C),
        ``Y`` holds the integer label of each image with shape (N,), and
        ``labels`` maps each integer label to its folder name.

    Raises:
        FileNotFoundError: If ``dset_path`` does not exist.
        ValueError: If there are no subdirectories, if balancing is requested
            and some class has no PNG images, or if no images were found at all.
    """
    if not os.path.exists(dset_path):
        raise FileNotFoundError(f"Directory '{dset_path}' does not exist.")

    coin_dirs = sorted(d for d in os.listdir(dset_path) if os.path.isdir(os.path.join(dset_path, d)))

    if not coin_dirs:
        raise ValueError(f"No subdirectories found in the directory '{dset_path}'.")

    # Create label mapping
    labels = dict(enumerate(coin_dirs))
    print(f"Found {len(coin_dirs)} coin types:")
    for label, name in labels.items():
        print(f"  Label {label}: {name}")

    # Load all images
    coin_images = {}  # label -> list of images
    for label, coin_name in labels.items():
        coin_path = os.path.join(dset_path, coin_name)
        image_files = sorted(f for f in os.listdir(coin_path) if f.lower().endswith('.png'))

        images = []
        for img_file in image_files:
            # The context manager closes the file handle deterministically;
            # convert("RGB") normalizes RGBA / palette / grayscale PNGs.
            with Image.open(os.path.join(coin_path, img_file)) as img:
                images.append(np.array(img.convert("RGB")))
        coin_images[label] = images

    empty_classes = [labels[label] for label, images in coin_images.items() if not images]

    # Balance datasets
    all_images, all_labels = [], []
    if balance_datasets:
        if empty_classes:
            # Balancing to the smallest class would silently produce an empty dataset.
            raise ValueError(
                f"No PNG images found for: {', '.join(empty_classes)} (in '{dset_path}'). "
                "Remove these folders or pass balance_datasets=False."
            )
        smallest_dset = min(min(len(images) for images in coin_images.values()), max_size)
        for label, images in coin_images.items():
            all_images.extend(images[:smallest_dset])
            all_labels.extend([label] * smallest_dset)
    else:
        for label, images in coin_images.items():
            all_images.extend(images)
            all_labels.extend([label] * len(images))

    if not all_images:
        raise ValueError(f"No PNG images could be loaded from '{dset_path}'.")

    # Convert to numpy arrays
    X = np.stack(all_images, axis=0)  # Shape: (N, H, W, C)
    Y = np.array(all_labels)  # Shape: (N,)

    return X, Y, labels

In [None]:
"""
Step 8: Train the Model

Now we bring everything together:

* Load the datasets and convert them to PyTorch tensors.
* Create a DataLoader for batching and shuffling.
* Initialize and train the CNN.
"""

import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
import torch.optim as optim

# Load datasets
X, y, label_mapping = load_coin_datasets("coin_data_extracted")

# Convert to PyTorch tensors
X_tensor = torch.from_numpy(X).to(torch.uint8)   # keep uint8 for preprocessing
y_tensor = torch.from_numpy(y).long()

# Create dataset and dataloader
dataset = TensorDataset(X_tensor, y_tensor)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Device selection. Every optional backend is probed defensively, because
# `torch.xpu` / `torch.backends.mps` do not exist in all PyTorch builds.
if hasattr(torch, "xpu") and torch.xpu.is_available():
    device = torch.device("xpu")
    print("Using XPU (Intel Arc / oneAPI)")
elif torch.cuda.is_available():
    device = torch.device("cuda")
    print("Using CUDA (NVIDIA)")
elif getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
    device = torch.device("mps")
    print("Using MPS (Apple Silicon / macOS GPU)")
else:
    device = torch.device("cpu")
    print("Using CPU")

# Create the model (choose one)
num_classes = len(label_mapping)
# model = CoinClassifier(num_classes=num_classes)
model = CoinClassifierEfficientNet(num_classes=num_classes)

# Move whichever model was chosen above to the selected device. Kept as a
# separate statement so that switching models cannot leave the model on the
# CPU while the batches are sent to `device`.
model = model.to(device)
model.train()  # make sure dropout etc. are in training mode

# Loss and optimizer (only parameters that are actually trainable are optimized)
criterion = nn.CrossEntropyLoss()
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.Adam(trainable_params, lr=1e-3)

# Training loop
num_epochs = 20
for epoch in range(num_epochs):
    running_loss = 0.0
    for batch_x, batch_y in dataloader:
        # Move data to device
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)

        # Preprocess inputs (uint8 [N, H, W, C] -> normalized float [N, C, H, W])
        batch_x = model.preprocess(batch_x)

        # Forward
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Mean loss per batch for this epoch, reported every 5th epoch
    avg_loss = running_loss / len(dataloader)
    if (epoch + 1) % 5 == 0:
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}")

print("Training complete!")


In [None]:
"""
Step 9: Predict from a Single Image

We now define a helper function to classify one image with the trained PyTorch model.

* The image is expanded with a batch dimension before inference.
* The model returns class scores (logits).
* The highest-scoring class is mapped back to its label.
"""

def predict_coin(image: np.ndarray, model: torch.nn.Module, label_mapping: dict[int, str]) -> str:
    """
    Classify one image and return the name of the predicted coin type.

    The model is switched to eval mode (and left in it). The image gets a
    leading batch dimension, is moved to the model's device as uint8, passed
    through ``model.preprocess`` and then through the model without gradient
    tracking. The index of the largest logit is looked up in ``label_mapping``.
    """
    model.eval()

    # Run inference on whichever device holds the model's parameters
    target_device = next(model.parameters()).device
    single_item_batch = np.expand_dims(image, axis=0)
    batch = torch.from_numpy(single_item_batch).to(torch.uint8).to(target_device)

    with torch.no_grad():
        logits = model(model.preprocess(batch))

    best_index = logits.argmax(dim=1).item()
    return label_mapping[best_index]


In [None]:
"""
Step 10: Test the Model

Finally, let's check the trained model on a few random images.

* Pick random samples from the dataset.
* Run predictions with `predict_coin`.
* Compare predicted vs. true labels.
"""

import random

# Spot-check 20 randomly drawn samples (drawn with replacement)
for trial in range(20):
    idx = random.randrange(len(X))
    test_image, true_label = X[idx], label_mapping[y[idx]]

    predicted_label = predict_coin(test_image, model, label_mapping)
    print(f"True label: {true_label} | Predicted label: {predicted_label} | {true_label == predicted_label}")


In [None]:
"""
Step 11: Evaluate Accuracy

Instead of just printing individual results,
we can calculate overall accuracy across 100 random images.
"""

import random

num_tests = 100
correct = 0

# Draw num_tests random samples (with replacement) and count the hits
for trial in range(num_tests):
    idx = random.randrange(len(X))
    predicted_label = predict_coin(X[idx], model, label_mapping)
    correct += int(predicted_label == label_mapping[y[idx]])

accuracy = correct / num_tests
print(f"Accuracy over {num_tests} random samples: {accuracy:.2%}")
