In [None]:
# runs in jupyter container on node-eval-offline
import os
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import time
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import random
from PIL import Image

In [None]:
# runs in jupyter container on node-eval-offline
# Path of the serialized model; torch.load returns the model object itself,
# which is used directly below (no separate state_dict step).
model_path = "models/mobilenetv2_canary.pt"
# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# NOTE(review): weights_only=False makes torch.load unpickle arbitrary Python
# objects, which can execute code embedded in the file. Only load checkpoints
# that come from a trusted source.
model = torch.load(model_path, map_location=device, weights_only=False)
# Switch to inference mode; binding the result to `_` keeps the notebook from
# echoing the module repr as the cell output.
_ = model.eval()

In [None]:
# runs in jupyter container on node-eval-offline
from torch.utils.data import Dataset
from PIL import Image

# Directory holding the evaluation images; can be overridden with FONT_DATA_DIR.
font_data_dir = os.getenv("FONT_DATA_DIR", "/mnt/evaluation_filtered")
# Text file with one font name per line; the line order defines the label index.
subset_font_path = "/home/jovyan/work/fontsubset.txt"

# Load font name to index map (blank lines are skipped, whitespace is trimmed)
with open(subset_font_path) as f:
    font_list = [line.strip() for line in f if line.strip()]
font_to_index = {name: idx for idx, name in enumerate(font_list)}

# Deterministic preprocessing applied to every evaluation image.
# NOTE(review): Resize((224, 224)) already yields a 224x224 image, so the
# CenterCrop(224) that follows appears to be a no-op — confirm it is intended.
# NOTE(review): the mean/std values presumably match what the model was
# trained with — TODO confirm.
val_test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

class FontDataset(Dataset):
    """Font images stored as ``<name>.png`` with a sibling ``<name>.txt`` label file.

    Each ``.txt`` file contains the font name of the image with the same stem.
    The name is mapped to an integer label through ``font_to_index``.

    Args:
        img_dir: Directory containing the ``.png`` / ``.txt`` pairs.
        transform: Callable applied to the RGB PIL image before it is returned.
        font_to_index: Mapping from font name to integer class index.
    """

    def __init__(self, img_dir, transform, font_to_index):
        self.img_dir = img_dir
        self.transform = transform
        self.font_to_index = font_to_index
        # os.listdir returns entries in arbitrary order. Sort them so that a
        # given integer index refers to the same file on every run and machine
        # (other cells address samples by their position in this list).
        self.image_files = sorted(
            f for f in os.listdir(img_dir) if f.endswith(".png")
        )

    def __len__(self):
        """Return the number of ``.png`` files found in ``img_dir``."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Return ``(transformed_image, label)`` for the sample at ``idx``.

        Raises:
            FileNotFoundError: If the sibling ``.txt`` label file is missing.
            KeyError: If the font name in the label file is not in ``font_to_index``.
        """
        img_filename = self.image_files[idx]
        # Swap only the trailing extension; str.replace would also rewrite a
        # ".png" that happens to occur earlier in the file name.
        txt_filename = os.path.splitext(img_filename)[0] + ".txt"

        img_path = os.path.join(self.img_dir, img_filename)
        txt_path = os.path.join(self.img_dir, txt_filename)

        # convert() returns an independent image, so the file handle can be
        # released as soon as the conversion is done.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")
        image = self.transform(image)

        with open(txt_path) as f:
            font_name = f.read().strip()

        label = self.font_to_index[font_name]
        return image, label

# Build the evaluation set and iterate it in a fixed order. Later cells store
# predictions by running position and look samples up by that same position,
# so shuffle must stay False.
test_dataset = FontDataset(font_data_dir, val_test_transform, font_to_index)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [None]:
# Make sure the network lives on the evaluation device and is in inference mode
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device).eval()

# Output buffers sized to the full dataset; they are filled batch by batch
dataset_size = len(test_loader.dataset)
all_predictions = np.empty(dataset_size, dtype=np.int64)
all_labels = np.empty(dataset_size, dtype=np.int64)

# Running write position into the two buffers
current_index = 0

with torch.no_grad():
    for images, labels in test_loader:
        batch_size = labels.size(0)
        batch_end = current_index + batch_size

        outputs = model(images.to(device))
        # Index of the highest score along the class dimension
        predicted = torch.max(outputs, 1)[1]

        all_predictions[current_index:batch_end] = predicted.cpu().numpy()
        all_labels[current_index:batch_end] = labels.cpu().numpy()
        current_index = batch_end


In [None]:
# runs in jupyter container on node-eval-offline
# Share of samples whose predicted class equals the true class, in percent
num_correct = np.sum(all_predictions == all_labels)
overall_accuracy = num_correct / all_labels.shape[0] * 100
print(f'Overall Accuracy: {overall_accuracy:.2f}%')

In [None]:
# runs in jupyter container on node-eval-offline
# Reuse the font list that defined the label indices (font_to_index) instead of
# re-reading the file through a second hard-coded path. This guarantees that
# classes[i] is the name of label i even if subset_font_path is changed.
classes = np.array(font_list)

num_classes = classes.shape[0]


In [None]:
# runs in jupyter container on node-eval-offline
# Per-class tallies, indexed by the true label
per_class_correct = np.zeros(num_classes, dtype=np.int32)
per_class_total = np.zeros(num_classes, dtype=np.int32)

# np.add.at accumulates correctly when the same label index occurs many times
np.add.at(per_class_total, all_labels, 1)
np.add.at(per_class_correct, all_labels[all_labels == all_predictions], 1)

# Report only the classes that actually occur in the evaluation set
for i in np.flatnonzero(per_class_total > 0):
    acc = per_class_correct[i] / per_class_total[i] * 100
    correct_str = f"{per_class_correct[i]}/{per_class_total[i]}"
    print(f"{classes[i]:<20} {acc:10.2f}% {correct_str:>20}")

In [None]:
# runs in jupyter container on node-eval-offline
# Rows are true labels, columns are predicted labels
conf_matrix = np.zeros((num_classes, num_classes), dtype=np.int32)
np.add.at(conf_matrix, (all_labels, all_predictions), 1)

plt.figure(figsize=(6, 5))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=classes,
    yticklabels=classes,
)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

In [None]:
# runs in jupyter container on node-eval-offline

# The two classes whose mutual confusions are inspected
font_1 = "CourierStd"
font_2 = "AmigoStd"

# Position of each font name in `classes`
font1_index = np.where(classes == font_1)[0][0]
font2_index = np.where(classes == font_2)[0][0]

# Samples where one of the two fonts was predicted as the other, in either direction
one_as_two = (all_labels == font1_index) & (all_predictions == font2_index)
two_as_one = (all_labels == font2_index) & (all_predictions == font1_index)
confused_indices = np.flatnonzero(one_as_two | two_as_one).tolist()

# Draw up to five of them without repetition
num_samples = min(5, len(confused_indices))
sample_indices = np.random.choice(confused_indices, size=num_samples, replace=False)

# For controlled demo/discussion, override with hardcoded indices (optional)
# sample_indices = np.array([404, 927, 496, 435, 667])  # ← only use this if you know these exist in your data


In [None]:
# runs in jupyter container on node-eval-offline

# Fetch the selected samples straight from the dataset by index.
# test_loader iterates test_dataset without shuffling, so position `idx` in
# all_labels / all_predictions is item `idx` of test_dataset. Indexing directly
# avoids decoding every image in the loader just to pick out a handful.
# Each entry is (index, normalized image tensor), in the order of sample_indices.
sample_images = [(idx, test_dataset[int(idx)][0]) for idx in sample_indices]


In [None]:
# runs in jupyter container on node-eval-offline
# Per-channel statistics used by the Normalize step of val_test_transform
mean = torch.tensor([0.485, 0.456, 0.406])
std = torch.tensor([0.229, 0.224, 0.225])
# Show each selected sample; the tensors are normalized, so map them back to [0, 1] first
plt.figure(figsize=(12, 3))
num_panels = len(sample_images)
for i, (idx, image) in enumerate(sample_images, start=1):
    restored = image * std.view(3, 1, 1) + mean.view(3, 1, 1)
    # Clip to the displayable range and go from (C, H, W) to (H, W, C) for imshow
    restored = restored.clamp(0, 1).permute(1, 2, 0)
    plt.subplot(1, num_panels, i)
    plt.imshow(restored)
    plt.title(f"True: {classes[all_labels[idx]]}\nPred: {classes[all_predictions[idx]]}\nIndex: {idx}")
    plt.axis('off')
plt.tight_layout()
plt.show()

In [None]:
# runs in jupyter container on node-eval-offline
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget

# GradCAM setup
# The last module of model.features is the layer GradCAM attaches to.
# NOTE(review): assumes `model` exposes an indexable `.features` attribute (as
# torchvision's MobileNetV2 does) — confirm for the loaded checkpoint.
target_layer = model.features[-1]
cam = GradCAM(model=model, target_layers=[target_layer])

In [None]:
# runs in jupyter container on node-eval-offline

# Normalization statistics; needed here only to undo the normalization for display
mean = torch.tensor([0.485, 0.456, 0.406])
std = torch.tensor([0.229, 0.224, 0.225])

plt.figure(figsize=(12, 3))
for i, (idx, image) in enumerate(sample_images):
    # `image` comes from the evaluation dataset, so val_test_transform has
    # already normalized it. Feed it to GradCAM as-is: normalizing a second
    # time would explain an input the model never saw during evaluation.
    input_tensor = image.clone().unsqueeze(0)  # add batch dim

    # Explain the class the model actually predicted for this sample
    target_category = int(all_predictions[idx])
    grayscale_cam = cam(input_tensor=input_tensor, targets=[ClassifierOutputTarget(target_category)])
    grayscale_cam = grayscale_cam[0, :]

    # show_cam_on_image is given an (H, W, C) float image in [0, 1]
    image_disp = image * std[:, None, None] + mean[:, None, None]  # unnormalize
    image_disp = torch.clamp(image_disp, 0, 1).permute(1, 2, 0).numpy()

    visualization = show_cam_on_image(image_disp, grayscale_cam, use_rgb=True)
    plt.subplot(1, len(sample_images), i + 1)
    plt.imshow(visualization)
    plt.title(f"True: {classes[all_labels[idx]]}\nPred: {classes[all_predictions[idx]]}\nIndex: {idx}")
    plt.axis('off')
plt.tight_layout()
plt.show()

Template-based tests

In [None]:
# runs in jupyter container on node-eval-offline

TEMPLATE_DIR = "templates"

fig, axes = plt.subplots(2, 3, figsize=(8, 5))

# Top row: three random samples from one randomly chosen, non-empty font folder
font_dir = os.path.join(TEMPLATE_DIR, "fonts")
font_classes = [d for d in os.listdir(font_dir) if os.listdir(os.path.join(font_dir, d))]
random_font = random.choice(font_classes)
random_font_dir = os.path.join(font_dir, random_font)
font_images = random.sample(os.listdir(random_font_dir), 3)
font_paths = [os.path.join(random_font_dir, f) for f in font_images]

for ax, path in zip(axes[0], font_paths):
    ax.imshow(Image.open(path))
    ax.set_title(f"Font ({random_font})")
    ax.axis("off")

# Bottom row: three random background templates
bg_dir = os.path.join(TEMPLATE_DIR, "background")
bg_images = random.sample(os.listdir(bg_dir), 3)
bg_paths = [os.path.join(bg_dir, f) for f in bg_images]

for ax, path in zip(axes[1], bg_paths):
    ax.imshow(Image.open(path))
    ax.set_title("Background")
    ax.axis("off")

plt.tight_layout()
plt.show()


In [None]:
# runs in jupyter container on node-eval-offline

def compose_font_image(font_path, bg_path=None, extra_path=None):
    """
    Layer a font sample over a canvas and return the result as an RGB image.

    font_path:  image file with the font sample; its size defines the canvas size.
    bg_path:    optional background image file, stretched to the canvas size.
                Without it the canvas is opaque white.
    extra_path: optional overlay image file (e.g. emoji, occlusion, sticker),
                scaled to 35% of the canvas and placed at the right edge.

    The font sample is scaled to 80% of the canvas and pasted horizontally
    centered. Both pasted layers sit 5% of the canvas height above the bottom
    edge and use their own alpha channel as the paste mask.
    """
    glyphs = Image.open(font_path).convert("RGBA")
    canvas_size = glyphs.size

    # Canvas: the given background at the font image's size, or plain white
    if not bg_path:
        canvas = Image.new("RGBA", canvas_size, (255, 255, 255, 255))
    else:
        canvas = Image.open(bg_path).convert("RGBA").resize(canvas_size)

    width, height = canvas.size
    bottom_margin = int(height * 0.05)

    def _scaled(img, factor):
        # Resize relative to the canvas dimensions, not to the image's own size
        return img.resize((int(width * factor), int(height * factor)))

    # The overlay goes on first, so the font sample is drawn on top of it
    if extra_path:
        overlay = _scaled(Image.open(extra_path).convert("RGBA"), 0.35)
        ov_w, ov_h = overlay.size
        canvas.paste(overlay, (width - ov_w, height - ov_h - bottom_margin), overlay)

    # Font sample, slightly scaled down, near the bottom center
    glyphs = _scaled(glyphs, 0.8)
    gl_w, gl_h = glyphs.size
    canvas.paste(glyphs, ((width - gl_w) // 2, height - gl_h - bottom_margin), glyphs)

    return canvas.convert("RGB")


CourierStd

In [None]:
# runs in jupyter container on node-eval-offline

# Composed test images for the CourierStd section, keyed by scenario name.
imgs = {
    # Font sample alone (no background, no overlay)
    'original_image': compose_font_image('templates/fonts/CourierStd/CourierStd_0.png'),
    # Same sample with background 001 and the smiley-face overlay
    'composed_bg1_extra1': compose_font_image('templates/fonts/CourierStd/CourierStd_0.png',
                                              'templates/background/001.png',
                                              'templates/extras/smiley_face.png'),
    # Same sample with background 002 and the circle overlay
    'composed_bg2_extra2': compose_font_image('templates/fonts/CourierStd/CourierStd_0.png',
                                              'templates/background/002.png',
                                              'templates/extras/circle.png'),
    # A different CourierStd sample on background 001
    'composed_same_class': compose_font_image('templates/fonts/CourierStd/CourierStd_1.png',
                                              'templates/background/001.png'),
    # A sample of another font (ImpactLTStd) on background 001
    'composed_diff_class': compose_font_image('templates/fonts/ImpactLTStd/ImpactLTStd_2.png',
                                              'templates/background/001.png')
}


In [None]:
# runs in jupyter container on node-eval-offline

fig, axes = plt.subplots(1, 5, figsize=(14, 3))

# One panel per composed image, in dictionary order
for ax, (key, composed) in zip(axes, imgs.items()):
    preview = composed.resize((224, 224)).crop((16, 16, 224, 224))
    ax.imshow(preview)
    ax.set_title(f"{key}")
    ax.axis("off")

plt.tight_layout()
plt.show()


In [None]:
def predict(model, image, device=None):
    """Return the predicted class index for a single PIL image.

    The image is preprocessed with ``val_test_transform``, given a batch
    dimension, and passed through ``model`` in eval mode without gradients.

    Args:
        model: The classifier. It is switched to eval mode as a side effect.
        image: Input image accepted by ``val_test_transform``.
        device: Device to run on. When omitted, the device of the model's
            first parameter is used, so the input always lands where the
            model lives. A model without parameters falls back to CUDA if
            available, else CPU.

    Returns:
        int: Index of the highest-scoring class.
    """
    if device is None:
        # Resolve at call time from the model itself. A default computed at
        # definition time can disagree with where the model actually is.
        first_param = next(model.parameters(), None)
        if first_param is not None:
            device = first_param.device
        else:
            device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model.eval()
    image_tensor = val_test_transform(image).unsqueeze(0).to(device)
    with torch.no_grad():
        output = model(image_tensor)
        return output.argmax(dim=1).item()


In [None]:
# runs in jupyter container on node-eval-offline

fig, axes = plt.subplots(2, 5, figsize=(14, 6))

# Top row: composed image with the model's prediction.
# Bottom row: GradCAM heatmap for that predicted class.
for i, (key, composed) in enumerate(imgs.items()):
    resized = composed.resize((224, 224))
    image_np = np.array(resized).astype(dtype=np.float32) / 255.0
    pred = predict(model, composed)

    input_tensor = val_test_transform(composed).unsqueeze(0)
    grayscale_cam = cam(input_tensor=input_tensor, targets=[ClassifierOutputTarget(pred)])[0]
    vis = show_cam_on_image(image_np, grayscale_cam, use_rgb=True)

    top_ax, bottom_ax = axes[0, i], axes[1, i]

    top_ax.imshow(resized)
    top_ax.set_title(f"{key}\nPredicted: {pred} ({classes[pred]})")
    top_ax.axis("off")

    bottom_ax.imshow(vis)
    bottom_ax.axis("off")

plt.tight_layout()
plt.show()

AmigoStd

In [None]:
# Composed test images for the AmigoStd section, keyed by scenario name.
imgs = {
    # Font sample alone (no background, no overlay)
    'original_image': compose_font_image('templates/fonts/AmigoStd/AmigoStd_0.png'),
    # Same sample with background 003 and the smiley-face overlay
    'composed_bg1_extra1': compose_font_image('templates/fonts/AmigoStd/AmigoStd_0.png',
                                              'templates/background/003.png',
                                              'templates/extras/smiley_face.png'),
    # Same sample with background 002 and the arrow overlay
    'composed_bg2_extra2': compose_font_image('templates/fonts/AmigoStd/AmigoStd_0.png',
                                              'templates/background/002.png',
                                              'templates/extras/arrow.png'),
    # A different AmigoStd sample on background 003
    'composed_same_class': compose_font_image('templates/fonts/AmigoStd/AmigoStd_1.png',
                                              'templates/background/003.png'),
    # A sample of another font (CourierStd) on background 003
    'composed_diff_class': compose_font_image('templates/fonts/CourierStd/CourierStd_2.png',
                                              'templates/background/003.png')
}


In [None]:
# runs in jupyter container on node-eval-offline

fig, axes = plt.subplots(2, 5, figsize=(14, 6))

# Top row: composed image with the model's prediction.
# Bottom row: GradCAM heatmap for that predicted class.
for i, (key, composed) in enumerate(imgs.items()):
    resized = composed.resize((224, 224))
    image_np = np.array(resized).astype(dtype=np.float32) / 255.0
    pred = predict(model, composed)

    input_tensor = val_test_transform(composed).unsqueeze(0)
    grayscale_cam = cam(input_tensor=input_tensor, targets=[ClassifierOutputTarget(pred)])[0]
    vis = show_cam_on_image(image_np, grayscale_cam, use_rgb=True)

    top_ax, bottom_ax = axes[0, i], axes[1, i]

    top_ax.imshow(resized)
    top_ax.set_title(f"{key}\nPredicted: {pred} ({classes[pred]})")
    top_ax.axis("off")

    bottom_ax.imshow(vis)
    bottom_ax.axis("off")

plt.tight_layout()
plt.show()

ImpactLTStd

In [None]:
# Composed test images for the ImpactLTStd section, keyed by scenario name.
imgs = {
    # Font sample alone (no background, no overlay)
    'original_image': compose_font_image('templates/fonts/ImpactLTStd/ImpactLTStd_0.png'),
    # Same sample with background 003 and the smiley-face overlay
    'composed_bg1_extra1': compose_font_image('templates/fonts/ImpactLTStd/ImpactLTStd_0.png',
                                              'templates/background/003.png',
                                              'templates/extras/smiley_face.png'),
    # Same sample with background 002 and the arrow overlay
    'composed_bg2_extra2': compose_font_image('templates/fonts/ImpactLTStd/ImpactLTStd_0.png',
                                              'templates/background/002.png',
                                              'templates/extras/arrow.png'),
    # A different ImpactLTStd sample on background 003
    'composed_same_class': compose_font_image('templates/fonts/ImpactLTStd/ImpactLTStd_1.png',
                                              'templates/background/003.png'),
    # A sample of another font (AmigoStd) on background 003
    'composed_diff_class': compose_font_image('templates/fonts/AmigoStd/AmigoStd_2.png',
                                              'templates/background/003.png')
}


In [None]:
# runs in jupyter container on node-eval-offline

fig, axes = plt.subplots(2, 5, figsize=(14, 6))

# Top row: composed image with the model's prediction.
# Bottom row: GradCAM heatmap for that predicted class.
for i, (key, composed) in enumerate(imgs.items()):
    resized = composed.resize((224, 224))
    image_np = np.array(resized).astype(dtype=np.float32) / 255.0
    pred = predict(model, composed)

    input_tensor = val_test_transform(composed).unsqueeze(0)
    grayscale_cam = cam(input_tensor=input_tensor, targets=[ClassifierOutputTarget(pred)])[0]
    vis = show_cam_on_image(image_np, grayscale_cam, use_rgb=True)

    top_ax, bottom_ax = axes[0, i], axes[1, i]

    top_ax.imshow(resized)
    top_ax.set_title(f"{key}\nPredicted: {pred} ({classes[pred]})")
    top_ax.axis("off")

    bottom_ax.imshow(vis)
    bottom_ax.axis("off")

plt.tight_layout()
plt.show()

"Looks like" test

In [None]:
# runs in jupyter container on node-eval-offline

gibberish_dir = "gibberish_looks_like"
# os.listdir order is arbitrary; sort so the panels appear in a stable order
font_folders = sorted(
    f for f in os.listdir(gibberish_dir) if os.path.isdir(os.path.join(gibberish_dir, f))
)

selected_images = []

# Sample one gibberish image from each font folder that contains .png files
for font_folder in font_folders:
    folder_path = os.path.join(gibberish_dir, font_folder)
    images = [f for f in os.listdir(folder_path) if f.endswith(".png")]
    if images:
        chosen = random.choice(images)
        selected_images.append((font_folder, os.path.join(folder_path, chosen)))

if not selected_images:
    # plt.subplots cannot create a grid with zero columns
    print(f"No .png samples found under {gibberish_dir!r}; nothing to plot.")
else:
    # squeeze=False always returns a 2-D array of Axes. With the default, a
    # single sample yields a bare Axes object and the zip below would fail.
    fig, axes = plt.subplots(
        1, len(selected_images), figsize=(5 * len(selected_images), 3), squeeze=False
    )

    # Plot the selected gibberish samples together with the model's prediction
    for ax, (font_name, img_path) in zip(axes[0], selected_images):
        image = Image.open(img_path).convert("RGB")
        pred = predict(model, image)

        ax.imshow(image.resize((224, 224)).crop((16, 16, 224, 224)))
        ax.set_title(f"{font_name}\nPred: {classes[pred]}", fontsize=8)
        ax.axis("off")

    plt.tight_layout()
    plt.show()


Test Suite

In [None]:
# Run everything under tests/ with per-test (verbose) output and tracebacks suppressed
!pytest --verbose --tb=no tests/

In [None]:
# tests/test_fonts.py
# NOTE(review): `model` and `image` are neither parameters nor defined in this
# cell; presumably the real test file supplies them — confirm. The notebook's
# own helper is called as predict(model, image), not model.predict(image).
def test_prediction_format():
    """Check that a single prediction comes back as a plain Python int."""
    result = model.predict(image)
    assert isinstance(result, int)


In [None]:
# Re-run only the tests that failed in the previous run (--lf), tracebacks suppressed
!pytest --verbose --lf --tb=no tests/


In [None]:
# Run only the font-detector test-case module, tracebacks suppressed
!pytest --verbose --tb=no tests/test_fontdetector_test_cases.py
