# 04 — Full CV Pipeline Prototype

**Goal:** Wire together YOLOv8s-seg (board detection) + v0 BoggleCNN (letter classification) into an end-to-end pipeline: raw board photo → letter matrix.

**Pipeline stages:**
1. YOLO board detection (segmentation mask)
2. Mask cleanup (morphological ops)
3. Quad fitting (contour → 4 corners)
4. Perspective warp (top-down square board)
5. Grid size detection (intensity profile valleys)
6. Grid-based tile extraction
7. Tile preprocessing (v0-compatible)
8. CNN inference (letter classification)

**Inputs:**
- `yolov8s-seg.pt` — Pretrained COCO segmentation model (23.9 MB)
- `legacy/models/boggle_cnn.pth` — V0 CNN weights (1.6 MB)
- `data/raw/` — 38 board photos (4000x3000 JPGs)

**Outputs:**
- Validated end-to-end pipeline with accuracy metrics
- ONNX exports of both models to `models/`

See: `trevor-misc/docs/02-18-26/full-cv-pipeline-plan.md` for detailed design.

## A. Setup

In [None]:
import random
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from scipy.ndimage import gaussian_filter1d
from scipy.signal import find_peaks
from tqdm.notebook import tqdm
from ultralytics import YOLO

# Reproducibility: feed the same seed to every RNG this notebook imports
# (Python's `random`, NumPy's global RNG, and PyTorch).
SEED = 67
for _seed_rng in (random.seed, np.random.seed, torch.manual_seed):
    _seed_rng(SEED)

# Record library versions alongside the results.
print(f"PyTorch {torch.__version__}, OpenCV {cv2.__version__}")

In [None]:
# --- Filesystem layout ---
# Resolves to <parent of cwd>/prototyping.
# NOTE(review): assumes the notebook is launched from a directory that is a
# sibling of prototyping/ -- confirm against how the notebook is actually run.
PROJECT_ROOT = Path.cwd().parent / "prototyping"  # prototyping/
LEGACY_DIR = PROJECT_ROOT / "legacy"
MODELS_DIR = LEGACY_DIR / "models"
DATA_DIR = PROJECT_ROOT / "data"
RAW_DIR = DATA_DIR / "raw"

# --- Board detection config ---
# COCO class ids accepted as "the board": remote, keyboard, cell phone.
BOARD_CLASSES = {65, 66, 67}
YOLO_CONF = 0.25
YOLO_IMGSZ = 640

# --- Tile preprocessing config ---
TARGET_TILE_SIZE = 100  # CNN expects 100x100
GRID_INSET_RATIO = 0.10  # 10% inset per side to avoid grid lines

# --- CNN class labels (from legacy/settings.py, inlined for self-containment) ---
# Single letters come first (there is no bare "Q"; it appears as the "Qu" tile),
# then the multi-letter tiles and the BLOCK tile. Order defines the class index.
CLASS_LABELS = list("ABCDEFGHIJKLMNOPRSTUVWXYZ") + [
    "Qu", "Er", "Th", "In", "An", "He", "BLOCK",
]

# Lower-case ".jpg" files only, sorted by path for a stable ordering.
raw_files = sorted(RAW_DIR.glob("*.jpg"))
print(f"Raw photos: {len(raw_files)}")
print(f"Models dir: {MODELS_DIR} (exists: {MODELS_DIR.exists()})")

In [None]:
# Load the pretrained segmentation weights that sit directly under PROJECT_ROOT.
yolo_weights_path = PROJECT_ROOT / "yolov8s-seg.pt"
yolo_model = YOLO(str(yolo_weights_path))
print("YOLO model loaded: yolov8s-seg.pt")

In [None]:
class BoggleCNN(nn.Module):
    """V0 BoggleCNN architecture (inlined from legacy/cnn.py).

    Three Conv2d(3x3) -> ReLU -> MaxPool2d(2) stages (8, 16, 32 channels) over a
    single-channel input, followed by a two-layer classifier that emits one
    logit per entry in CLASS_LABELS.

    NOTE: the position of each layer inside the nn.Sequential containers
    determines its parameter name in the state dict, so the layer order must
    stay in sync with the checkpoint that is loaded into this model.
    """

    def __init__(self):
        """Build the feature extractor and the classifier head."""
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 8, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Dropout2d(0.15),
            nn.Conv2d(8, 16, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Dropout2d(0.15),
            nn.Conv2d(16, 32, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )
        # With a 100x100 input (TARGET_TILE_SIZE) the spatial size shrinks as
        # 100 -> 98 -> 49 -> 47 -> 23 -> 21 -> 10, hence 32 * 10 * 10 features.
        self.classifier = nn.Sequential(
            nn.Linear(32 * 100, 128),  # 32 channels * 10*10 spatial
            nn.ReLU(),
            nn.Linear(128, len(CLASS_LABELS)),
        )

    def forward(self, x):
        """Return raw class logits for a batch of tiles.

        Assumes x is (batch, 1, 100, 100) -- other spatial sizes would not
        match the fixed classifier input width; TODO confirm dtype/scaling
        expected by the checkpoint.
        """
        x = self.features(x)
        # Flatten everything except the batch dimension for the linear head.
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x


# Instantiate the v0 architecture and load its weights on CPU.
# weights_only=True restricts unpickling to tensors/containers.
cnn_weights_path = LEGACY_DIR / "models" / "boggle_cnn.pth"
cnn_state_dict = torch.load(cnn_weights_path, map_location="cpu", weights_only=True)

cnn_model = BoggleCNN()
cnn_model.load_state_dict(cnn_state_dict)
cnn_model.eval()  # inference mode: disables the Dropout2d layers

cnn_param_count = sum(p.numel() for p in cnn_model.parameters())
print(f"CNN loaded: {cnn_param_count:,} parameters, {len(CLASS_LABELS)} classes")

In [None]:
# Pick one representative photo to walk through every pipeline stage.
TEST_IMAGE_INDEX = 4
if len(raw_files) <= TEST_IMAGE_INDEX:
    raise RuntimeError(
        f"Need at least {TEST_IMAGE_INDEX + 1} photos in {RAW_DIR}, found {len(raw_files)}"
    )
test_img_path = raw_files[TEST_IMAGE_INDEX]

# cv2.imread signals failure by returning None instead of raising, so check
# explicitly to get a clear error rather than an AttributeError further down.
test_img = cv2.imread(str(test_img_path))
if test_img is None:
    raise RuntimeError(f"Could not read test image: {test_img_path}")
print(f"Test image: {test_img_path.name} ({test_img.shape[1]}x{test_img.shape[0]})")

# OpenCV loads BGR; matplotlib expects RGB.
fig, ax = plt.subplots(1, 1, figsize=(8, 6))
ax.imshow(cv2.cvtColor(test_img, cv2.COLOR_BGR2RGB))
ax.set_title(test_img_path.name)
ax.axis("off")
plt.tight_layout()
plt.show()

## B. Board Detection + Mask Cleanup (Stages 1–2)

In [None]:
def detect_board(image, model, conf=YOLO_CONF, imgsz=YOLO_IMGSZ):
    """Run YOLO inference and return the best board detection.

    Inference is attempted at ``conf`` first and then at the fallback
    thresholds 0.15 and 0.10, but only at those that are strictly lower than
    ``conf`` (so a low ``conf`` never triggers a retry at a higher or
    duplicate threshold). The first attempt that yields at least one mask of
    a class in BOARD_CLASSES wins; among those, the highest-confidence
    detection is returned.

    Args:
        image: BGR image array; its first two dimensions give the output
            mask size.
        model: callable YOLO segmentation model.
        conf: initial confidence threshold.
        imgsz: inference image size passed through to the model.

    Returns:
        (mask, box, confidence) or (None, None, 0.0) on failure.
        mask is uint8 (0/255) at original image resolution.
    """
    thresholds = [conf] + [t for t in (0.15, 0.10) if t < conf]
    h, w = image.shape[:2]

    for conf_thresh in thresholds:
        result = model(image, conf=conf_thresh, verbose=False, imgsz=imgsz)[0]

        if result.masks is None or len(result.masks) == 0:
            continue

        # Filter for board-like COCO classes only
        board_indices = [
            i
            for i, cls in enumerate(result.boxes.cls)
            if int(cls) in BOARD_CLASSES
        ]
        if not board_indices:
            continue

        # Select highest confidence among board-class detections
        best_idx = max(board_indices, key=lambda i: result.boxes.conf[i].item())

        # Resize mask to original image resolution.
        # NOTE(review): a plain resize assumes the mask tensor covers exactly
        # the image area (same aspect ratio, no letterbox padding) -- confirm
        # for photos whose aspect ratio differs from the current data set.
        mask = result.masks.data[best_idx].cpu().numpy()
        mask = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST)
        mask = (mask > 0.5).astype(np.uint8) * 255

        return mask, result.boxes[best_idx], result.boxes.conf[best_idx].item()

    return None, None, 0.0

In [None]:
def cleanup_mask(mask):
    """Return a cleaned copy of a raw 0/255 segmentation mask.

    Applies a morphological close (15x15 ellipse) to fill gaps, then an open
    (5x5 ellipse) to remove specks, and finally keeps only the largest
    connected foreground component. If no foreground component exists the
    morphologically filtered mask is returned as-is.
    """
    for morph_op, ksize in ((cv2.MORPH_CLOSE, 15), (cv2.MORPH_OPEN, 5)):
        element = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (ksize, ksize))
        mask = cv2.morphologyEx(mask, morph_op, element)

    n_components, component_map, component_stats, _ = cv2.connectedComponentsWithStats(mask)
    if n_components > 1:
        # Row 0 of the stats table is the background, so skip it and shift the
        # winning index back by one.
        foreground_areas = component_stats[1:, cv2.CC_STAT_AREA]
        keep = 1 + np.argmax(foreground_areas)
        return (component_map == keep).astype(np.uint8) * 255

    return mask

In [None]:
# Stage 1: YOLO detection on the walkthrough image.
raw_mask, box, det_conf = detect_board(image=test_img, model=yolo_model)
print(f"Detection confidence: {det_conf:.3f}")
if box is not None:
    detected_cls_id = int(box.cls)
    print(f"Detected class: {yolo_model.names[detected_cls_id]}")

# Stage 2 needs a mask; stop here if detection failed.
if raw_mask is None:
    raise RuntimeError("No board detected in test image!")
clean_mask = cleanup_mask(raw_mask)

In [None]:
fig, axes = plt.subplots(1, 3, figsize=(18, 6))

# Blend the raw mask into the photo as a 50% green tint (BGR order).
board_px = raw_mask > 0
tint_bgr = np.array([0, 255, 0])
overlay = test_img.copy()
overlay[board_px] = (0.5 * overlay[board_px] + 0.5 * tint_bgr).astype(np.uint8)

# (image, title, colormap) for each of the three side-by-side panels.
mask_panels = (
    (cv2.cvtColor(test_img, cv2.COLOR_BGR2RGB), "Original", None),
    (cv2.cvtColor(overlay, cv2.COLOR_BGR2RGB), f"Raw YOLO Mask (conf={det_conf:.3f})", None),
    (clean_mask, "Cleaned Mask", "gray"),
)
for panel, (panel_img, panel_title, panel_cmap) in zip(axes, mask_panels):
    panel.imshow(panel_img, cmap=panel_cmap)
    panel.set_title(panel_title)
    panel.axis("off")

plt.suptitle(f"Board Detection: {test_img_path.name}", fontsize=14)
plt.tight_layout()
plt.show()

## C. Quad Fitting + Perspective Warp (Stages 3–4)

In [None]:
def fit_quad(mask):
    """Approximate the largest contour of the cleaned mask with four corners.

    The contour is simplified with approxPolyDP at tolerances from 2% up to
    (but excluding) 10% of its perimeter in 0.5% steps; the first tolerance
    that yields exactly four vertices is used. If none does, the corners of
    the minimum-area rotated rectangle are returned instead.

    Returns (corners_4x2_float32, method_string) or (None, reason).
    """
    found, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if len(found) == 0:
        return None, "no_contours"

    outline = max(found, key=cv2.contourArea)
    perimeter = cv2.arcLength(outline, True)

    # Coarsen the polygon step by step until it collapses to a quadrilateral.
    for frac in np.arange(0.02, 0.10, 0.005):
        poly = cv2.approxPolyDP(outline, frac * perimeter, True)
        if len(poly) != 4:
            continue
        return poly.reshape(4, 2).astype(np.float32), f"approxPolyDP(eps={frac:.3f})"

    # No tolerance gave four points: fall back to the rotated bounding box.
    box_pts = cv2.boxPoints(cv2.minAreaRect(outline))
    return box_pts.astype(np.float32), "minAreaRect"

In [None]:
def order_corners(pts):
    """Return the four (x, y) points ordered TL, TR, BR, BL as float32.

    Uses the sum/difference heuristic: the top-left corner has the smallest
    x + y and the bottom-right the largest; the top-right has the smallest
    y - x and the bottom-left the largest.
    """
    coord_sum = pts.sum(axis=1)
    coord_diff = np.diff(pts, axis=1).squeeze()  # y - x for each point
    picks = (
        np.argmin(coord_sum),   # TL
        np.argmin(coord_diff),  # TR
        np.argmax(coord_sum),   # BR
        np.argmax(coord_diff),  # BL
    )
    return np.array([pts[i] for i in picks], dtype=np.float32)


def warp_board(image, corners, pad_pct=0.07):
    """Rectify the board to a top-down square using its four corners.

    pad_pct: fraction by which every corner is pushed away from the quad's
             centroid before warping (e.g. 0.03 = 3%), so that edge tiles are
             not clipped when the mask hugs the board tightly. Values <= 0
             disable the expansion.

    The output side length is the longest edge of the (expanded) quad,
    rounded up.

    Returns (warped_image, side_length).
    """
    quad = order_corners(corners)

    if pad_pct > 0:
        center = quad.mean(axis=0)
        quad = center + (1 + pad_pct) * (quad - center)

    # Edge i runs from corner i to corner i+1 (wrapping around).
    longest_edge = max(np.linalg.norm(quad[i] - quad[(i + 1) % 4]) for i in range(4))
    size = int(np.ceil(longest_edge))

    target = np.array([[0, 0], [size, 0], [size, size], [0, size]], dtype=np.float32)
    homography = cv2.getPerspectiveTransform(quad, target)
    return cv2.warpPerspective(image, homography, (size, size)), size

In [None]:
# Stage 3: fit a quadrilateral to the cleaned mask.
corners, quad_method = fit_quad(clean_mask)
print(f"Quad fitting method: {quad_method}")
print(f"Corners:\n{corners}")

# fit_quad returns (None, reason) when the mask has no contour; fail loudly
# here instead of letting warp_board crash on a None input.
if corners is None:
    raise RuntimeError(f"Quad fitting failed for test image: {quad_method}")

# Stage 4: perspective-warp the board to a top-down square.
warped, warp_size = warp_board(test_img, corners)
print(f"Warped board size: {warp_size}x{warp_size}")

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# Annotate a copy of the photo with the ordered corners (colors are BGR).
vis = test_img.copy()
ordered = order_corners(corners)
labels_txt = ["TL", "TR", "BR", "BL"]
colors = [(0, 0, 255), (0, 255, 0), (255, 0, 0), (255, 255, 0)]
text_offset = np.array([20, -10])  # nudge the label right/up of its marker
for corner_i, label in enumerate(labels_txt):
    anchor = ordered[corner_i].astype(int)
    color = colors[corner_i]
    cv2.circle(vis, tuple(anchor), 15, color, -1)
    cv2.putText(
        vis, label, tuple(anchor + text_offset),
        cv2.FONT_HERSHEY_SIMPLEX, 1.5, color, 3,
    )

# Outline the fitted quad on top of the markers.
quad_outline = ordered.astype(int).reshape(-1, 1, 2)
cv2.polylines(vis, [quad_outline], True, (0, 255, 0), 3)

corner_panels = (
    (vis, "Detected Corners"),
    (warped, f"Warped Board ({warp_size}x{warp_size})"),
)
for panel, (panel_img, panel_title) in zip(axes, corner_panels):
    panel.imshow(cv2.cvtColor(panel_img, cv2.COLOR_BGR2RGB))
    panel.set_title(panel_title)
    panel.axis("off")

plt.tight_layout()
plt.show()

## C+ Tile Detection (Distance-Transform Peaks)

Instead of cropping to the tile area and dividing it uniformly, we locate the individual tiles directly and fit a regular grid to them. (The v0 pipeline found tile contours; this prototype uses distance-transform peaks to find tile centers instead.) This is more robust to uneven spacing and frame artifacts.

**Strategy:** grayscale Gaussian blur (suppresses letter ink) → Otsu threshold separates the bright tile faces from the frame → distance transform → local maxima give candidate tile centers → cluster the centers into rows and columns and keep the most populated, most evenly spaced square NxN grid.

In [None]:
from itertools import combinations
from scipy.ndimage import label as ndlabel, center_of_mass, maximum_filter


def find_tile_centers(warped, debug=False):
    """Find tile center positions from local maxima of the distance transform.

    Strategy:
    1. Blur → Otsu → binary mask of tiles vs frame
    2. Distance transform → peaks at tile centers
    3. Local maxima detection → candidate points (may include noise)

    Args:
        warped: BGR image of the rectified board.
        debug: when True, also return a dict of intermediate results.

    Returns (centroids_Nx2, debug_info_or_None).  Centroids are (x, y); the
    array has shape (0, 2) when no peak is found. debug_info has the same
    keys whether or not any peak was found: "blurred", "binary", "dist",
    "local_max" (uint8 0/255), "n_peaks", "centroids", "blur_sigma",
    "neighborhood".
    """
    gray = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
    w_img = gray.shape[1]

    # Blur to smooth out letter ink; kernel spans +/- 3 sigma and is odd.
    blur_sigma = max(5, int(w_img * 0.02))
    ksize = blur_sigma * 6 + 1
    blurred = cv2.GaussianBlur(gray, (ksize, ksize), blur_sigma)

    # Otsu threshold
    _, binary = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Distance transform
    dist = cv2.distanceTransform(binary, cv2.DIST_L2, 5)

    # Local maxima: each tile center is a peak in the distance map.
    # Peaks below 15% of the global maximum are discarded as noise.
    neighborhood = max(3, int(w_img * 0.10))
    local_max = maximum_filter(dist, size=neighborhood) == dist
    local_max &= dist > dist.max() * 0.15

    # Label connected maxima regions and get centroids
    labeled, n_peaks = ndlabel(local_max)
    if n_peaks == 0:
        centroids = np.empty((0, 2))
    else:
        # center_of_mass returns (row, col) = (y, x); flip to (x, y).
        raw_centroids = center_of_mass(dist, labeled, range(1, n_peaks + 1))
        centroids = np.array([(x, y) for y, x in raw_centroids])

    debug_info = None
    if debug:
        debug_info = {
            "blurred": blurred,
            "binary": binary,
            "dist": dist,
            "local_max": local_max.astype(np.uint8) * 255,
            "n_peaks": n_peaks,
            "centroids": centroids,
            "blur_sigma": blur_sigma,
            "neighborhood": neighborhood,
        }

    return centroids, debug_info


def _group_by_proximity(values, tolerance):
    """Group 1D values into clusters where consecutive sorted values differ by < tolerance.

    Returns list of arrays, each containing indices into the original *values* array.
    """
    order = np.argsort(values)
    sorted_vals = values[order]

    groups = []
    current = [order[0]]
    for i in range(1, len(sorted_vals)):
        if sorted_vals[i] - sorted_vals[i - 1] < tolerance:
            current.append(order[i])
        else:
            groups.append(np.array(current))
            current = [order[i]]
    groups.append(np.array(current))

    return groups


def _score_combo(groups, centroids_or_values, axis_idx=None):
    """Score a combination of groups by population × spacing uniformity.

    Returns (score, sorted_centers).  Higher score = better grid candidate.
    """
    if axis_idx is not None:
        ctrs = np.sort([np.mean(centroids_or_values[g, axis_idx]) for g in groups])
    else:
        ctrs = np.sort([np.mean(centroids_or_values[g]) for g in groups])

    gaps = np.diff(ctrs)
    mean_gap = np.mean(gaps)
    if mean_gap <= 0:
        return -1, ctrs

    cv = np.std(gaps) / mean_gap  # coefficient of variation; 0 = perfectly uniform
    population = sum(len(g) for g in groups)
    score = population * max(0.01, 1 - cv)
    return score, ctrs


def infer_grid_from_centroids(centroids, image_shape, grid_range=(4, 7)):
    """Infer a regular NxN grid from (noisy) peak positions.

    Enforces square-grid + uniform-spacing constraints:
    1. Group y-coords by proximity → candidate rows
    2. For each N from grid_range[1] - 1 down to grid_range[0] (6, 5, 4 with
       the default range): score all N-element combinations of candidate rows
       by population × spacing uniformity.  Best combo wins.
    3. From the best rows' peaks, repeat for columns.

    The first (largest) N for which both a row set and a column set can be
    selected is returned; smaller sizes are not compared against it.

    Args:
        centroids: (K, 2) array of (x, y) peak positions.
        image_shape: shape of the warped image; only height and width are read.
        grid_range: (smallest N, exclusive upper bound on N). The upper bound
            also sets the clustering tolerance.

    Returns (grid_size, row_centers, col_centers, tile_size), or
    (0, empty array, empty array, 0) when no square grid could be found.
    """
    if len(centroids) < grid_range[0]:
        return 0, np.array([]), np.array([]), 0

    h_img, w_img = image_shape[:2]
    # Smallest plausible tile pitch; peaks closer than a quarter of it along
    # one axis are treated as belonging to the same row/column.
    min_tile_spacing = min(h_img, w_img) / grid_range[1]
    tol = min_tile_spacing * 0.25

    # --- Cluster y-values into candidate rows ---
    row_groups = _group_by_proximity(centroids[:, 1], tol)
    row_sizes = np.array([len(g) for g in row_groups])

    # --- Try square grid sizes from largest to smallest ---
    for N in range(grid_range[1] - 1, grid_range[0] - 1, -1):
        if len(row_groups) < N:
            continue

        # Pre-filter to top 2*N most populated groups (keeps combos manageable)
        # Groups with a single peak are never candidates.
        sorted_indices = np.argsort(-row_sizes)
        candidates = [int(i) for i in sorted_indices[: 2 * N] if row_sizes[i] >= 2]
        if len(candidates) < N:
            continue

        # Score all N-combos of rows
        # Strict ">" means the first combination reaching the top score wins ties.
        best_row_score = -1
        best_row_result = None

        for combo in combinations(candidates, N):
            groups = [row_groups[i] for i in combo]
            score, ctrs = _score_combo(groups, centroids, axis_idx=1)
            if score > best_row_score:
                best_row_score = score
                best_row_result = (groups, ctrs)

        if best_row_result is None:
            continue

        row_groups_sel, row_ctrs = best_row_result

        # Collect x-values from peaks in selected rows only
        valid_idx = np.concatenate(row_groups_sel)
        valid_xs = centroids[valid_idx, 0]

        # Cluster x-values into candidate columns
        # (column group indices refer to valid_xs, not to centroids)
        col_groups = _group_by_proximity(valid_xs, tol)
        col_sizes = np.array([len(g) for g in col_groups])
        if len(col_groups) < N:
            continue

        # Pre-filter to top 2*N col groups
        sorted_c = np.argsort(-col_sizes)
        c_candidates = [int(i) for i in sorted_c[: 2 * N] if col_sizes[i] >= 2]
        if len(c_candidates) < N:
            continue

        # Score all N-combos of columns
        best_col_score = -1
        best_col_ctrs = None

        for combo in combinations(c_candidates, N):
            c_groups = [col_groups[i] for i in combo]
            score, ctrs = _score_combo(c_groups, valid_xs)
            if score > best_col_score:
                best_col_score = score
                best_col_ctrs = ctrs

        if best_col_ctrs is None:
            continue

        col_ctrs = best_col_ctrs

        # Tile size from median spacing
        # (average of the row pitch and the column pitch)
        y_sp = np.median(np.diff(row_ctrs)) if len(row_ctrs) > 1 else h_img / N
        x_sp = np.median(np.diff(col_ctrs)) if len(col_ctrs) > 1 else w_img / N
        tile_size = (y_sp + x_sp) / 2

        return N, row_ctrs, col_ctrs, tile_size

    # Fallback: no valid square grid found
    return 0, np.array([]), np.array([]), 0


def extract_tiles_from_grid(warped, row_centers, col_centers, tile_size, inset_ratio=GRID_INSET_RATIO):
    """Crop one square window per (row center, column center) pair.

    Each window extends int(tile_size * (0.5 - inset_ratio)) pixels on every
    side of its rounded center and is clipped to the image, so crops that
    touch the border can be smaller than the rest. The returned crops are
    slices of *warped*.

    Returns list of tile BGR images in row-major order.
    """
    img_h, img_w = warped.shape[:2]
    reach = int(tile_size * (0.5 - inset_ratio))

    # Sort once so tiles come out top-to-bottom, left-to-right.
    ys = [int(round(v)) for v in np.sort(row_centers)]
    xs = [int(round(v)) for v in np.sort(col_centers)]

    return [
        warped[max(0, y - reach):min(img_h, y + reach), max(0, x - reach):min(img_w, x + reach)]
        for y in ys
        for x in xs
    ]

In [None]:
def correct_tile_perspective(tile_bgr):
    """Axis-align a tile crop using the rotated bounding box of its face.

    The board-level warp can leave a small per-tile rotation. The bright tile
    face is isolated with blur + Otsu thresholding, its minimum-area rotated
    rectangle is computed, and that rectangle is warped onto an upright
    square whose side is the rectangle's longest edge (rounded up).

    Mirrors the legacy pipeline's per-tile warp_perspective_to_top_down()
    applied via min_area_rectangle_contour in extract_tile_images().

    The input crop is returned unchanged when no contour is found, when the
    largest contour covers less than 30% of the crop, or when the resulting
    square would be smaller than 10 px.
    """
    gray = cv2.cvtColor(tile_bgr, cv2.COLOR_BGR2GRAY)
    tile_h, tile_w = gray.shape[:2]

    # Odd blur kernel of roughly 10% of the shorter side hides the letter ink
    # so Otsu picks out the tile face rather than the glyph.
    k = max(3, int(min(tile_h, tile_w) * 0.1) | 1)
    smoothed = cv2.GaussianBlur(gray, (k, k), 0)
    _, face_mask = cv2.threshold(smoothed, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    outlines, _ = cv2.findContours(face_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not outlines:
        return tile_bgr

    face = max(outlines, key=cv2.contourArea)
    if cv2.contourArea(face) < 0.3 * tile_h * tile_w:
        return tile_bgr  # too small to be the tile face

    # Rotated rectangle corners, ordered TL, TR, BR, BL.
    quad = order_corners(cv2.boxPoints(cv2.minAreaRect(face)).astype(np.float32))

    # Square output, as in the legacy code (maxWidth = maxHeight).
    longest_edge = max(np.linalg.norm(quad[i] - quad[(i + 1) % 4]) for i in range(4))
    size = int(np.ceil(longest_edge))
    if size < 10:
        return tile_bgr

    target = np.array([[0, 0], [size, 0], [size, size], [0, size]], dtype=np.float32)
    transform = cv2.getPerspectiveTransform(quad, target)
    return cv2.warpPerspective(tile_bgr, transform, (size, size))

In [None]:
# Find tile centers via distance transform local maxima
centroids, dbg = find_tile_centers(warped, debug=True)
print(f"Found {dbg['n_peaks']} raw peaks (blur sigma={dbg['blur_sigma']}, neighborhood={dbg['neighborhood']})")

# Infer grid with square constraint
# (grid_size is 0 and the center arrays are empty when no grid is found)
grid_size, row_centers, col_centers, tile_size = infer_grid_from_centroids(centroids, warped.shape)
print(f"Inferred grid: {grid_size}x{grid_size} ({len(row_centers)} rows, {len(col_centers)} cols)")
print(f"Estimated tile size: {tile_size:.0f}px")

# Extract tiles from inferred grid
tiles = extract_tiles_from_grid(warped, row_centers, col_centers, tile_size)
print(f"Extracted {len(tiles)} tiles")

# Show row-group filtering details
# Recomputes the row clustering with the same tolerance formula that
# infer_grid_from_centroids uses; the divisor 7 mirrors the upper bound of its
# default grid_range and must be kept in sync by hand.
min_tile_spacing = min(warped.shape[:2]) / 7
tol = min_tile_spacing * 0.25
row_groups = _group_by_proximity(centroids[:, 1], tol)
row_sizes = [len(g) for g in row_groups]
print(f"\nRow clustering (tol={tol:.0f}px): {len(row_groups)} groups, sizes={row_sizes}")
# The message is a simplification: selection scores population × spacing
# uniformity, not population alone.
print(f"Square constraint → kept top {grid_size} rows by population, top {grid_size} cols")

# Diagnostics
fig, axes = plt.subplots(2, 3, figsize=(20, 12))

axes[0, 0].imshow(dbg["blurred"], cmap="gray")
axes[0, 0].set_title(f"Gaussian blur (sigma={dbg['blur_sigma']})")
axes[0, 0].axis("off")

axes[0, 1].imshow(dbg["dist"], cmap="hot")
axes[0, 1].set_title("Distance transform")
axes[0, 1].axis("off")

# All peaks with grid-intersection markers
# NOTE(review): normalises by dist.max(); an all-zero distance map would
# divide by zero here.
vis_peaks = cv2.cvtColor((dbg["dist"] / dbg["dist"].max() * 255).astype(np.uint8), cv2.COLOR_GRAY2BGR)
# Mark peaks near grid intersections green, others red
# (colors are BGR; "near" means within tol of both a row and a column center)
for cx, cy in centroids:
    near_row = any(abs(cy - r) < tol for r in row_centers)
    near_col = any(abs(cx - c) < tol for c in col_centers)
    color = (0, 255, 0) if (near_row and near_col) else (0, 0, 255)
    cv2.circle(vis_peaks, (int(cx), int(cy)), 10, color, 3)
axes[0, 2].imshow(cv2.cvtColor(vis_peaks, cv2.COLOR_BGR2RGB))
# Same on-grid test as in the loop above, repeated to get the count for the title.
n_on_grid = sum(1 for cx, cy in centroids
    if any(abs(cy - r) < tol for r in row_centers) and any(abs(cx - c) < tol for c in col_centers))
axes[0, 2].set_title(f"Peaks: {n_on_grid} on-grid (green) / {dbg['n_peaks'] - n_on_grid} off (red)")
axes[0, 2].axis("off")

# Inferred grid on warped image
# (lines pass through tile centers, not along tile borders)
vis_grid = warped.copy()
for ry in row_centers:
    cv2.line(vis_grid, (0, int(ry)), (vis_grid.shape[1], int(ry)), (0, 255, 0), 2)
for cx in col_centers:
    cv2.line(vis_grid, (int(cx), 0), (int(cx), vis_grid.shape[0]), (0, 255, 0), 2)
axes[1, 0].imshow(cv2.cvtColor(vis_grid, cv2.COLOR_BGR2RGB))
axes[1, 0].set_title(f"Inferred grid ({grid_size}x{grid_size})")
axes[1, 0].axis("off")

# Extraction regions
# Uses the same half-width formula as extract_tiles_from_grid, but the drawn
# boxes are not clipped to the image bounds.
vis_extract = warped.copy()
half = int(tile_size * (0.5 - GRID_INSET_RATIO))
for ry in np.sort(row_centers):
    for cx in np.sort(col_centers):
        y, x = int(round(ry)), int(round(cx))
        cv2.rectangle(vis_extract, (x - half, y - half), (x + half, y + half), (0, 255, 0), 2)
axes[1, 1].imshow(cv2.cvtColor(vis_extract, cv2.COLOR_BGR2RGB))
axes[1, 1].set_title("Extraction regions")
axes[1, 1].axis("off")

# First row of tiles
# Tiles are resized to a common 100x100 so they can be stacked side by side.
if tiles:
    n_show = min(grid_size, len(tiles))
    tile_strip = np.hstack([cv2.resize(t, (100, 100)) for t in tiles[:n_show]])
    axes[1, 2].imshow(cv2.cvtColor(tile_strip, cv2.COLOR_BGR2RGB))
    axes[1, 2].set_title(f"First row of extracted tiles")
else:
    axes[1, 2].text(0.5, 0.5, "No tiles extracted", ha="center", va="center",
                    transform=axes[1, 2].transAxes, fontsize=14, color="red")
axes[1, 2].axis("off")

plt.suptitle("Tile Detection: Distance Transform Peaks → Square Grid Constraint", fontsize=14)
plt.tight_layout()
plt.show()

## D. Grid Size Detection (Stage 5)

Grid size is now inferred from the grid fitted to the distance-transform peaks in the previous section (e.g. 6 rows × 6 columns of tile centers → 6x6). The intensity-profile method below serves as a secondary validation.

In [None]:
def detect_grid_size(warped, min_grid=4, max_grid=6):
    """Detect grid size from intensity-profile valleys.

    The image is averaged along rows and along columns, each profile is
    smoothed, and its interior minima are counted; k minima along an axis are
    read as k + 1 tiles. Smoothing width, minimum valley distance and edge
    margin are derived from the length of the profile being analysed, so
    non-square inputs are handled per axis (for square inputs this is
    identical to using one shared length).

    Resolution of the two per-axis estimates:
    - both within [min_grid, max_grid]: their average, rounded with Python's
      round() (ties go to the even value) and clamped to the range;
    - exactly one within range: that one;
    - neither: 6 (Super Big Boggle) clamped to [min_grid, max_grid], with a
      printed warning.

    Args:
        warped: BGR image of the rectified board.
        min_grid: smallest accepted grid size.
        max_grid: largest accepted grid size.

    Returns (grid_size, h_valleys, v_valleys, h_smooth, v_smooth), where the
    valley arrays hold the interior minima positions of the row-mean and
    column-mean profiles respectively.
    """
    gray = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)

    def _interior_valleys(profile):
        """Return (interior valley indices, smoothed profile) for one axis."""
        n = len(profile)
        smooth = gaussian_filter1d(profile, n * 0.02)
        # Invert the signal so minima become peaks; find_peaks needs distance >= 1.
        valleys, _ = find_peaks(-smooth, distance=max(1, n * 0.10), prominence=5)
        # Drop valleys in the outer 5% at each end (board border, not a tile gap).
        margin = int(n * 0.05)
        return valleys[(valleys > margin) & (valleys < n - margin)], smooth

    h_interior, h_smooth = _interior_valleys(gray.mean(axis=1))
    v_interior, v_smooth = _interior_valleys(gray.mean(axis=0))

    grid_h = len(h_interior) + 1
    grid_v = len(v_interior) + 1
    h_valid = min_grid <= grid_h <= max_grid
    v_valid = min_grid <= grid_v <= max_grid

    if h_valid and v_valid:
        # Equal estimates average to themselves; otherwise split the difference.
        grid_size = round((grid_h + grid_v) / 2)
        grid_size = max(min_grid, min(max_grid, grid_size))
    elif h_valid:
        grid_size = grid_h
    elif v_valid:
        grid_size = grid_v
    else:
        # Fallback: assume 6x6 (Super Big Boggle), kept inside the allowed range.
        grid_size = max(min_grid, min(max_grid, 6))
        print(
            f"WARNING: Grid detection ambiguous (h={grid_h}, v={grid_v}). "
            f"Defaulting to {grid_size}."
        )

    return grid_size, h_interior, v_interior, h_smooth, v_smooth

In [None]:
# Recap of the peak-based grid inference from the tile-detection cell.
grid_summary = f"Peak-based grid: {grid_size}x{grid_size} ({len(row_centers)} rows × {len(col_centers)} cols)"
peak_summary = f"Tile size: {tile_size:.0f}px, {len(centroids)} peaks found"
print(grid_summary)
print(peak_summary)

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(16, 7))

# Extraction regions on warped image
# (same half-width formula as extract_tiles_from_grid; boxes are not clipped)
vis_boxes = warped.copy()
half = int(tile_size * (0.5 - GRID_INSET_RATIO))
for ry in np.sort(row_centers):
    for cx in np.sort(col_centers):
        y, x = int(round(ry)), int(round(cx))
        cv2.rectangle(vis_boxes, (x - half, y - half), (x + half, y + half), (0, 255, 0), 2)
axes[0].imshow(cv2.cvtColor(vis_boxes, cv2.COLOR_BGR2RGB))
axes[0].set_title(f"Tile Grid ({grid_size}x{grid_size}, {len(tiles)} tiles)")
axes[0].axis("off")

# Full NxN grid of extracted tiles
# This opens a second figure. NOTE: once fig2 exists it becomes matplotlib's
# current figure, so the plt.suptitle() below and the plt.tight_layout() at
# the end of the cell act on fig2 rather than on the first figure.
n = grid_size
if len(tiles) >= n * n:
    fig2, axes2 = plt.subplots(n, n, figsize=(2 * n, 2 * n))
    for i in range(n):
        for j in range(n):
            idx = i * n + j  # row-major index into tiles
            if idx < len(tiles):
                axes2[i, j].imshow(cv2.cvtColor(tiles[idx], cv2.COLOR_BGR2RGB))
            axes2[i, j].axis("off")
    plt.suptitle(f"Extracted Tiles ({grid_size}x{grid_size})", fontsize=14)

# Intensity profiles (for reference)
# Drawn on the first figure's right-hand axes. Row-profile and column-profile
# valleys are both shown as vertical lines because the two profiles share the
# x axis (position along the profile).
ip_grid_size, h_valleys, v_valleys, h_smooth, v_smooth = detect_grid_size(warped)
axes[1].plot(h_smooth, "b-", linewidth=1, label="horizontal")
axes[1].plot(v_smooth, "r-", linewidth=1, alpha=0.7, label="vertical")
for v in h_valleys:
    axes[1].axvline(v, color="b", linestyle="--", alpha=0.4)
for v in v_valleys:
    axes[1].axvline(v, color="r", linestyle="--", alpha=0.4)
axes[1].set_title(f"Intensity Profiles (validation: {ip_grid_size}x{ip_grid_size})")
axes[1].legend()

plt.tight_layout()
plt.show()

## E. Tile Extraction + Preprocessing (Stages 6–7)

In [None]:
def extract_grid_tiles(warped, grid_size, inset_ratio=GRID_INSET_RATIO):
    """Split the warped board into grid_size x grid_size uniform cells.

    A square window is cropped around the center of every cell. Its half-width
    is int(cell_width * (0.5 - inset_ratio)) for both axes, and the window is
    clipped to the image. Crops are slices of *warped*, returned in row-major
    order.
    """
    img_h, img_w = warped.shape[:2]
    cell_h = img_h / grid_size
    cell_w = img_w / grid_size
    # The half-width depends only on the cell width, so compute it once.
    reach = int(cell_w * (0.5 - inset_ratio))

    crops = []
    for row in range(grid_size):
        mid_y = int(row * cell_h + cell_h / 2)
        top, bottom = max(0, mid_y - reach), min(img_h, mid_y + reach)
        for col in range(grid_size):
            mid_x = int(col * cell_w + cell_w / 2)
            left, right = max(0, mid_x - reach), min(img_w, mid_x + reach)
            crops.append(warped[top:bottom, left:right])

    return crops

In [None]:
def _contour_depth(hierarchy, idx):
    """Walk the parent chain to compute contour depth."""
    depth = 0
    while hierarchy[idx][3] != -1:
        idx = hierarchy[idx][3]
        depth += 1
    return depth


def preprocess_tile_v0(tile_bgr, target_size=TARGET_TILE_SIZE):
    """Replicate v0 preprocessing: adaptive threshold -> contour mask -> center -> resize.

    Produces a target_size x target_size uint8 image (white letter on black
    background). If no contours are found, the plain grayscale tile is
    resized and returned instead.

    Key details matched to v0 (board_detection.py extract_tile_images + center_letter_image):
    - Adaptive threshold block_size = tile_area * 0.015 (v0 uses mean_tile_area * 0.015)
    - Only depth-1 contours are filtered by min_area; depth-2+ kept unconditionally
    - Letter is centered in the ORIGINAL tile frame (not tight-cropped), preserving
      the letter-to-canvas ratio the CNN was trained on

    The letter's bounding box is cropped inclusively, so its last row and
    column are kept and a 1-pixel-thick mask is not erased.
    NOTE(review): if v0's center_letter_image used exclusive upper bounds,
    this differs from it by one pixel row/column before resizing -- confirm.
    """
    gray = cv2.cvtColor(tile_bgr, cv2.COLOR_BGR2GRAY)

    # Adaptive threshold — v0 uses mean_tile_area * 0.015
    tile_area = gray.shape[0] * gray.shape[1]
    block_size = max(3, int(tile_area * 0.015) | 1)  # must be odd and >= 3
    thresh = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, block_size, 5
    )

    # Binary cleanup
    _, thresh = cv2.threshold(thresh, 200, 255, cv2.THRESH_BINARY)

    # Contour detection with hierarchy
    contours, hierarchy = cv2.findContours(
        thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
    )

    if not contours or hierarchy is None:
        return cv2.resize(gray, (target_size, target_size), interpolation=cv2.INTER_AREA)

    h = hierarchy[0]  # shape: (N, 4) -> [next, prev, child, parent]
    min_area = tile_area * 0.003  # 0.3% — matches v0 production

    # Build binary mask: depth-1 contours -> white, depth-2+ -> black
    # V0 only applies min_area filter to depth-1; depth-2+ are drawn unconditionally
    mask = np.zeros(gray.shape, dtype=np.uint8)
    for i, cnt in enumerate(contours):
        depth = _contour_depth(h, i)
        if depth == 1:  # letter strokes — filter by area
            if cv2.contourArea(cnt) < min_area:
                continue
            cv2.drawContours(mask, [cnt], -1, 255, cv2.FILLED)
        elif depth >= 2:  # counter-spaces (holes in O, B, D, etc.) — always draw
            cv2.drawContours(mask, [cnt], -1, 0, cv2.FILLED)

    # Center letter in ORIGINAL tile frame (matches v0 center_letter_image).
    # This preserves the letter-to-canvas ratio. The old tight-crop approach
    # blew up letters to fill the entire 100x100 canvas, which doesn't match
    # the training data where letters fill ~60-80% of the frame.
    y_coords, x_coords = np.nonzero(mask)
    if len(x_coords) > 0:
        x_min, x_max = np.min(x_coords), np.max(x_coords)
        y_min, y_max = np.min(y_coords), np.max(y_coords)
        # +1: max indices are inclusive, slice ends are exclusive.
        cropped = mask[y_min : y_max + 1, x_min : x_max + 1]
        img_h, img_w = mask.shape
        centered = np.zeros_like(mask)
        start_x = (img_w - cropped.shape[1]) // 2
        start_y = (img_h - cropped.shape[0]) // 2
        centered[start_y : start_y + cropped.shape[0], start_x : start_x + cropped.shape[1]] = cropped
        mask = centered

    return cv2.resize(mask, (target_size, target_size), interpolation=cv2.INTER_AREA)


def preprocess_tile_simple(tile_bgr, target_size=TARGET_TILE_SIZE):
    """Binarize a BGR tile with Otsu's method and resize it to a square.

    The tile is converted to grayscale, inverted when its mean intensity is
    above 127, thresholded with Otsu's automatic threshold, and finally
    resized to ``target_size`` x ``target_size`` with area interpolation.
    """
    mono = cv2.cvtColor(tile_bgr, cv2.COLOR_BGR2GRAY)
    mostly_bright = mono.mean() > 127
    if mostly_bright:
        mono = 255 - mono
    # The threshold value 0 is ignored: THRESH_OTSU picks the threshold itself.
    otsu_mode = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    binarized = cv2.threshold(mono, 0, 255, otsu_mode)[1]
    out_shape = (target_size, target_size)
    return cv2.resize(binarized, out_shape, interpolation=cv2.INTER_AREA)

In [None]:
# Tiles are already extracted via contour detection (cell above).
# Show the first board row three ways: raw crop / v0-preprocessed / simplified.
print(f"Using {len(tiles)} tiles ({grid_size}x{grid_size}) from contour detection")

n_show = min(grid_size, len(tiles))
# squeeze=False keeps `axes` two-dimensional even when a single column is
# shown, so the axes[row, col] indexing below never breaks.
fig, axes = plt.subplots(3, n_show, figsize=(2.5 * n_show, 8), squeeze=False)

for col in range(n_show):
    tile = tiles[col]
    v0_proc = preprocess_tile_v0(tile)
    simple_proc = preprocess_tile_simple(tile)

    # OpenCV images are BGR; matplotlib expects RGB.
    axes[0, col].imshow(cv2.cvtColor(tile, cv2.COLOR_BGR2RGB))
    axes[0, col].set_title(f"(0,{col})", fontsize=8)

    axes[1, col].imshow(v0_proc, cmap="gray")
    axes[1, col].set_title("v0", fontsize=8)

    axes[2, col].imshow(simple_proc, cmap="gray")
    axes[2, col].set_title("simple", fontsize=8)

# Strip ticks and frame instead of calling axis("off"): axis("off") also
# suppresses axis labels, which made the row labels below invisible.
for panel in axes.flat:
    panel.set_xticks([])
    panel.set_yticks([])
    panel.set_frame_on(False)

axes[0, 0].set_ylabel("Raw", fontsize=10)
axes[1, 0].set_ylabel("V0 Preproc", fontsize=10)
axes[2, 0].set_ylabel("Simple", fontsize=10)

plt.suptitle(f"Tile Preprocessing Comparison (row 0 of {grid_size}x{grid_size})", fontsize=12)
plt.tight_layout()
plt.show()

In [None]:
# Show every v0-preprocessed tile at its board position (NxN figure).
fig, axes = plt.subplots(grid_size, grid_size, figsize=(2 * grid_size, 2 * grid_size))
for idx in range(grid_size * grid_size):
    # Tiles are stored row-major: flat index -> (row, column).
    i, j = divmod(idx, grid_size)
    proc = preprocess_tile_v0(tiles[idx])
    cell_ax = axes[i, j]
    cell_ax.imshow(proc, cmap="gray")
    cell_ax.axis("off")

plt.suptitle("All V0-Preprocessed Tiles", fontsize=14)
plt.tight_layout()
plt.show()

In [None]:
# Per-tile perspective correction: visual A/B comparison
# Shows raw crop vs corrected crop vs preprocessed output for each variant
corrected_tiles = [correct_tile_perspective(t) for t in tiles]

n_show = min(grid_size, len(tiles))
# squeeze=False keeps `axes` two-dimensional even when a single column is
# shown, so the axes[row, col] indexing below never breaks.
fig, axes = plt.subplots(4, n_show, figsize=(2.5 * n_show, 10), squeeze=False)

for col in range(n_show):
    raw = tiles[col]
    corrected = corrected_tiles[col]
    proc_raw = preprocess_tile_v0(raw)
    proc_corrected = preprocess_tile_v0(corrected)

    # Colour crops are resized to a common 100x100 for display only;
    # preprocessing above runs on the unresized tiles.
    axes[0, col].imshow(cv2.cvtColor(cv2.resize(raw, (100, 100)), cv2.COLOR_BGR2RGB))
    axes[0, col].set_title(f"(0,{col})", fontsize=8)

    axes[1, col].imshow(cv2.cvtColor(cv2.resize(corrected, (100, 100)), cv2.COLOR_BGR2RGB))

    axes[2, col].imshow(proc_raw, cmap="gray")

    axes[3, col].imshow(proc_corrected, cmap="gray")

# Strip ticks and frame instead of calling axis("off"): axis("off") also
# suppresses axis labels, which made the row labels below invisible.
for panel in axes.flat:
    panel.set_xticks([])
    panel.set_yticks([])
    panel.set_frame_on(False)

axes[0, 0].set_ylabel("Raw Crop", fontsize=10)
axes[1, 0].set_ylabel("Persp. Corrected", fontsize=10)
axes[2, 0].set_ylabel("V0 (no corr.)", fontsize=10)
axes[3, 0].set_ylabel("V0 (corrected)", fontsize=10)

plt.suptitle("Per-Tile Perspective Correction: Before vs After", fontsize=12)
plt.tight_layout()
plt.show()

## F. CNN Inference (Stage 8)

In [None]:
def predict_tile(model, preprocessed_tile):
    """Classify one preprocessed tile with the CNN.

    IMPORTANT: the pixels are cast to float32 and fed without any rescaling,
    i.e. as raw [0, 255] values — the v0 CNN was trained this way.

    Returns a ``(letter, confidence)`` tuple: the ``CLASS_LABELS`` entry of
    the most probable class and its softmax probability.
    """
    pixels = preprocessed_tile.astype(np.float32)
    # Prepend batch and channel axes: (H, W) -> (1, 1, H, W).
    net_input = torch.from_numpy(pixels)[None, None]

    with torch.no_grad():
        class_probs = torch.softmax(model(net_input), dim=1)
        top_prob, top_class = class_probs.max(dim=1)

    letter = CLASS_LABELS[top_class.item()]
    return letter, top_prob.item()


def predict_tiles_batch(model, preprocessed_tiles):
    """Run CNN inference on a batch of preprocessed tiles.

    Args:
        model: Callable CNN mapping an (N, 1, H, W) float32 tensor to logits.
        preprocessed_tiles: Sequence of equally-shaped 2-D tile arrays. As in
            ``predict_tile``, values are fed unscaled (raw [0, 255]).

    Returns:
        ``(letters_list, confidences_list)`` — one ``CLASS_LABELS`` entry and
        one softmax probability per input tile, in input order. An empty
        input yields ``([], [])`` without touching the model.
    """
    # np.stack raises ValueError on an empty sequence; treat "no tiles" as a
    # valid, empty batch instead of crashing the caller.
    if len(preprocessed_tiles) == 0:
        return [], []

    batch = np.stack(preprocessed_tiles).astype(np.float32)
    tensor = torch.from_numpy(batch).unsqueeze(1)  # (N, H, W) -> (N, 1, H, W)

    with torch.no_grad():
        logits = model(tensor)
        probs = torch.softmax(logits, dim=1)
        confs, idxs = probs.max(dim=1)

    letters = [CLASS_LABELS[i] for i in idxs.tolist()]
    confidences = confs.tolist()
    return letters, confidences

In [None]:
# Classify every tile using the v0 preprocessing and print the board row by row.
v0_preprocessed = list(map(preprocess_tile_v0, tiles))
v0_letters, v0_confs = predict_tiles_batch(cnn_model, v0_preprocessed)

print("V0-Preprocessed Results:")
for row_idx in range(grid_size):
    # Predictions are flat and row-major; slice out one board row at a time.
    lo, hi = row_idx * grid_size, (row_idx + 1) * grid_size
    cells = [
        f"{letter:>5s} ({conf:.2f})"
        for letter, conf in zip(v0_letters[lo:hi], v0_confs[lo:hi])
    ]
    print(f"  Row {row_idx}: " + " | ".join(cells))

n_uncertain = sum(conf < 0.5 for conf in v0_confs)
print(f"\nMean confidence: {np.mean(v0_confs):.3f}")
print(f"Min confidence:  {np.min(v0_confs):.3f}")
print(f"Tiles < 0.5 conf: {n_uncertain}/{len(v0_confs)}")

In [None]:
# Same tiles, but with the simplified preprocessing; then compare against v0.
simple_preprocessed = list(map(preprocess_tile_simple, tiles))
simple_letters, simple_confs = predict_tiles_batch(cnn_model, simple_preprocessed)

print("Simplified Results:")
for row_idx in range(grid_size):
    # Predictions are flat and row-major; slice out one board row at a time.
    lo, hi = row_idx * grid_size, (row_idx + 1) * grid_size
    cells = [
        f"{letter:>5s} ({conf:.2f})"
        for letter, conf in zip(simple_letters[lo:hi], simple_confs[lo:hi])
    ]
    print(f"  Row {row_idx}: " + " | ".join(cells))

print(f"\nMean confidence: {np.mean(simple_confs):.3f}")

# Count tiles on which both preprocessing variants predict the same letter.
agree = sum(v0 == simple for v0, simple in zip(v0_letters, simple_letters))
n_tiles_total = len(v0_letters)
print(f"\nAgreement: {agree}/{n_tiles_total} tiles ({100 * agree / n_tiles_total:.0f}%)")
print(f"V0 mean conf: {np.mean(v0_confs):.3f} vs Simple mean conf: {np.mean(simple_confs):.3f}")

In [None]:
# Two-panel summary of the sample board: predicted letters and CNN confidence.
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

board_shape = (grid_size, grid_size)
letter_matrix = np.array(v0_letters).reshape(board_shape)
conf_matrix = np.array(v0_confs).reshape(board_shape)

# Left panel: letters, coloured by confidence band
# (green > 0.8, orange > 0.5, red otherwise).
ax = axes[0]
ax.set_xlim(-0.5, grid_size - 0.5)
ax.set_ylim(grid_size - 0.5, -0.5)  # reversed limits put row 0 at the top
for i, j in np.ndindex(*board_shape):
    c = conf_matrix[i, j]
    if c > 0.8:
        color = "green"
    elif c > 0.5:
        color = "orange"
    else:
        color = "red"
    ax.text(
        j, i, letter_matrix[i, j],
        ha="center", va="center", fontsize=16, fontweight="bold", color=color,
    )
ax.set_title("Predicted Letter Matrix")
ax.set_xticks(range(grid_size))
ax.set_yticks(range(grid_size))
ax.grid(True)

# Right panel: confidence heatmap with the value written in each cell.
heat_ax = axes[1]
im = heat_ax.imshow(conf_matrix, cmap="RdYlGn", vmin=0, vmax=1)
for (i, j), value in np.ndenumerate(conf_matrix):
    heat_ax.text(
        j, i, f"{value:.2f}",
        ha="center", va="center", fontsize=9,
    )
heat_ax.set_title("Confidence Heatmap")
plt.colorbar(im, ax=heat_ax)

plt.suptitle(f"CNN Results: {test_img_path.name}", fontsize=14)
plt.tight_layout()
plt.show()

## D+ Grid Size Validation (confidence-based fallback)

Now that tile extraction and CNN inference are defined, we can run the brute-force grid size validation as a sanity check.

In [None]:
def validate_grid_size_by_confidence(warped_img, cnn, grid_candidates=(4, 5, 6)):
    """Score candidate grid sizes by CNN confidence and return the winner.

    Each candidate ``n`` is evaluated by cutting the warped board with
    ``extract_grid_tiles`` (the uniform grid method), preprocessing the tiles
    with the v0 routine and classifying them. One summary line per candidate
    is printed. Serves as a fallback/validation for contour detection.

    Returns ``(best, results)``: the candidate with the highest mean
    confidence, and a dict mapping every candidate to its statistics
    (``mean_conf``, ``min_conf``, ``n_high_conf``, ``total``).
    """
    results = {}
    for size in grid_candidates:
        tile_crops = extract_grid_tiles(warped_img, size)
        prepared = list(map(preprocess_tile_v0, tile_crops))
        confidences = predict_tiles_batch(cnn, prepared)[1]
        results[size] = dict(
            mean_conf=np.mean(confidences),
            min_conf=np.min(confidences),
            n_high_conf=sum(1 for value in confidences if value > 0.8),
            total=size * size,
        )

    for size in sorted(results):
        stats = results[size]
        print(
            f"  Grid {size}x{size}: mean_conf={stats['mean_conf']:.3f}, "
            f"min_conf={stats['min_conf']:.3f}, high_conf={stats['n_high_conf']}/{stats['total']}"
        )

    def mean_confidence_of(size):
        return results[size]["mean_conf"]

    best = max(results, key=mean_confidence_of)
    return best, results


# Cross-check the contour-based grid size against the confidence-based vote.
print(
    f"Contour-based detection: {grid_size}x{grid_size}",
    "Confidence-based validation (uniform grid on warped):",
    sep="\n",
)
best_grid, grid_results = validate_grid_size_by_confidence(warped, cnn_model)
print(f"Best by confidence: {best_grid}x{best_grid}")

## G. End-to-End Pipeline

In [None]:
def analyze_board(image_path, grid_size_override=None, preprocessing="v0"):
    """Run the full CV pipeline on one photo: image path -> letter matrix.

    Args:
        image_path: Path of the board photo; read with ``cv2.imread``.
        grid_size_override: If truthy, replaces the inferred grid size when
            the flat predictions are cut into rows. Tile extraction still
            uses the inferred rows/cols.
        preprocessing: ``"v0"`` selects ``preprocess_tile_v0``; any other
            value selects ``preprocess_tile_simple``.

    Returns:
        On success, a dict with 'letters' and 'confidences' (lists of rows),
        'grid_size', 'mean_confidence', 'min_confidence' and 'stages'
        (intermediate results). On failure, a dict with 'error' (plus
        'stages' once the image has been loaded).
    """
    preprocess_fn = preprocess_tile_v0 if preprocessing == "v0" else preprocess_tile_simple

    image = cv2.imread(str(image_path))
    if image is None:
        return {"error": f"Failed to load {image_path}"}

    stages = {"image_shape": image.shape}

    # Stage 1: Board detection
    mask, box, det_conf = detect_board(image, yolo_model)
    if mask is None:
        return {"error": "No board detected", "stages": stages}
    stages["detection_conf"] = det_conf
    stages["detection_class"] = yolo_model.names[int(box.cls)] if box is not None else None

    # Stage 2: Mask cleanup
    clean = cleanup_mask(mask)

    # Stage 3: Quad fitting
    corners, quad_method = fit_quad(clean)
    if corners is None:
        return {"error": "Quad fitting failed", "stages": stages}
    stages["quad_method"] = quad_method

    # Stage 4: Perspective warp
    warped_img, warp_sz = warp_board(image, corners)
    stages["warp_size"] = warp_sz
    stages["warped"] = warped_img

    # Stage 5: Find tile centers via distance transform peaks
    tile_centroids, _ = find_tile_centers(warped_img)
    if len(tile_centroids) < 4:
        return {"error": f"Only found {len(tile_centroids)} tile peaks", "stages": stages}

    # Stage 5b: Infer grid from centroid clustering
    gs, rows, cols, tsize = infer_grid_from_centroids(tile_centroids, warped_img.shape)
    stages["n_peaks"] = len(tile_centroids)
    if gs == 0:
        # The batch comparison cell skips boards whose inferred grid size is 0;
        # report the same condition as a failure here instead of crashing
        # further down on an empty tile batch.
        return {"error": "Grid size inference failed", "stages": stages}
    if grid_size_override:
        # NOTE(review): only the row length used for reshaping changes; the
        # tiles below still come from the inferred rows/cols — confirm that
        # this is the intended meaning of the override.
        gs = grid_size_override
    stages["grid_size"] = gs

    # Stage 6: Extract tiles from inferred grid
    tile_imgs = extract_tiles_from_grid(warped_img, rows, cols, tsize)

    # Stage 6b: Per-tile perspective correction (matches legacy per-tile warp)
    tile_imgs = [correct_tile_perspective(t) for t in tile_imgs]
    if not tile_imgs:
        # Nothing to classify: the mean/min statistics below are undefined.
        return {"error": "No tiles extracted", "stages": stages}
    stages["tiles"] = tile_imgs

    # Stages 7-8: Preprocessing + CNN
    preprocessed = [preprocess_fn(t) for t in tile_imgs]
    letters, confs = predict_tiles_batch(cnn_model, preprocessed)

    # Predictions are flat and row-major; cut them into gs rows of gs entries.
    letter_grid = [letters[i * gs : (i + 1) * gs] for i in range(gs)]
    conf_grid = [confs[i * gs : (i + 1) * gs] for i in range(gs)]

    return {
        "letters": letter_grid,
        "confidences": conf_grid,
        "grid_size": gs,
        "mean_confidence": float(np.mean(confs)),
        "min_confidence": float(np.min(confs)),
        "stages": stages,
    }

In [None]:
# Test on 4 photos spread across the dataset
test_indices = [0, 10, 20, 30]
test_paths = [raw_files[k] for k in test_indices if k < len(raw_files)]

n_rows = len(test_paths)
# squeeze=False always yields a 2-D axes array, including the one-photo case.
fig, axes = plt.subplots(n_rows, 3, figsize=(18, 5 * n_rows), squeeze=False)

for row, img_path in enumerate(test_paths):
    result = analyze_board(img_path)
    photo_ax, warp_ax, ax = axes[row]

    if "error" in result:
        # Fill all three panels of this row with the error message.
        for panel in (photo_ax, warp_ax, ax):
            panel.text(
                0.5, 0.5, f"ERROR: {result['error']}",
                ha="center", va="center", transform=panel.transAxes,
                fontsize=12, color="red",
            )
            panel.set_title(img_path.name, fontsize=9)
            panel.axis("off")
        continue

    # Original photo (OpenCV loads BGR; matplotlib expects RGB)
    img = cv2.imread(str(img_path))
    photo_ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    photo_ax.set_title(img_path.name, fontsize=9)
    photo_ax.axis("off")

    # Warped board
    gs = result["grid_size"]
    warp_ax.imshow(cv2.cvtColor(result["stages"]["warped"], cv2.COLOR_BGR2RGB))
    warp_ax.set_title(f"Warped ({gs}x{gs})", fontsize=9)
    warp_ax.axis("off")

    # Letter matrix, coloured by confidence band
    ax.set_xlim(-0.5, gs - 0.5)
    ax.set_ylim(gs - 0.5, -0.5)  # reversed limits put row 0 at the top
    for i, (letter_row, conf_row) in enumerate(zip(result["letters"][:gs], result["confidences"][:gs])):
        for j in range(gs):
            c = conf_row[j]
            if c > 0.8:
                color = "green"
            elif c > 0.5:
                color = "orange"
            else:
                color = "red"
            ax.text(
                j, i, letter_row[j],
                ha="center", va="center", fontsize=14, fontweight="bold", color=color,
            )
    ax.set_title(f"Mean conf: {result['mean_confidence']:.3f}", fontsize=9)
    ax.set_xticks(range(gs))
    ax.set_yticks(range(gs))
    ax.grid(True)

plt.suptitle("End-to-End Pipeline Results", fontsize=14)
plt.tight_layout()
plt.show()

### Perspective Correction: Quantitative Before/After

Compare CNN confidence and predictions with and without per-tile perspective correction across all 38 photos.

In [None]:
# Quantitative comparison: with vs without perspective correction
# Run on all photos, comparing per-tile confidence and predictions

# Imported in this cell because pd.DataFrame is used below, while the only
# other visible pandas import lives in the later batch-evaluation cell; without
# it a top-to-bottom run fails with NameError. Re-importing is harmless.
import pandas as pd

comparison_records = []

for img_path in tqdm(raw_files, desc="Comparing with/without correction"):
    image = cv2.imread(str(img_path))
    if image is None:
        continue

    # Shared stages: detection → warp → tile extraction.
    # Any board that fails a stage is skipped silently.
    mask, box, det_conf = detect_board(image, yolo_model)
    if mask is None:
        continue
    clean = cleanup_mask(mask)
    corners, _ = fit_quad(clean)
    if corners is None:
        continue
    warped_img, _ = warp_board(image, corners)
    tile_centroids, _ = find_tile_centers(warped_img)
    if len(tile_centroids) < 4:
        continue
    gs, rows, cols, tsize = infer_grid_from_centroids(tile_centroids, warped_img.shape)
    if gs == 0:
        continue

    raw_tiles = extract_tiles_from_grid(warped_img, rows, cols, tsize)

    # Without correction
    proc_raw = [preprocess_tile_v0(t) for t in raw_tiles]
    letters_raw, confs_raw = predict_tiles_batch(cnn_model, proc_raw)

    # With correction
    corrected_tiles = [correct_tile_perspective(t) for t in raw_tiles]
    proc_corr = [preprocess_tile_v0(t) for t in corrected_tiles]
    letters_corr, confs_corr = predict_tiles_batch(cnn_model, proc_corr)

    comparison_records.append({
        "filename": img_path.name,
        "grid_size": gs,
        "mean_conf_raw": np.mean(confs_raw),
        "mean_conf_corr": np.mean(confs_corr),
        "min_conf_raw": np.min(confs_raw),
        "min_conf_corr": np.min(confs_corr),
        "n_changed": sum(a != b for a, b in zip(letters_raw, letters_corr)),
        "n_tiles": len(raw_tiles),
        "n_low_raw": sum(1 for c in confs_raw if c < 0.5),
        "n_low_corr": sum(1 for c in confs_corr if c < 0.5),
    })

comp_df = pd.DataFrame(comparison_records)

# Summary stats
print("=" * 70)
print("  PERSPECTIVE CORRECTION: BEFORE vs AFTER")
print("=" * 70)
print(f"  Boards evaluated:       {len(comp_df)}")
if comp_df.empty:
    # With no records the frame has no columns, so every lookup below would
    # raise KeyError (and the percentage would divide by zero).
    print("  No boards made it through the pipeline; nothing to compare.")
    print("=" * 70)
else:
    print(f"  Mean conf (before):     {comp_df['mean_conf_raw'].mean():.4f}")
    print(f"  Mean conf (after):      {comp_df['mean_conf_corr'].mean():.4f}")
    delta = comp_df['mean_conf_corr'].mean() - comp_df['mean_conf_raw'].mean()
    # A delta of exactly zero is neither an improvement nor a regression.
    if delta > 0:
        verdict = "improved"
    elif delta < 0:
        verdict = "worsened"
    else:
        verdict = "unchanged"
    print(f"  Delta:                  {delta:+.4f} ({verdict})")
    print(f"  Min conf (before):      {comp_df['min_conf_raw'].mean():.4f}")
    print(f"  Min conf (after):       {comp_df['min_conf_corr'].mean():.4f}")
    total_changed = comp_df['n_changed'].sum()
    total_tiles = comp_df['n_tiles'].sum()
    print(f"  Predictions changed:    {total_changed}/{total_tiles} tiles ({100 * total_changed / total_tiles:.1f}%)")
    print(f"  Low-conf tiles before:  {comp_df['n_low_raw'].sum()}")
    print(f"  Low-conf tiles after:   {comp_df['n_low_corr'].sum()}")
    print("=" * 70)

    # Per-board breakdown for boards with changes
    changed = comp_df[comp_df["n_changed"] > 0].sort_values("n_changed", ascending=False)
    if len(changed) > 0:
        print(f"\nBoards with prediction changes ({len(changed)}):")
        for _, row in changed.iterrows():
            delta_i = row["mean_conf_corr"] - row["mean_conf_raw"]
            print(f"  {row['filename']}: {row['n_changed']} tiles changed, "
                  f"conf {row['mean_conf_raw']:.3f} → {row['mean_conf_corr']:.3f} ({delta_i:+.3f})")

In [None]:
# Visualization: before vs after confidence distribution
fig, axes = plt.subplots(1, 3, figsize=(18, 5))
bar_ax, scatter_ax, hist_ax = axes

conf_before = comp_df["mean_conf_raw"]
conf_after = comp_df["mean_conf_corr"]

# Panel 1 — paired bars: per-board mean confidence, side by side
x = np.arange(len(comp_df))
width = 0.35
bar_ax.bar(x - width / 2, conf_before, width, label="Without correction", alpha=0.8)
bar_ax.bar(x + width / 2, conf_after, width, label="With correction", alpha=0.8)
bar_ax.set(
    xlabel="Board index",
    ylabel="Mean confidence",
    title="Per-Board Mean Confidence",
    ylim=(0.5, 1.0),
)
bar_ax.legend()

# Panel 2 — scatter: one point per board, dashed diagonal marks "no change"
scatter_ax.scatter(conf_before, conf_after, s=40, alpha=0.7)
lowest = min(conf_before.min(), conf_after.min())
lim = [lowest - 0.02, 1.0]
scatter_ax.plot(lim, lim, "k--", alpha=0.4, label="No change")
scatter_ax.set_xlabel("Mean conf (without correction)")
scatter_ax.set_ylabel("Mean conf (with correction)")
scatter_ax.set_title("Before vs After (above line = improved)")
scatter_ax.legend()
scatter_ax.set_aspect("equal")

# Panel 3 — histogram of per-board confidence deltas
deltas = conf_after - conf_before
mean_delta = deltas.mean()
hist_ax.hist(deltas, bins=20, edgecolor="black", alpha=0.7)
hist_ax.axvline(0, color="black", linestyle="--", alpha=0.4)
hist_ax.axvline(mean_delta, color="red", linestyle="--", label=f"Mean: {mean_delta:+.4f}")
hist_ax.set_xlabel("Confidence delta (after - before)")
hist_ax.set_ylabel("Count")
hist_ax.set_title("Distribution of Confidence Changes")
hist_ax.legend()

plt.suptitle("Per-Tile Perspective Correction Impact", fontsize=14)
plt.tight_layout()
plt.show()

## H. Batch Evaluation

In [None]:
import pandas as pd

# Run the end-to-end pipeline on every photo and keep only lightweight results.
batch_results = []
for img_path in tqdm(raw_files, desc="Processing all boards"):
    result = analyze_board(img_path)
    result["filename"] = img_path.name
    # Drop large intermediate data to save memory
    if "stages" in result:
        for bulky_key in ("warped", "tiles"):
            result["stages"].pop(bulky_key, None)
    batch_results.append(result)

# Partition by outcome: a result is a failure exactly when it has an 'error' key.
successes, failures = [], []
for outcome in batch_results:
    bucket = failures if "error" in outcome else successes
    bucket.append(outcome)

success_pct = 100 * len(successes) / len(batch_results)
print(f"\nSuccess: {len(successes)}/{len(batch_results)} ({success_pct:.1f}%)")
print(f"Failures: {len(failures)}")

In [None]:
# Flatten the batch results into one table row per photo. Both branches use
# the same key order so the DataFrame columns come out identically.
summary_records = []
for r in batch_results:
    if "error" in r:
        record = {
            "filename": r["filename"],
            "status": "FAIL",
            "error": r["error"],
            "grid_size": None,
            "mean_conf": None,
            "min_conf": None,
            # A failure may happen before detection, so every level is optional.
            "det_conf": r.get("stages", {}).get("detection_conf", None),
        }
    else:
        record = {
            "filename": r["filename"],
            "status": "OK",
            "error": None,
            "grid_size": r["grid_size"],
            "mean_conf": r["mean_confidence"],
            "min_conf": r["min_confidence"],
            "det_conf": r["stages"]["detection_conf"],
        }
    summary_records.append(record)

summary_df = pd.DataFrame(summary_records)
# Ascending sort shows the least confident boards first.
display(summary_df.sort_values("mean_conf", ascending=True))

In [None]:
# Flag problematic results
if successes:
    low_conf = [r for r in successes if r["mean_confidence"] < 0.7]
    very_low = [r for r in successes if r["min_confidence"] < 0.3]

    print(f"Low mean confidence (<0.7): {len(low_conf)} boards")
    for r in low_conf:
        print(f"  {r['filename']}: mean={r['mean_confidence']:.3f}, min={r['min_confidence']:.3f}")

    print(f"\nVery low tile confidence (<0.3): {len(very_low)} boards")
    for r in very_low:
        # Walk the letter and confidence grids in lockstep and collect every
        # tile below the 0.3 threshold as (letter, formatted confidence).
        bad_tiles = []
        for letters_in_row, confs_in_row in zip(r["letters"], r["confidences"]):
            for letter, conf in zip(letters_in_row, confs_in_row):
                if conf < 0.3:
                    bad_tiles.append((letter, f"{conf:.2f}"))
        print(f"  {r['filename']}: {len(bad_tiles)} bad tiles: {bad_tiles}")

if failures:
    print("\nFailures:")
    for r in failures:
        print(f"  {r['filename']}: {r['error']}")

In [None]:
# Aggregate statistics and a histogram over all successfully processed boards.
if successes:
    all_confs = [r["mean_confidence"] for r in successes]
    all_min_confs = [r["min_confidence"] for r in successes]
    grid_sizes = [r["grid_size"] for r in successes]

    rule = "=" * 60
    overall_mean = np.mean(all_confs)
    success_pct = 100 * len(successes) / len(batch_results)
    # Distinct grid sizes mapped to how many boards had each.
    unique_sizes, size_counts = np.unique(grid_sizes, return_counts=True)
    size_histogram = dict(zip(unique_sizes, size_counts))

    print(rule)
    print("  BATCH EVALUATION SUMMARY")
    print(rule)
    print(f"  Total images:        {len(batch_results)}")
    print(f"  Successful:          {len(successes)} ({success_pct:.1f}%)")
    print(f"  Failed:              {len(failures)}")
    print(f"  Mean confidence:     {overall_mean:.3f}")
    print(f"  Median confidence:   {np.median(all_confs):.3f}")
    print(f"  Min mean confidence: {np.min(all_confs):.3f}")
    print(f"  Grid sizes:          {size_histogram}")
    print(rule)

    fig, ax = plt.subplots(figsize=(8, 4))
    ax.hist(all_confs, bins=15, edgecolor="black", alpha=0.7)
    ax.axvline(overall_mean, color="red", linestyle="--", label=f"Mean: {overall_mean:.3f}")
    ax.set_xlabel("Mean CNN Confidence per Board")
    ax.set_ylabel("Count")
    ax.set_title("Confidence Distribution Across All Boards")
    ax.legend()
    plt.tight_layout()
    plt.show()

## I. ONNX Export

In [None]:
MODELS_DIR.mkdir(parents=True, exist_ok=True)

# YOLO ONNX export (writes next to the .pt file, i.e. prototyping/yolov8s-seg.onnx)
yolo_export_options = {
    "format": "onnx",
    "imgsz": YOLO_IMGSZ,
    "simplify": True,
    "dynamic": False,
    "half": False,
}
yolo_onnx_path = yolo_model.export(**yolo_export_options)
print(f"YOLO ONNX exported to: {yolo_onnx_path}")

# File size in MiB (bytes / 1024 / 1024).
yolo_onnx_mb = Path(yolo_onnx_path).stat().st_size / 1024 / 1024
print(f"YOLO ONNX size: {yolo_onnx_mb:.1f} MB")

In [None]:
import onnx

# Export the letter CNN to ONNX and validate the resulting graph.
dummy_input = torch.randn(1, 1, 100, 100)
cnn_onnx_path = MODELS_DIR / "boggle_cnn.onnx"

# Axis 0 of both the input and the output is exported as a named dynamic axis.
cnn_dynamic_axes = {
    "image": {0: "batch"},
    "logits": {0: "batch"},
}
torch.onnx.export(
    cnn_model,
    dummy_input,
    str(cnn_onnx_path),
    input_names=["image"],
    output_names=["logits"],
    dynamic_axes=cnn_dynamic_axes,
    opset_version=17,
)

# Reload the file from disk and run the ONNX structural checker on it.
onnx_model = onnx.load(str(cnn_onnx_path))
onnx.checker.check_model(onnx_model)

cnn_onnx_mb = cnn_onnx_path.stat().st_size / 1024 / 1024
print(f"CNN ONNX exported to: {cnn_onnx_path}")
print(f"CNN ONNX size: {cnn_onnx_mb:.2f} MB")

In [None]:
import onnxruntime as ort

session = ort.InferenceSession(str(cnn_onnx_path))

# Compare PyTorch vs ONNX on first 10 tiles
n_test = min(10, len(tiles))
test_tiles_proc = [preprocess_tile_v0(tiles[i]) for i in range(n_test)]

print("PyTorch vs ONNX comparison:")
print(f"{'Tile':>4} {'PT Letter':>10} {'PT Conf':>8} {'OX Letter':>10} {'OX Conf':>8} {'Match':>6}")
print("-" * 52)

all_match = True
for i, proc in enumerate(test_tiles_proc):
    pt_letter, pt_conf = predict_tile(cnn_model, proc)

    # Same input layout as the PyTorch path: float32 with batch and channel
    # axes prepended, values left unscaled.
    input_array = proc.astype(np.float32)[np.newaxis, np.newaxis, :, :]
    onnx_logits = session.run(None, {"image": input_array})[0]

    # Numerically stable softmax: subtracting the row maximum leaves the
    # result unchanged but keeps np.exp from overflowing to inf (and the
    # division from producing NaN) when logits are large.
    shifted_logits = onnx_logits - onnx_logits.max(axis=1, keepdims=True)
    exp_logits = np.exp(shifted_logits)
    onnx_probs = exp_logits / exp_logits.sum(axis=1, keepdims=True)

    onnx_idx = onnx_probs.argmax(axis=1)[0]
    onnx_conf = onnx_probs[0, onnx_idx]
    onnx_letter = CLASS_LABELS[onnx_idx]

    # Parity is judged on the predicted letter only; confidences are printed
    # for eyeballing.
    match = pt_letter == onnx_letter
    if not match:
        all_match = False

    print(
        f"{i:>4} {pt_letter:>10} {pt_conf:>8.4f} {onnx_letter:>10} {onnx_conf:>8.4f} "
        f"{'OK' if match else 'MISMATCH':>6}"
    )

if all_match:
    print("\nAll predictions match between PyTorch and ONNX.")
else:
    print("\nWARNING: Some predictions differ between PyTorch and ONNX!")

In [None]:
# Recap of both exported models: location, expected input and file size.
rule = "=" * 60
yolo_onnx_mb = Path(yolo_onnx_path).stat().st_size / 1024 / 1024
cnn_onnx_mb = cnn_onnx_path.stat().st_size / 1024 / 1024

summary_lines = [
    rule,
    "  ONNX EXPORT SUMMARY",
    rule,
    f"  YOLO model:  {yolo_onnx_path}",
    f"    Input:     (1, 3, {YOLO_IMGSZ}, {YOLO_IMGSZ}) float32",
    f"    Size:      {yolo_onnx_mb:.1f} MB",
    f"  CNN model:   {cnn_onnx_path}",
    "    Input:     (batch, 1, 100, 100) float32 [values 0-255]",
    "    Output:    (batch, 32) logits",
    f"    Size:      {cnn_onnx_mb:.2f} MB",
    f"  Class labels: {len(CLASS_LABELS)} classes",
    rule,
]
print("\n".join(summary_lines))