# YOLO-FastestV2 Digit Detection

Train a lightweight YOLO-style detector for handwritten digit recognition on ESP32-CAM.

**Model**: YoloFastestV2 (adapted from https://github.com/dog-qiuqiu/Yolo-FastestV2)

In [None]:
# Mount Google Drive at /content/drive; the project folders below live on it.
from google.colab import drive
drive.mount('/content/drive')

import os

# Single root folder for the project, with one sub-folder for input data
# and one for everything the notebook produces.
PROJECT_ROOT = "/content/drive/My Drive/EE4065_Project"
DATASET_PATH, OUTPUT_PATH = (
    os.path.join(PROJECT_ROOT, sub) for sub in ("dataset", "output")
)

# Create both folders up front; existing folders are left untouched.
for _folder in (DATASET_PATH, OUTPUT_PATH):
    os.makedirs(_folder, exist_ok=True)

print(f"Dataset: {DATASET_PATH}")
print(f"Output: {OUTPUT_PATH}")

In [None]:
import torch
import torch.optim as optim
from torch.utils.data import DataLoader
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

import sys
# Put the current working directory on the import path so the local
# `dataset` and `model` modules imported right after this can be resolved.
# NOTE(review): presumably dataset.py / model.py sit in the cwd — confirm.
sys.path.append(os.getcwd())

from dataset import SyntheticDigitDataset, RESOLUTION, CELLS, CATEGORIES
from model import YoloFastestV2, DetectionLoss

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Training hyperparameters.
BATCH, EPOCHS = 16, 15  # DataLoader batch size / number of training epochs
LR = 1e-3               # learning rate handed to the optimizer

## Data Preparation

In [None]:
# Build the training dataset from the dataset folder and wrap it in a
# shuffling loader that yields batches of BATCH samples.
train_data = SyntheticDigitDataset(root_folder=DATASET_PATH, train_mode=True)
loader = DataLoader(train_data, shuffle=True, batch_size=BATCH, num_workers=2)

# Sanity check: show the first four images of one batch side by side.
sample_imgs, _ = next(iter(loader))
fig, axes = plt.subplots(1, 4, figsize=(12, 3))
for col in range(len(axes)):
    panel = axes[col]
    # squeeze() drops size-1 dimensions so imshow receives a plain 2-D image.
    panel.imshow(sample_imgs[col].squeeze(), cmap='gray', vmin=0, vmax=1)
    panel.axis('off')
fig.suptitle('Training Samples')
plt.show()

## Training

In [None]:
# Model, optimizer and loss used for the whole training run.
net = YoloFastestV2().to(DEVICE)
opt = optim.Adam(net.parameters(), lr=LR)
loss_fn = DetectionLoss()

# Mean training loss of each epoch, in order; plotted in the Results section.
history = []

for ep in range(EPOCHS):
    net.train()
    running_total = 0.0

    for batch_imgs, batch_truth in loader:
        # Move the batch to the same device as the model.
        batch_imgs = batch_imgs.to(DEVICE)
        batch_truth = batch_truth.to(DEVICE)

        # One optimization step: clear old gradients, forward, backward, update.
        opt.zero_grad()
        batch_loss = loss_fn(net(batch_imgs), batch_truth)
        batch_loss.backward()
        opt.step()

        running_total += batch_loss.item()

    # Average over the number of batches in the loader.
    avg = running_total / len(loader)
    history.append(avg)
    print(f"[{ep+1}/{EPOCHS}] Loss: {avg:.4f}")

print("Done.")

## Results

In [None]:
# Plot the mean training loss per epoch and save the figure to OUTPUT_PATH.
plt.figure(figsize=(8, 4))
# Derive the x-axis from `history` itself rather than from EPOCHS: if
# training was interrupted or EPOCHS was edited after the run, the two
# lengths differ and plt.plot would fail on the mismatched x/y sizes.
plt.plot(range(1, len(history) + 1), history, 'b-o')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training Progress')
plt.grid(True)
# Save before show() so the file is written from the populated figure.
plt.savefig(os.path.join(OUTPUT_PATH, 'training_curve.png'))
plt.show()

In [None]:
def extract_predictions(model, dataloader):
    """Collect true and predicted class indices for every labelled position.

    Puts ``model`` in eval mode (it is left in eval mode afterwards) and
    iterates ``dataloader`` with gradients disabled. Positions whose target
    has channel 0 equal to 1 are selected; for those positions the class
    index is the argmax over channels 5 onward of the model output
    (prediction) and of the target (ground truth).

    NOTE(review): assumes the model output and the target tensor share the
    same layout along every axis — confirm against dataset.py / model.py.

    Args:
        model: network to evaluate; called on image batches moved to DEVICE.
        dataloader: iterable yielding ``(images, target)`` batches.

    Returns:
        ``(true_list, pred_list)``: two parallel lists of class indices.
    """
    labels_true, labels_pred = [], []
    model.eval()
    with torch.no_grad():
        for batch, target in dataloader:
            scores = model(batch.to(DEVICE)).cpu().numpy()
            target = target.numpy()

            # Boolean mask of positions flagged in channel 0 of the target.
            occupied = target[..., 0] == 1
            if not occupied.any():
                continue

            labels_pred.extend(np.argmax(scores[occupied][:, 5:], axis=1))
            labels_true.extend(np.argmax(target[occupied][:, 5:], axis=1))
    return labels_true, labels_pred

# Evaluate on the same loader that was used for training, so this matrix
# reflects training-set performance.
y_true, y_pred = extract_predictions(net, loader)

# Pin the label set to the ten digits. Without `labels`, confusion_matrix
# sizes the matrix from the classes that actually occur, which would not
# line up with the ten display labels if any digit were missing.
digit_labels = list(range(10))
cm = confusion_matrix(y_true, y_pred, labels=digit_labels)
disp = ConfusionMatrixDisplay(cm, display_labels=digit_labels)

# Draw onto an explicitly created axes. ConfusionMatrixDisplay.plot() opens
# its own figure when no `ax` is given, so a separate plt.figure(...) call
# would stay empty and its figsize would never apply to the matrix.
cm_fig, cm_ax = plt.subplots(figsize=(8, 8))
disp.plot(cmap='Blues', ax=cm_ax)
cm_ax.set_title('Confusion Matrix')
cm_fig.savefig(os.path.join(OUTPUT_PATH, 'confusion_matrix.png'))
plt.show()

## Export

In [None]:
# Output locations for the PyTorch weights and the ONNX graph.
PT_FILE = os.path.join(OUTPUT_PATH, "yolo_fastestv2.pt")
ONNX_FILE = os.path.join(OUTPUT_PATH, "yolo_fastestv2.onnx")

# Save only the weights (state_dict), not the pickled module object.
torch.save(net.state_dict(), PT_FILE)
print(f"Saved: {PT_FILE}")

# Switch to inference mode explicitly so the exported graph does not depend
# on whether an earlier cell happened to call eval() on the network.
net.eval()

# Example input for the exporter: one single-channel RESOLUTION x RESOLUTION
# image on the same device as the model. The values themselves are arbitrary.
dummy = torch.randn(1, 1, RESOLUTION, RESOLUTION).to(DEVICE)
# The tensor names chosen here are what the conversion step refers to
# ("input" is the name passed to the calibration option).
torch.onnx.export(net, dummy, ONNX_FILE, input_names=["input"], output_names=["output"])
print(f"Exported: {ONNX_FILE}")

## TFLite Conversion

In [None]:
!pip install -q onnx==1.16.1 onnx2tf tensorflow onnx-graphsurgeon tf-keras ai-edge-litert sng4onnx simple-onnx-processing-tools

import numpy as np
print("Generating calibration samples...")
cal_iter = iter(loader)
cal_samples = [next(cal_iter)[0].numpy() for _ in range(10)]
cal_array = np.concatenate(cal_samples, axis=0)
np.save("calibration.npy", cal_array)
print(f"Calibration shape: {cal_array.shape}")

TFLITE_DIR = os.path.join(OUTPUT_PATH, "tflite")
os.makedirs(TFLITE_DIR, exist_ok=True)

!onnx2tf -i "{ONNX_FILE}" -o "{TFLITE_DIR}" -oiqt -qt per-tensor -cind "input" "calibration.npy" "[[[[0]]]]" "[[[[1]]]]"

In [None]:
from converter import tflite_to_header

# Collect every .tflite file in the conversion folder. os.listdir returns
# entries in arbitrary order, so sort them to make the pick reproducible
# from run to run instead of depending on directory order.
tflite_candidates = sorted(
    name for name in os.listdir(TFLITE_DIR) if name.endswith(".tflite")
)
tflite_file = (
    os.path.join(TFLITE_DIR, tflite_candidates[0]) if tflite_candidates else None
)

if tflite_file:
    if len(tflite_candidates) > 1:
        # NOTE(review): the converter can write several model variants into
        # this folder; confirm the first one alphabetically is the variant
        # the firmware expects, or select it by name here.
        print(f"Found {len(tflite_candidates)} .tflite files; using: {tflite_file}")
    # Convert the chosen model into a header exposing it as "model_data".
    header_file = os.path.join(OUTPUT_PATH, "model_data.h")
    tflite_to_header(tflite_file, header_file, "model_data")
    print(f"Header: {header_file}")
else:
    print("TFLite file not found!")