# Helmet OCR Agent Notebook
This notebook captures images from a camera, crops fixed regions of each image as a stand-in for real sticker detection, runs OCR on those crops, and summarizes the helmet sizes and purchase years it finds.

In [None]:
import cv2
import pytesseract
from PIL import Image
import numpy as np
import re
import os
from collections import defaultdict
import pandas as pd

In [None]:
# PARAMETERS
# Number of frames to capture; the pipeline later reads back the same count
# (photo_0.jpg .. photo_{NUM_PHOTOS-1}.jpg).
NUM_PHOTOS = 5
# Device index handed to cv2.VideoCapture.
CAMERA_INDEX = 0
# Directory where captured frames are written and later read from.
PHOTO_DIR = "photos"
# Create the directory up front; exist_ok=True keeps re-running the cell safe.
os.makedirs(PHOTO_DIR, exist_ok=True)

In [None]:
# Step 1: Capture Images
def capture_images():
    """Capture NUM_PHOTOS frames from the camera and save them as JPEGs.

    Frames are read from the device at CAMERA_INDEX and written to
    ``{PHOTO_DIR}/photo_{i}.jpg`` for ``i`` in ``range(NUM_PHOTOS)``.
    A frame that fails to read is skipped, so the corresponding file is
    simply not (re)written.

    Returns:
        None. The only effect is the image files written to PHOTO_DIR.
    """
    cam = cv2.VideoCapture(CAMERA_INDEX)
    try:
        if not cam.isOpened():
            # No usable device: every read would fail anyway, so stop early
            # without writing anything (same outcome as before, minus the
            # pointless read attempts).
            return
        for i in range(NUM_PHOTOS):
            ret, frame = cam.read()
            if ret:
                cv2.imwrite(f"{PHOTO_DIR}/photo_{i}.jpg", frame)
    finally:
        # Always hand the device back, even if read/imwrite raised.
        cam.release()

In [None]:
# Step 2: Simulate cropping helmet stickers
def extract_sticker_regions(image):
    """Return two fixed crops from the bottom band of *image*.

    This is a placeholder for real sticker detection: it slices the bottom
    30% of the image at two fixed horizontal windows (5%-25% and 35%-55%
    of the width).

    Args:
        image: A NumPy image array whose first two axes are (height, width).
            Both colour (H, W, C) and grayscale (H, W) arrays are accepted.

    Returns:
        A list of two array slices (NumPy views into *image*, not copies).
    """
    # Only the first two axes matter; unpacking exactly three values would
    # raise on 2-D grayscale input.
    h, w = image.shape[:2]
    top = int(h * 0.7)
    return [
        image[top:h, int(w * 0.05):int(w * 0.25)],
        image[top:h, int(w * 0.35):int(w * 0.55)],
    ]

In [None]:
# Step 3: OCR on cropped sticker
# Size code followed by a 20xx year, optionally separated by whitespace or
# hyphens. The lookbehind stops the last letter of an ordinary word (e.g. the
# "S" in "HELMETS 2020") from being read as a size; the lookahead stops a
# longer digit run (e.g. "20231") from being truncated to a year.
_SIZE_YEAR_RE = re.compile(
    r'(?<![A-Za-z])(XXL|XL|XS|S|M|L)[\s\-]*(20\d{2})(?!\d)'
)


def extract_size_year(crop_img):
    """OCR a sticker crop and pull out a (size, year) pair.

    The crop is converted to grayscale, binarised with a fixed threshold of
    150, and passed to Tesseract. The recognised text is then searched for a
    size code (XS, S, M, L, XL, XXL) followed by a four-digit year starting
    with "20".

    Args:
        crop_img: Image array for one sticker region. A 3-D array is treated
            as BGR and converted to grayscale; a 2-D array is used as-is.

    Returns:
        ``(size, year)`` as strings when a match is found, otherwise
        ``(None, None)``. An empty or missing crop also yields
        ``(None, None)``.
    """
    # Nothing to OCR; cvtColor would raise on an empty array.
    if crop_img is None or crop_img.size == 0:
        return (None, None)

    # Already single-channel input must not go through BGR2GRAY.
    if crop_img.ndim == 2:
        gray = crop_img
    else:
        gray = cv2.cvtColor(crop_img, cv2.COLOR_BGR2GRAY)

    _, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)
    text = pytesseract.image_to_string(Image.fromarray(thresh))

    match = _SIZE_YEAR_RE.search(text)
    return match.groups() if match else (None, None)

In [None]:
# Step 4: Full process + summary table
def run_pipeline():
    """Run OCR over all captured photos and tabulate (size, year) counts.

    For each ``{PHOTO_DIR}/photo_{i}.jpg`` with ``i`` in
    ``range(NUM_PHOTOS)``, the image is loaded, split into sticker crops via
    ``extract_sticker_regions``, and each crop is passed to
    ``extract_size_year``. Photos that cannot be loaded and crops without a
    recognised size/year are skipped.

    Returns:
        A pandas DataFrame with columns ``Size``, ``Year`` and ``Count``,
        sorted by ``Year`` then ``Size``. ``Year`` holds the string captured
        by the OCR regex. When nothing was recognised the frame is empty but
        still has those three columns.
    """
    summary = defaultdict(int)
    for i in range(NUM_PHOTOS):
        path = f"{PHOTO_DIR}/photo_{i}.jpg"
        img = cv2.imread(path)
        if img is None:
            # Missing or unreadable file: cv2.imread signals this with None.
            continue
        for crop in extract_sticker_regions(img):
            size, year = extract_size_year(crop)
            if size and year:
                summary[(size, year)] += 1

    rows = [
        {'Size': s, 'Year': y, 'Count': c}
        for (s, y), c in summary.items()
    ]
    # Explicit columns keep the frame well-formed when there are no rows;
    # otherwise sort_values would raise KeyError on the missing columns.
    df = pd.DataFrame(rows, columns=['Size', 'Year', 'Count'])
    return df.sort_values(['Year', 'Size'])