# Helmet OCR with YOLOv8
This notebook detects helmets in images, crops the sticker area, performs OCR, and summarizes size/year counts.

In [None]:
import cv2
import os
import pytesseract
import re
from ultralytics import YOLO
from collections import defaultdict
import pandas as pd
from matplotlib import pyplot as plt

In [None]:
# Load the YOLOv8 nano weights through ultralytics; `model` is the global
# detector that detect_helmets() calls.
# NOTE(review): 'yolov8n.pt' looks like the stock pretrained checkpoint --
# confirm it actually has a helmet class, because detect_helmets() keeps every
# detected box regardless of class.
model = YOLO('yolov8n.pt')

In [None]:
def detect_helmets(image):
    """Run the global YOLO ``model`` on *image* and return its bounding boxes.

    Args:
        image: Input accepted by the ultralytics model (the notebook passes
            the array returned by ``cv2.imread``). ``None`` is tolerated.

    Returns:
        A list with one ``[x1, y1, x2, y2]`` list of floats per detected box,
        in the order the model reports them. Every box is returned; no class
        or confidence filtering is applied here. Empty when *image* is
        ``None``.
    """
    # cv2.imread signals an unreadable file by returning None. Handing None to
    # the model makes ultralytics fall back to its bundled sample images, so
    # the boxes would not belong to the caller's picture at all.
    if image is None:
        return []
    return [
        box.xyxy[0].tolist()
        for result in model(image)
        for box in result.boxes
    ]

In [None]:
def crop_sticker_region_from_helmet(image, box):
    """Return the sticker area of a helmet: bottom 30% x left 40% of its box.

    Args:
        image: Array indexed as ``image[row, col, ...]`` (height first).
        box: Four numbers ``(x1, y1, x2, y2)``; each is truncated to ``int``.

    Returns:
        A view into *image* covering the sticker region. It can be empty
        (zero rows or columns) when the clamped box is very small.
    """
    img_h, img_w = image.shape[:2]
    x1, y1, x2, y2 = (int(v) for v in box)

    # Clamp to the image extent. Without this a negative coordinate would be
    # interpreted by NumPy as "count from the end" and select the wrong
    # region (or nothing) instead of the part of the box inside the image.
    x1, x2 = max(0, min(x1, img_w)), max(0, min(x2, img_w))
    y1, y2 = max(0, min(y1, img_h)), max(0, min(y2, img_h))

    helmet_crop = image[y1:y2, x1:x2]
    h, w = helmet_crop.shape[:2]
    return helmet_crop[int(h*0.7):h, 0:int(w*0.4)]

In [None]:
def extract_size_year(crop_img):
    """OCR a sticker crop and pull out the helmet size and production year.

    The crop is converted to grayscale, binarised with a fixed threshold of
    150, and passed to Tesseract. The recognised text is then searched for a
    size code (XS, S, M, L, XL, XXL) followed, after optional whitespace or
    dashes, by a four-digit year starting with ``20``.

    Args:
        crop_img: BGR image array of the sticker region, or ``None``.

    Returns:
        ``(size, year)`` as two strings for the first match in the OCR text,
        or ``(None, None)`` when nothing matches or the crop has no pixels.
    """
    # An empty crop (e.g. from a helmet box only a couple of pixels wide)
    # would make cv2.cvtColor raise and abort the whole run; treat it as
    # "nothing readable" instead.
    if crop_img is None or crop_img.size == 0:
        return (None, None)

    gray = cv2.cvtColor(crop_img, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)
    text = pytesseract.image_to_string(thresh)
    match = re.search(r'(XS|S|M|L|XL|XXL)[\s\-]*(20\d{2})', text)
    return match.groups() if match else (None, None)

In [None]:
# Walk every file in ./photos, read the sticker on each detected helmet, show
# the crop with its OCR result, and tally how often each (size, year) occurs.
summary = defaultdict(int)
for fname in sorted(os.listdir('photos')):  # sorted for a reproducible order
    img = cv2.imread(os.path.join('photos', fname))
    if img is None:
        # cv2.imread returns None for sub-directories and for files it cannot
        # decode; skip them rather than crash further down.
        print(f'Skipping {fname}: not a readable image')
        continue
    for box in detect_helmets(img):
        crop = crop_sticker_region_from_helmet(img, box)
        if crop.size == 0:
            # Very small detections leave no pixels in the sticker region;
            # there is nothing to OCR or display.
            continue
        size, year = extract_size_year(crop)
        if size and year:
            summary[(size, year)] += 1
        plt.imshow(cv2.cvtColor(crop, cv2.COLOR_BGR2RGB))
        plt.title(f'{size} {year}')
        plt.show()
# Explicit columns keep the table well-formed even when nothing was recognised.
df = pd.DataFrame(
    [{'Size': s, 'Year': y, 'Count': c} for (s, y), c in summary.items()],
    columns=['Size', 'Year', 'Count'],
)
df