In [None]:
import cv2
import torch
import easyocr
from ultralytics import YOLO
import pandas as pd
from datetime import datetime
import torchvision.transforms as T
from PIL import Image
import torch.nn as nn

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# YOLO detector loaded from a local checkpoint
# (presumably fine-tuned for licence plates; confirm against the training run).
detector = YOLO("models/yolo11n_best.pt")

# EasyOCR reader configured for Spanish and English.
reader_easy = easyocr.Reader(['es', 'en'])

# Definición de CRNN
class CRNN(nn.Module):
    """Convolutional-recurrent network that maps an image to per-column class scores.

    A stack of 3x3 convolutions extracts features, a 2-layer bidirectional LSTM
    reads them column by column, and a linear layer scores every time step.

    The four pooling stages halve the height four times (factor 16) and the
    width twice (factor 4). ``forward`` folds channels x height into the feature
    axis and the LSTM is built for 512 features, so the CNN output must have
    height 1 (e.g. a 16-pixel-high input). A 32-pixel-high input yields height 2,
    i.e. 1024 features, which is rejected with a ``ValueError``.

    Args:
        num_classes: Number of scores produced per time step (presumably the
            alphabet size plus a CTC blank; confirm against the training code).
    """

    def __init__(self, num_classes):
        super().__init__()
        # NOTE: the layer order fixes the ``cnn.<index>`` parameter names, so it
        # must not change if existing checkpoints are to remain loadable.
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            # From here on only the height is pooled, preserving horizontal resolution.
            nn.MaxPool2d(kernel_size=(2, 1), stride=(2, 1)),
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 1), stride=(2, 1)),
        )
        self.rnn = nn.LSTM(512, 256, num_layers=2, bidirectional=True, batch_first=True)
        # Bidirectional LSTM output is 2 * 256 = 512 features per time step.
        self.fc = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return raw class scores of shape ``(batch, time_steps, num_classes)``.

        Args:
            x: Input batch of shape ``(batch, 1, height, width)``.

        Raises:
            ValueError: If channels x height of the CNN output does not match
                the LSTM input size (the input image is too tall or too short).
        """
        feats = self.cnn(x)
        b, c, h, w = feats.size()
        if c * h != self.rnn.input_size:
            # Fail early with an actionable message instead of an opaque error
            # raised from inside the LSTM.
            raise ValueError(
                f"CRNN: la CNN produjo {c * h} características por paso "
                f"(canales={c}, alto={h}), pero la LSTM espera {self.rnn.input_size}. "
                "Ajusta la altura de la imagen de entrada."
            )
        # Each feature-map column becomes one time step: (batch, width, channels * height).
        seq = feats.view(b, c * h, w).permute(0, 2, 1)
        seq, _ = self.rnn(seq)
        return self.fc(seq)

# Load the CRNN model.
# The checkpoint is unpickled as a complete nn.Module (``.eval()`` is called on
# the loaded object), so the CRNN class must already be defined in this notebook.
# ``weights_only=False`` is passed explicitly because PyTorch >= 2.6 defaults to
# ``weights_only=True``, which refuses to unpickle custom classes such as CRNN.
# SECURITY: full unpickling can execute arbitrary code; only load checkpoints
# that come from a trusted source.
model_crnn = torch.load("models/ocr_v3.pt", map_location=device, weights_only=False)
model_crnn.eval()
print("✅ Modelo CRNN cargado correctamente.")

# Preprocessing for the CRNN: one grayscale channel, resized to 32x128 (H x W),
# converted to a float tensor by ToTensor.
# NOTE(review): CRNN.forward folds channels x height into the LSTM feature axis.
# A 32-pixel-high input leaves a feature height of 2 (1024 features) while the
# LSTM is declared with 512; confirm the input height used during training.
transform = T.Compose([
    T.Grayscale(),
    T.Resize((32, 128)),
    T.ToTensor(),
])

# Character dictionary: class index -> character. Indices >= len(CHARS) are
# treated as non-characters by decode_ctc (presumably the CTC blank).
CHARS = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
idx_to_char = dict(enumerate(CHARS))

def decode_ctc(output):
    """Greedy-decode a CTC output tensor into a string.

    The most likely class is taken along dim 2 for every time step, the leading
    batch dimension is squeezed away (so a batch of size 1 is expected),
    consecutive repeats are collapsed, and any index outside ``CHARS`` is dropped
    (presumably the CTC blank; confirm against the training setup).

    Args:
        output: Score tensor with classes along dim 2.

    Returns:
        The decoded text.
    """
    best_per_step = output.softmax(2).argmax(2).squeeze(0).cpu().numpy()
    alphabet_size = len(CHARS)
    pieces = []
    last_idx = -1
    for idx in best_per_step:
        changed = idx != last_idx
        # Always remember the latest index so a blank between two equal
        # characters lets the second one through.
        last_idx = idx
        if changed and idx < alphabet_size:
            pieces.append(idx_to_char.get(idx, ""))
    return "".join(pieces)


In [None]:
VIDEO = "plates_test.mp4"
CSV_COLUMNS = ["Frame", "Tiempo", "EasyOCR", "CRNN_Custom"]


def _format_video_time(seconds):
    """Format an offset from the start of the video as ``HH:MM:SS.mmm``.

    Plain arithmetic is used instead of ``datetime.fromtimestamp`` because the
    latter treats the value as a Unix timestamp and converts it to local time,
    which shifts the hour field by the machine's UTC offset.
    """
    total_ms = round(seconds * 1_000_000) // 1000  # round to microseconds, truncate to ms
    hours, rest = divmod(total_ms, 3_600_000)
    minutes, rest = divmod(rest, 60_000)
    secs, millis = divmod(rest, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"


def _record_ocr_failure(failures, engine, frame_no, error):
    """Count a failed OCR call and print only the first error of each engine."""
    failures[engine] += 1
    if failures[engine] == 1:
        print(f"⚠️ {engine} falló en el frame {frame_no}: {error!r}")


cap = cv2.VideoCapture(VIDEO)
if not cap.isOpened():
    # Fail loudly instead of writing an empty CSV and reporting success.
    cap.release()
    raise OSError(f"No se pudo abrir el vídeo: {VIDEO}")

fps = cap.get(cv2.CAP_PROP_FPS)
frame_count = 0
data_rows = []
ocr_failures = {"EasyOCR": 0, "CRNN_Custom": 0}

print("Procesando vídeo...")

try:
    # ``not fps > 0`` also rejects NaN, which some containers report.
    if not fps > 0:
        raise ValueError(f"FPS inválido ({fps}) para el vídeo: {VIDEO}")

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame_count += 1
        # Frame numbers are 1-based, so frame 1 is stamped at 1 / fps.
        timestamp = _format_video_time(frame_count / fps)
        results = detector(frame, verbose=False)

        boxes = results[0].boxes
        if not boxes:
            continue

        frame_h, frame_w = frame.shape[:2]
        for box in boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
            # Clamp to the frame: a negative start index would wrap around when slicing.
            x1, y1 = max(x1, 0), max(y1, 0)
            x2, y2 = min(x2, frame_w), min(y2, frame_h)
            placa = frame[y1:y2, x1:x2]
            if placa.size == 0:
                continue

            # EasyOCR: keep the longest fragment it returns, without spaces.
            try:
                fragments = reader_easy.readtext(placa, detail=0, allowlist=CHARS)
                text_easy = max(fragments, key=len).replace(" ", "") if fragments else ""
            except Exception as err:
                # Best effort: record an empty reading, but do not hide the
                # failure and do not swallow KeyboardInterrupt.
                text_easy = ""
                _record_ocr_failure(ocr_failures, "EasyOCR", frame_count, err)

            # CRNN (custom model)
            try:
                placa_rgb = cv2.cvtColor(placa, cv2.COLOR_BGR2RGB)
                img_pil = Image.fromarray(placa_rgb)
                img_t = transform(img_pil).unsqueeze(0).to(device)
                with torch.no_grad():
                    out = model_crnn(img_t)
                text_crnn = decode_ctc(out)
            except Exception as err:
                text_crnn = ""
                _record_ocr_failure(ocr_failures, "CRNN_Custom", frame_count, err)

            data_rows.append({
                "Frame": frame_count,
                "Tiempo": timestamp,
                "EasyOCR": text_easy,
                "CRNN_Custom": text_crnn
            })
finally:
    # Release the capture handle even if processing is interrupted or fails.
    cap.release()

# Save results; explicit columns keep the CSV header even with zero detections.
df = pd.DataFrame(data_rows, columns=CSV_COLUMNS)
df.to_csv("comparacion_ocr_v3_yolo11n.csv", index=False)
for engine, n_failed in ocr_failures.items():
    if n_failed:
        print(f"⚠️ {engine}: {n_failed} lecturas fallidas (registradas como cadena vacía).")
print("✅ Comparación completada.")
