# Pelé contra Uruguay (1970): detección de objetos con visión por computadora

In [4]:
import cv2
import torch
import requests
from PIL import Image
from transformers import RTDetrForObjectDetection, RTDetrImageProcessor

# Checkpoint identifier handed to `from_pretrained` for both objects below,
# so the processor and the detector always come from the same checkpoint.
model_name = "PekingU/rtdetr_r50vd_coco_o365"
# The image processor prepares frames for the model and later converts the raw
# model outputs back into scored, labelled boxes.
image_processor = RTDetrImageProcessor.from_pretrained(model_name)
# RT-DETR object-detection model; its config provides the id -> label mapping
# used when annotating frames.
model = RTDetrForObjectDetection.from_pretrained(model_name)

In [None]:
# Move the model to the GPU if one is available; otherwise keep it on the CPU.
# `device` is reused later so that the model inputs are placed on the same
# device as the model.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

In [None]:
# Annotate a video with RT-DETR detections: read the input frame by frame,
# run the detector, draw the boxes/labels and write the result to a new MP4.
video_path = 'C:\\Users\\JuanCarlosSaraviaDra\\Dropbox\\Object_detection_Pele\\Pele_Uruguay_1970.mp4'
output_path = 'C:\\Users\\JuanCarlosSaraviaDra\\Dropbox\\Object_detection_Pele\\Pele_Uruguay_1970_vision.mp4'

# Run the detector on every n-th frame only (must be >= 1). Frames in between
# are still written, annotated with the most recent detections. The default of
# 1 runs the detector on every frame.
DETECT_EVERY_N_FRAMES = 1
# Minimum confidence score a detection needs in order to be drawn.
SCORE_THRESHOLD = 0.6

cap = cv2.VideoCapture(video_path)
out = None  # created only once the input has been opened successfully
frame_count = 0
last_detections = []  # (box, label, score) tuples from the latest detector pass

try:
    # Fail loudly instead of silently producing an empty output file.
    if not cap.isOpened():
        raise IOError(f"Could not open input video: {video_path}")

    # Mirror the input's frame rate and frame size in the output video.
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    if not out.isOpened():
        raise IOError(f"Could not open output video for writing: {output_path}")

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # End of stream (or a read error): stop processing.
            break

        if frame_count % DETECT_EVERY_N_FRAMES == 0:
            # OpenCV delivers BGR arrays; the image processor is given an RGB
            # PIL image.
            image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

            inputs = image_processor(images=image, return_tensors="pt").to(device)
            with torch.no_grad():  # inference only, no gradients needed
                outputs = model(**inputs)

            # PIL reports (width, height); target_sizes expects (height, width)
            # so the boxes are scaled back to pixel coordinates of the frame.
            results = image_processor.post_process_object_detection(
                outputs,
                target_sizes=torch.tensor([image.size[::-1]]),
                threshold=SCORE_THRESHOLD,
            )

            # Replace the cached detections with the ones from this frame.
            last_detections = []
            for result in results:
                # Convert each tensor to plain Python values once, rather than
                # calling .item()/.tolist() for every single detection.
                scores = result["scores"].tolist()
                label_ids = result["labels"].tolist()
                boxes = result["boxes"].tolist()
                for score, label_id, box in zip(scores, label_ids, boxes):
                    label = model.config.id2label[label_id]
                    # Drawing functions need integer pixel coordinates.
                    last_detections.append(([int(v) for v in box], label, score))

        # Draw the most recent detections on the current frame.
        for box, label, score in last_detections:
            x1, y1, x2, y2 = box
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, f"{label}: {score:.2f}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

        # Write the annotated frame to the output file.
        out.write(frame)
        frame_count += 1
finally:
    # Always release the capture and the writer, even if inference or drawing
    # raised, so the files are closed and the output container is finalized.
    cap.release()
    if out is not None:
        out.release()

print(f"Processed {frame_count} frames and saved to {output_path}")