In [5]:
# Load model from Hugging Face
from ultralytics import YOLO
from huggingface_hub import hf_hub_download
import cv2
import os

# Resolve the checkpoint through the Hugging Face Hub; the call returns a
# local filesystem path to the requested file.
MODEL_PATH = hf_hub_download(repo_id="QuincySorrentino/AeroYOLO", filename="best.pt")

# Build the YOLO detector from the downloaded weights and report where they live.
model = YOLO(MODEL_PATH)
print(f"Model loaded from: {MODEL_PATH}")

Model loaded from: C:\Users\qwsor\.cache\huggingface\hub\models--QuincySorrentino--AeroYOLO\snapshots\67e0fb4799f24c7972a6de24f1d3f7cfdc2b48c7\best.pt


In [None]:
# Run tracked detection over a video, keep only temporally stable tracks,
# draw them, and write the annotated frames to a new video file.
# Relies on `model`, `cv2` and `os` from the setup cell.
VIDEO_PATH = 'test_images/plane2.mp4'
OUTPUT_PATH = 'runs/output.mp4'

# Drawing style (OpenCV colours are BGR)
BOX_COLOR = (0, 255, 0)
TEXT_COLOR = (0, 0, 0)
FONT = cv2.FONT_HERSHEY_SIMPLEX
FONT_SCALE = 0.5
FONT_THICKNESS = 2
LABEL_PADDING = 10  # Extra pixels of background height around the label text

# Temporal filtering setup
detection_buffer = {}  # track_id -> {'count': sightings so far, 'last_seen': frame index}
# A track is drawn once it has been seen in this many frames. The counter is
# cumulative: it is NOT reset by a short gap, only dropped after BUFFER_DECAY.
MIN_CONSECUTIVE_FRAMES = 3
BUFFER_DECAY = 10  # Forget a track once it has been unseen for more than this many frames

# Open video
cap = cv2.VideoCapture(VIDEO_PATH)
if not cap.isOpened():
    # Otherwise the read loop below would run zero times and the cell would
    # still report success.
    raise FileNotFoundError(f"Could not open video: {VIDEO_PATH}")

out = None
frame_count = 0
try:
    # Get video properties (width, height, fps).
    # fps stays a float: truncating e.g. 29.97 to 29 changes the playback speed.
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    print(f"Video: {width}x{height} @ {fps:g}fps, {total_frames} frames")

    # Create video writer for output; make sure the target directory exists first.
    output_dir = os.path.dirname(OUTPUT_PATH)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(OUTPUT_PATH, fourcc, fps, (width, height))
    if not out.isOpened():
        # VideoWriter does not raise on failure; check explicitly so frames
        # are not silently discarded.
        raise RuntimeError(f"Could not open video writer for: {OUTPUT_PATH}")

    # Process each frame with tracking
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1

        # Run inference with tracking (reduces flickering).
        # NOTE(review): persist=True keeps tracker state on `model` between
        # calls; re-running this cell without reloading the model presumably
        # continues the previous run's track IDs - confirm.
        results = model.track(
            frame,
            conf=0.57,
            iou=0.7,
            max_det=10,
            imgsz=640,
            tracker="botsort.yaml",
            persist=True,
            verbose=False
        )
        result = results[0]

        # Temporal filtering - only keep stable detections
        current_track_ids = set()
        stable_boxes = []

        # boxes.id is None when the tracker assigned no IDs for this frame
        if result.boxes.id is not None:
            for box in result.boxes:
                track_id = int(box.id[0])
                current_track_ids.add(track_id)

                # Create the entry on first sighting, then update it
                state = detection_buffer.setdefault(
                    track_id, {'count': 0, 'last_seen': frame_count}
                )
                state['count'] += 1
                state['last_seen'] = frame_count

                # Only keep if seen enough times
                if state['count'] >= MIN_CONSECUTIVE_FRAMES:
                    stable_boxes.append(box)

        # Decay old tracks (collect first: the dict must not change size while iterating)
        tracks_to_remove = [
            track_id
            for track_id, state in detection_buffer.items()
            if track_id not in current_track_ids
            and frame_count - state['last_seen'] > BUFFER_DECAY
        ]
        for track_id in tracks_to_remove:
            del detection_buffer[track_id]

        # Plot only stable detections
        annotated_frame = frame.copy()
        for box in stable_boxes:
            # Get box coordinates
            x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
            cls = int(box.cls[0])
            conf = float(box.conf[0])
            track_id = int(box.id[0])

            # Draw bounding box
            cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), BOX_COLOR, 2)

            # Draw label above the box; if the box touches the top edge, clamp
            # the label to y=0 so it stays inside the frame.
            label = f"{result.names[cls]} {conf:.2f} ID:{track_id}"
            (label_width, label_height), _ = cv2.getTextSize(label, FONT, FONT_SCALE, FONT_THICKNESS)
            label_top = max(y1 - label_height - LABEL_PADDING, 0)
            label_bottom = label_top + label_height + LABEL_PADDING
            cv2.rectangle(annotated_frame, (x1, label_top), (x1 + label_width, label_bottom), BOX_COLOR, -1)
            cv2.putText(annotated_frame, label, (x1, label_bottom - 5), FONT, FONT_SCALE, TEXT_COLOR, FONT_THICKNESS)

        out.write(annotated_frame)

        if frame_count % 30 == 0:
            if total_frames > 0:
                print(f"Processed {frame_count}/{total_frames} frames ({frame_count/total_frames*100:.1f}%) - Active tracks: {len(stable_boxes)}")
            else:
                # Frame count unavailable for this source; skip the percentage
                # rather than divide by zero.
                print(f"Processed {frame_count} frames - Active tracks: {len(stable_boxes)}")
finally:
    # Always release the capture and writer, even if inference fails mid-video,
    # so the file handles are freed and the output container is finalized.
    cap.release()
    if out is not None:
        out.release()

print(f"\nProcessing complete. Output saved to: {OUTPUT_PATH}")
print(f"Total frames processed: {frame_count}")


Video: 1920x1080 @ 30fps, 260 frames
Processed 30/260 frames (11.5%) - Active tracks: 1
Processed 60/260 frames (23.1%) - Active tracks: 2
Processed 90/260 frames (34.6%) - Active tracks: 0
Processed 120/260 frames (46.2%) - Active tracks: 0
Processed 150/260 frames (57.7%) - Active tracks: 1
Processed 180/260 frames (69.2%) - Active tracks: 1
Processed 210/260 frames (80.8%) - Active tracks: 0
Processed 240/260 frames (92.3%) - Active tracks: 1

Processing complete. Output saved to: runs/output.mp4
Total frames processed: 260
