In [4]:
#!/usr/bin/env python
"""
Crowd Detection: Detects people in input video using YOLOv8 and saves annotated output.

Author: Никита
Date: 2025-11-06
"""

import os
import cv2
import numpy as np
from ultralytics import YOLO
from tqdm import tqdm
from typing import Tuple, List
from pathlib import Path


def load_model(model_path: str = "yolov8n.pt") -> YOLO:
    """
    Build a YOLO model from a weights file.

    The path is normalised through ``pathlib.Path`` before use. When no file
    exists at that location a notice is printed; this function performs no
    download itself and simply hands the path to the ``YOLO`` constructor,
    which is expected to fetch known weights on its own.

    Args:
        model_path (str): Path to model weights (e.g., 'yolov8n.pt').

    Returns:
        YOLO: Model instance constructed from ``model_path``.
    """
    weights_file = Path(model_path)
    model_path = str(weights_file)

    weights_present = weights_file.exists()
    if not weights_present:
        print(f"Model {model_path} not found. Downloading from Ultralytics...")

    detector = YOLO(model_path)
    return detector


def draw_detections(
    frame: np.ndarray,
    results,
    conf_threshold: float = 0.4,
    color: Tuple[int, int, int] = (0, 255, 0),
    thickness: int = 2
) -> np.ndarray:
    """
    Draw bounding boxes and labels (person + confidence) on a copy of the frame.

    Only boxes whose class id is 0 and whose confidence is at least
    ``conf_threshold`` are drawn. The input frame is never modified.

    Args:
        frame (np.ndarray): Input BGR frame.
        results: Iterable of YOLO inference results; each item must expose a
            ``boxes`` attribute. Items whose ``boxes`` is ``None`` are skipped.
        conf_threshold (float): Minimum confidence to display.
        color (Tuple[int, int, int]): BGR color for bounding box and label.
        thickness (int): Line thickness for bounding box.

    Returns:
        np.ndarray: Annotated copy of ``frame``.
    """
    annotated_frame = frame.copy()

    for result in results:
        boxes = result.boxes
        # A result without detection output carries boxes=None; iterating it
        # would raise TypeError, so treat it as "nothing to draw".
        if boxes is None:
            continue

        for box in boxes:
            conf = box.conf.item()
            if conf < conf_threshold:
                continue

            cls_id = int(box.cls.item())
            if cls_id != 0:  # 0 = 'person' in COCO
                continue

            x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
            label = f"person {conf:.2f}"

            # Draw bounding box
            cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), color, thickness)

            # Draw label background. The label sits above the box unless that
            # would push it too close to the top edge, in which case it is
            # placed just inside the box instead.
            label_size, baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
            label_y = y1 - 10 if y1 - 10 > 10 else y1 + 20 + baseline
            cv2.rectangle(
                annotated_frame,
                (x1, label_y - label_size[1] - 10),
                (x1 + label_size[0], label_y),
                color,
                -1
            )

            # Draw label text (black, anti-aliased) over the filled background
            cv2.putText(
                annotated_frame,
                label,
                (x1, label_y - 5),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.5,
                (0, 0, 0),
                1,
                cv2.LINE_AA
            )

    return annotated_frame


def process_video(
    video_path: str,
    output_path: str,
    model: YOLO,
    conf_threshold: float = 0.4,
    frame_skip: int = 1
) -> None:
    """
    Process video: detect people, annotate, save output.

    Every frame of the input is written to the output. Inference and
    annotation run only on frames whose index is a multiple of
    ``frame_skip``; the remaining frames are written through unannotated.

    Args:
        video_path (str): Path to input video.
        output_path (str): Path to save annotated video.
        model (YOLO): Preloaded YOLO model.
        conf_threshold (float): Confidence threshold for detection.
        frame_skip (int): Process every Nth frame (1 = all frames). Must be >= 1.

    Raises:
        ValueError: If ``frame_skip`` is less than 1.
        FileNotFoundError: If the input video does not exist.
        IOError: If the input cannot be opened for reading or the output
            cannot be opened for writing.
    """
    # Reject this up front: 0 would divide by zero in the modulo below and a
    # negative step has no sensible meaning.
    if frame_skip < 1:
        raise ValueError(f"frame_skip must be >= 1, got {frame_skip}")

    video_path = str(Path(video_path))
    output_path = str(Path(output_path))

    if not Path(video_path).exists():
        raise FileNotFoundError(f"Input video not found: {video_path}")

    cap = cv2.VideoCapture(video_path)
    out = None
    frame_idx = 0
    processed_count = 0

    # try/finally guarantees both handles are released even when inference or
    # writing raises part-way through the video.
    try:
        if not cap.isOpened():
            raise IOError(f"Cannot open video file: {video_path}")

        # Video properties
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        print(f"Input: {width}x{height}, {fps:.2f} FPS, {total_frames} frames")

        # Output writer
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        # Without this check a writer that failed to open would be used
        # anyway and the function would report success with no output file.
        if not out.isOpened():
            raise IOError(f"Cannot open output video for writing: {output_path}")

        print("Processing frames...")
        # Some inputs report no usable frame count; total=None makes tqdm
        # show a plain counter instead of a bar against a bogus total.
        progress_total = total_frames if total_frames > 0 else None
        with tqdm(total=progress_total, unit="frame", colour="green") as pbar:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                if frame_idx % frame_skip == 0:
                    # Run inference (restricted to class 0 = person)
                    results = model(frame, conf=conf_threshold, classes=[0], verbose=False)[0]
                    # Annotate; draw_detections works on its own copy of the
                    # frame, so no extra copy is needed here.
                    annotated_frame = draw_detections(frame, [results], conf_threshold)
                    processed_count += 1
                else:
                    annotated_frame = frame

                out.write(annotated_frame)
                frame_idx += 1
                pbar.update(1)
    finally:
        cap.release()
        if out is not None:
            out.release()

    print(f"Output saved: {output_path}")
    print(f"Processed {processed_count} frames with detection.")


def main() -> None:
    """Load the default weights and annotate ``crowd.mp4`` into ``output_video.mp4``."""
    detector = load_model("yolov8n.pt")

    # Fixed run configuration: every frame is processed at a 0.4 threshold.
    run_settings = {
        "video_path": "crowd.mp4",
        "output_path": "output_video.mp4",
        "conf_threshold": 0.4,
        "frame_skip": 1,
    }
    process_video(model=detector, **run_settings)


# Run the pipeline only when this file is executed directly, not when it is
# imported for its functions.
if __name__ == "__main__":
    main()

Video: 1920x1080, 29.97 FPS, 705 frames
Processing video...


100%|█████████████████████████████████████████████████████████████████████████████| 705/705 [00:33<00:00, 21.29frame/s]

Done! Saved to output_video.mp4
Processed 705 frames with detection.



