In [4]:
from ultralytics import YOLO, solutions
import cv2

In [5]:
# Load the `yolo11n.pt` checkpoint once; the functions defined below read this
# module-level `model` for per-frame inference and for class-id -> name lookup.
model = YOLO('yolo11n.pt')

In [8]:
def display_objects(video_path, output_video_path):
    """Run detection on every frame of a video and save an annotated copy.

    Each detection returned by the module-level ``model`` is drawn on the
    frame as a green rectangle with a ``"<class name> <confidence>"`` label,
    and the annotated frame is appended to the output video.

    Args:
        video_path: Path of the input video, passed to ``cv2.VideoCapture``.
        output_video_path: Path of the annotated video to write (encoded with
            the ``mp4v`` fourcc).

    Raises:
        AssertionError: If the input video cannot be opened, or if the output
            writer cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    assert cap.isOpened(), "Error reading video file"

    out = None
    try:
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Keep the frame rate as a float: truncating e.g. 29.97 to 29 changes
        # the playback speed and duration of the written video.
        fps = cap.get(cv2.CAP_PROP_FPS)

        out = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
        # Fail before running inference on the whole video if nothing can be
        # written; an unopened writer would otherwise go unnoticed.
        assert out.isOpened(), "Error opening output video file"

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            results = model(frame)[0]

            for box in results.boxes:
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                label = model.names[int(box.cls[0])]
                conf = float(box.conf[0])

                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                # Clamp the text baseline so labels of boxes touching the top
                # edge stay inside the frame instead of being drawn off-screen.
                cv2.putText(frame, f'{label} {conf:.2f}', (x1, max(y1 - 10, 15)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 2)

            out.write(frame)
    finally:
        # Always release the handles, even if inference or drawing raised, so
        # the output file is finalized and the input is not left locked.
        # No cv2.destroyAllWindows() here: this function never opens a window.
        cap.release()
        if out is not None:
            out.release()

In [25]:
def counting_objects_in_regions(video_path, output_video_path, regions=None):
    """Count detections per rectangular region on every frame of a video.

    For each frame, every detection returned by the module-level ``model`` is
    assigned to each region that contains the centre of its bounding box.
    Boxes whose centre lies in at least one region are drawn in green, the
    others in red. Each region is outlined and labelled with its count for
    that frame, and the annotated frame is appended to the output video.
    Counts are reset for every frame; nothing is accumulated across frames.

    Args:
        video_path: Path of the input video, passed to ``cv2.VideoCapture``.
        output_video_path: Path of the annotated video to write (encoded with
            the ``mp4v`` fourcc).
        regions: Optional sequence of ``(x1, y1, x2, y2)`` rectangles in pixel
            coordinates. Defaults to the two rectangles this notebook was
            originally written with.

    Raises:
        AssertionError: If the input video cannot be opened, or if the output
            writer cannot be opened.
    """
    if regions is None:
        regions = [(500, 300, 900, 900), (1200, 300, 1600, 900)]

    cap = cv2.VideoCapture(video_path)
    assert cap.isOpened(), "Error reading video file"

    out = None
    try:
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Keep the frame rate as a float: truncating e.g. 29.97 to 29 changes
        # the playback speed and duration of the written video.
        fps = cap.get(cv2.CAP_PROP_FPS)

        out = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
        # Fail before running inference on the whole video if nothing can be
        # written; an unopened writer would otherwise go unnoticed.
        assert out.isOpened(), "Error opening output video file"

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            results = model(frame)[0]
            # Per-frame tally, one slot per region (same order as `regions`).
            counts = [0] * len(regions)

            for box in results.boxes:
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                label = model.names[int(box.cls[0])]
                conf = float(box.conf[0])

                # Centre of the bounding box.
                cx = (x1 + x2) // 2
                cy = (y1 + y2) // 2

                color = (0, 0, 255)  # red (BGR) unless the centre is inside a region
                # A centre inside overlapping regions is counted in each of them.
                for index, (rx1, ry1, rx2, ry2) in enumerate(regions):
                    if rx1 <= cx <= rx2 and ry1 <= cy <= ry2:
                        counts[index] += 1
                        color = (0, 255, 0)  # green when inside a region

                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                cv2.putText(frame, f'{label} {conf:.2f}', (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 2)

            # Draw each region and its count for this frame.
            for (rx1, ry1, rx2, ry2), count in zip(regions, counts):
                cv2.rectangle(frame, (rx1, ry1), (rx2, ry2), (255, 255, 0), 2)
                cv2.putText(frame, f'Count in region: {count}', (rx1, ry1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)

            out.write(frame)
    finally:
        # Always release the handles, even if inference or drawing raised, so
        # the output file is finalized and the input is not left locked.
        # No cv2.destroyAllWindows() here: this function never opens a window.
        cap.release()
        if out is not None:
            out.release()

In [26]:
# Alternative run (disabled): plain detection overlay without region counting.
# display_objects("../resources/video.mp4", "output_video.mp4")
# Annotate the sample video with per-region counts and write the result next to the notebook.
counting_objects_in_regions("../resources/video.mp4", "output_video_counting.mp4")


0: 384x640 11 cars, 1 truck, 42.8ms
Speed: 1.9ms preprocess, 42.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 cars, 1 truck, 34.2ms
Speed: 1.3ms preprocess, 34.2ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 cars, 1 truck, 35.6ms
Speed: 1.4ms preprocess, 35.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 cars, 1 truck, 33.5ms
Speed: 1.3ms preprocess, 33.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 cars, 1 truck, 35.9ms
Speed: 1.4ms preprocess, 35.9ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 cars, 1 truck, 33.5ms
Speed: 1.3ms preprocess, 33.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 cars, 2 trucks, 31.3ms
Speed: 1.4ms preprocess, 31.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 cars, 2 trucks, 30.8ms
Speed: 1.3ms preprocess, 30