In [2]:
import cv2
from ultralytics import YOLO
from collections import defaultdict
import time

# Load the YOLO model from 'yolo11n.pt' (presumably a pretrained weights file — confirm).
# `model` is a notebook-level global: later cells read `model.names` and call
# `model.track(...)`, so this cell must run before them.
model = YOLO('yolo11n.pt')

In [2]:
# class_list: the model's class-index -> label mapping (printed below as a dict
# with integer keys, e.g. 0: 'person', 2: 'car'). The tracking cell uses it to
# turn a detected class index into a readable name.
class_list = model.names
print(class_list)

{0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microw

In [19]:
# Vehicle tracking & counting.
# Reads 'test.mp4', resizes each frame, runs YOLO tracking restricted to class
# indices 2, 3, 5 and 7 (car, motorcycle, bus, truck in `class_list`), counts
# each track ID once when its box centre is below the red line, previews the
# annotated frame and writes it to 'output.mp4'.

# Size (width, height) every frame is resized to before inference, drawing and
# writing. The writer below MUST be opened with this same size.
output_size = (640, 480)

# Open the video file
cap = cv2.VideoCapture('test.mp4')
if not cap.isOpened():
    # Fail loudly instead of silently producing an empty output file.
    cap.release()
    raise RuntimeError("Could not open video source 'test.mp4'")

# Get the width and height of the source frames (informational; the frames that
# are processed and written use `output_size`, not these values).
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define the codec and create VideoWriter object.
# The frame size given here has to match the frames passed to `out.write`;
# those are the resized frames, so use `output_size` rather than the source size.
out = cv2.VideoWriter('output.mp4', cv2.VideoWriter_fourcc(*'mp4v'), 10, output_size)

# Dictionary to store object counts by class
class_counts = defaultdict(int)

# Set of track IDs that have already been counted
crossed_ids = set()

# Red Line Coordinates (y position, in pixels of the resized frame)
red_line = 360

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Resize frame
        resized_frame = cv2.resize(frame, output_size)

        # Run YOLO tracking on the frame; persist=True keeps tracker state between calls
        results = model.track(resized_frame, persist=True, classes=[2, 3, 5, 7])
        detections = results[0].boxes

        # Ensure results are not empty
        if detections is not None and detections.data is not None:
            # Get the detected boxes, their class indices, and track IDs.
            # `id` is None when the tracker has assigned no IDs for this frame;
            # the empty list then makes the zip below yield nothing.
            boxes = detections.xyxy.cpu()
            track_ids = detections.id.cpu().tolist() if detections.id is not None else []
            class_indices = detections.cls.cpu().tolist() if detections.cls is not None else []

            cv2.line(resized_frame, (0, red_line), (output_size[0], red_line), (0, 0, 255), 3)

            # Loop through each detected object
            for box, track_id, class_idx in zip(boxes, track_ids, class_indices):
                # The tensors hold floats; IDs and class indices are integral values.
                track_id = int(track_id)
                class_idx = int(class_idx)

                x1, y1, x2, y2 = map(int, box)
                cx = (x1 + x2) // 2  # Calculate the center point
                cy = (y1 + y2) // 2

                class_name = class_list[class_idx]

                cv2.circle(resized_frame, (cx, cy), 4, (0, 0, 255), -1)

                cv2.putText(resized_frame, f"ID: {track_id} {class_name}", (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)
                cv2.rectangle(resized_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

                # Count the object once its centre is below the red line.
                # NOTE(review): a track that is first seen already below the line
                # is counted too, even though it never moved across it — confirm
                # this is acceptable for the camera angle / traffic direction.
                if cy > red_line and track_id not in crossed_ids:
                    # Mark the object as crossed
                    crossed_ids.add(track_id)
                    class_counts[class_name] += 1

            # Display the counts on the frame
            y_offset = 30
            for class_name, count in class_counts.items():
                cv2.putText(resized_frame, f"{class_name}: {count}", (10, y_offset),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
                y_offset += 30

        # Write the frame to the output video
        out.write(resized_frame)

        # Show the frame
        cv2.imshow("Object Tracking & Counting", resized_frame)

        # Exit loop if 'q' key is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release resources even if inference or drawing raised, or the kernel was interrupted
    cap.release()
    out.release()
    cv2.destroyAllWindows()


0: 480x640 6 cars, 1 bus, 1 truck, 225.0ms
Speed: 3.3ms preprocess, 225.0ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 4 cars, 1 truck, 196.7ms
Speed: 3.6ms preprocess, 196.7ms inference, 3.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 4 cars, 1 bus, 191.1ms
Speed: 0.0ms preprocess, 191.1ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 3 cars, 1 bus, 183.4ms
Speed: 1.1ms preprocess, 183.4ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 4 cars, 1 bus, 158.9ms
Speed: 2.5ms preprocess, 158.9ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 3 cars, 1 bus, 135.3ms
Speed: 5.0ms preprocess, 135.3ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 3 cars, 1 bus, 129.4ms
Speed: 2.5ms preprocess, 129.4ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 cars, 1 truck, 125.0ms
Speed: 4.0ms preproce