In [1]:
from ultralytics import YOLO

In [3]:
# Load the YOLO11n weights file from the models directory (path is relative to the notebook's working directory)
model = YOLO("models/yolo11n.pt")

In [5]:
pip install lap

Collecting lap
  Downloading lap-0.5.12-cp39-cp39-win_amd64.whl.metadata (6.3 kB)
Downloading lap-0.5.12-cp39-cp39-win_amd64.whl (1.5 MB)
   ---------------------------------------- 0.0/1.5 MB ? eta -:--:--
   ---------------------------------------- 1.5/1.5 MB 8.6 MB/s eta 0:00:00
Installing collected packages: lap
Successfully installed lap-0.5.12
Note: you may need to restart the kernel to use updated packages.




In [7]:
import cv2

from ultralytics import YOLO

# Load the YOLO11 model
model = YOLO("models/yolo11n.pt")

# Open the video file and fail loudly if it cannot be read, instead of
# silently skipping the loop and showing nothing
video_path = "segment_24.mp4"
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise RuntimeError(f"Could not open video file: {video_path}")

try:
    # Loop through the video frames
    while cap.isOpened():
        # Read a frame from the video; a failed read means the end of the stream
        success, frame = cap.read()
        if not success:
            break

        # Run YOLO11 tracking on the frame, persisting tracks between frames
        results = model.track(frame, persist=True)

        # Visualize the results on the frame
        annotated_frame = results[0].plot()

        # Display the annotated frame
        cv2.imshow("YOLO11 Tracking", annotated_frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always release the capture and close the window, even if tracking or
    # display raised part-way through the video
    cap.release()
    cv2.destroyAllWindows()


0: 480x640 4 cars, 1 potted plant, 80.8ms
Speed: 2.0ms preprocess, 80.8ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 4 cars, 1 potted plant, 82.7ms
Speed: 1.4ms preprocess, 82.7ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 3 cars, 1 potted plant, 78.4ms
Speed: 1.0ms preprocess, 78.4ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 3 cars, 1 potted plant, 84.9ms
Speed: 1.2ms preprocess, 84.9ms inference, 1.2ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 cars, 1 potted plant, 85.2ms
Speed: 1.5ms preprocess, 85.2ms inference, 1.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 cars, 1 potted plant, 88.4ms
Speed: 1.0ms preprocess, 88.4ms inference, 1.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 cars, 1 potted plant, 91.9ms
Speed: 1.5ms preprocess, 91.9ms inference, 1.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 cars, 

In [44]:
from collections import defaultdict
import cv2
import numpy as np
from ultralytics import YOLO

# Load the YOLO11n weights used for tracking
model = YOLO("models/yolo11n.pt")

# Open the video file and fail loudly if it cannot be read, instead of
# silently reporting a tally of zero
video_path = "segment_24.mp4"
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise RuntimeError(f"Could not open video file: {video_path}")

# track_history: track ID -> recent box centres (x, y), newest last
# unique_objects: track IDs that have already been added to the tally
track_history = defaultdict(list)
unique_objects = set()

# Define class IDs for cars, bicycles, and people (COCO dataset)
CLASSES_TO_DETECT = {
    0: "person",
    1: "bicycle",
    2: "car"
}

# Store object counts, keyed by class ID
object_counts = {cls: 0 for cls in CLASSES_TO_DETECT}

try:
    # Loop through video frames
    while cap.isOpened():
        # Read a frame from the video; a failed read means the end of the stream
        success, frame = cap.read()
        if not success:
            break

        # Run YOLO object tracking on the frame, persisting tracks between frames
        results = model.track(frame, persist=True)

        # Plot every frame, so the window keeps updating even when nothing is tracked
        annotated_frame = results[0].plot()

        # boxes.id is None when the tracker assigned no IDs on this frame
        if results[0].boxes.id is not None:
            boxes = results[0].boxes.xywh.cpu()  # Box centre x, centre y, width, height
            track_ids = results[0].boxes.id.int().cpu().tolist()  # Get track IDs
            class_ids = results[0].boxes.cls.int().cpu().tolist()  # Get class IDs

            # Track and count unique objects
            for box, track_id, cls in zip(boxes, track_ids, class_ids):
                if cls not in CLASSES_TO_DETECT:
                    continue

                x, y, w, h = box
                track = track_history[track_id]
                track.append((float(x), float(y)))  # Store track points
                if len(track) > 30:  # Keep only the last 30 points of history
                    track.pop(0)

                # Each track ID contributes to the tally exactly once
                if track_id not in unique_objects:
                    unique_objects.add(track_id)
                    object_counts[cls] += 1

                # Draw the trail of recent centre points for this track
                points = np.hstack(track).astype(np.int32).reshape((-1, 1, 2))
                cv2.polylines(annotated_frame, [points], isClosed=False, color=(230, 230, 230), thickness=3)

        # Overlay the running tally on the video
        tally_text = f"Cars: {object_counts[2]} | Bicycles: {object_counts[1]} | People: {object_counts[0]}"
        cv2.putText(annotated_frame, tally_text, (20, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        # Display the annotated frame
        cv2.imshow("YOLO Object Tracking", annotated_frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always release the capture and close the window, even if tracking or
    # drawing raised part-way through the video
    cap.release()
    cv2.destroyAllWindows()

# Print final count in the log
print("\nFinal Tally:")
print(f"Cars: {object_counts[2]}")
print(f"Bicycles: {object_counts[1]}")
print(f"People: {object_counts[0]}")



0: 480x640 4 cars, 1 potted plant, 74.8ms
Speed: 2.2ms preprocess, 74.8ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 4 cars, 1 potted plant, 98.7ms
Speed: 1.3ms preprocess, 98.7ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 3 cars, 1 potted plant, 95.6ms
Speed: 1.3ms preprocess, 95.6ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 3 cars, 1 potted plant, 81.8ms
Speed: 1.4ms preprocess, 81.8ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 cars, 1 potted plant, 82.2ms
Speed: 1.3ms preprocess, 82.2ms inference, 1.2ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 cars, 1 potted plant, 84.2ms
Speed: 1.3ms preprocess, 84.2ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 cars, 1 potted plant, 94.0ms
Speed: 1.3ms preprocess, 94.0ms inference, 1.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 cars, 

In [45]:
from collections import defaultdict
import cv2
import numpy as np
from ultralytics import YOLO

# Load the YOLO11m weights (no device is selected explicitly here)
model = YOLO("models/yolo11m.pt")

# Open the video file and fail loudly if it cannot be read; otherwise the
# width/height/fps reads below would feed zeros into the VideoWriter
video_path = "segment_24.mp4"
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise RuntimeError(f"Could not open video file: {video_path}")

# Get video properties
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))

# Frame-sampling strides: a frame is processed only when its index is a
# multiple of BOTH values (with 5 and 5 that is every 5th frame)
PLAYBACK_SPEED = 5
FRAME_SKIP = 5
frame_rate = fps * PLAYBACK_SPEED  # not used anywhere in this cell

# Video writer to save the processed output
output_path = "processed_video.mp4"
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
# NOTE(review): the writer rate is fps // 3 while only every 5th frame is
# processed, so the saved video plays faster than the source - confirm intended.
# Clamped to at least 1 because a rate of 0 is not a usable frame rate.
output_fps = max(1, fps // 3)
out = cv2.VideoWriter(output_path, fourcc, output_fps, (width, height))

# track_history: track ID -> recent box centres (x, y), newest last
# unique_objects: track IDs that have already been added to the tally
track_history = defaultdict(list)
unique_objects = set()

# Define class IDs for the tracked road users (COCO dataset)
CLASSES_TO_DETECT = {
    0: "person",
    1: "bicycle",
    2: "car",
    3: "motorcycle",
    5: "bus",
    7: "truck"
}

# Store object counts, keyed by class ID
object_counts = {cls: 0 for cls in CLASSES_TO_DETECT}

# Counting filters
CONFIDENCE_THRESHOLD = 0.6  # Detections below 60% confidence are ignored
FRAME_THRESHOLD_Y = height // 3  # A box's bottom edge must reach the lower 2/3 of the frame

# Box colours (BGR): cars green, bicycles blue, every other tracked class red
BOX_COLORS = {2: (0, 255, 0), 1: (255, 0, 0)}
DEFAULT_BOX_COLOR = (0, 0, 255)

# Contrast enhancement (CLAHE), created once and reused for every frame
clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))

# Loop through video frames
frame_number = 0
try:
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            break

        # Advance the counter once per frame read, then decide whether to process it
        current_index = frame_number
        frame_number += 1
        if current_index % PLAYBACK_SPEED != 0 or current_index % FRAME_SKIP != 0:
            continue

        # Adaptive brightness correction: equalise only the lightness channel in LAB space
        lab = cv2.cvtColor(frame, cv2.COLOR_BGR2LAB)
        l_chan, a_chan, b_chan = cv2.split(lab)
        enhanced_lab = cv2.merge((clahe.apply(l_chan), a_chan, b_chan))
        frame = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2BGR)

        # Run YOLO object tracking on the enhanced frame
        results = model.track(frame, persist=True)

        # Annotate a copy on every processed frame, so frames without tracked
        # objects are still written and shown rather than dropped from the output
        annotated_frame = frame.copy()

        # boxes.id is None when the tracker assigned no IDs on this frame
        if results[0].boxes.id is not None:
            boxes = results[0].boxes.xywh.cpu()  # Box centre x, centre y, width, height
            track_ids = results[0].boxes.id.int().cpu().tolist()  # Get track IDs
            class_ids = results[0].boxes.cls.int().cpu().tolist()  # Get class IDs
            confidences = results[0].boxes.conf.cpu().tolist()  # Get confidence scores

            # Track and count unique objects
            for box, track_id, cls, conf in zip(boxes, track_ids, class_ids, confidences):
                if cls not in CLASSES_TO_DETECT or conf < CONFIDENCE_THRESHOLD:
                    continue

                x, y, w, h = box

                # Skip boxes whose bottom edge (centre y + half height) lies above the threshold line
                if y + h / 2 < FRAME_THRESHOLD_Y:
                    continue

                track = track_history[track_id]
                track.append((float(x), float(y)))  # Store track points
                if len(track) > 30:  # Keep only the last 30 points of history
                    track.pop(0)

                # Each track ID contributes to the tally exactly once
                if track_id not in unique_objects:
                    unique_objects.add(track_id)
                    object_counts[cls] += 1

                # Draw bounding box and label for this object.
                # NOTE(review): the number in the label is the running total for the
                # class, not a per-object index - confirm this is the intended label.
                label = f"{CLASSES_TO_DETECT[cls]} {object_counts[cls]}"
                color = BOX_COLORS.get(cls, DEFAULT_BOX_COLOR)
                top_left = (int(x - w/2), int(y - h/2))
                bottom_right = (int(x + w/2), int(y + h/2))
                cv2.rectangle(annotated_frame, top_left, bottom_right, color, 2)
                cv2.putText(annotated_frame, label, (top_left[0], top_left[1] - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

                # Draw the trail of recent centre points for this track
                points = np.hstack(track).astype(np.int32).reshape((-1, 1, 2))
                cv2.polylines(annotated_frame, [points], isClosed=False, color=(230, 230, 230), thickness=3)

        # Overlay the tally on the video ("Cars" here sums cars, motorcycles, buses and trucks)
        tally_text = f"Cars: {object_counts[2]+object_counts[3]+object_counts[5]+object_counts[7]} | Bicycles: {object_counts[1]} | People: {object_counts[0]}"
        cv2.putText(annotated_frame, tally_text, (20, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        # Save the processed frame
        out.write(annotated_frame)

        # Display the frame with annotations
        cv2.imshow("YOLO Object Tracking", annotated_frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always release the capture and writer and close the window, so the
    # output file is finalised even if processing raised part-way through
    cap.release()
    out.release()
    cv2.destroyAllWindows()

# Print final count in the log
print("\nFinal Tally:")
print(f"Cars: {object_counts[2]}")
print(f"Bicycles: {object_counts[1]}")
print(f"People: {object_counts[0]}")
print(f"Motorcycles: {object_counts[3]}")
print(f"Buses: {object_counts[5]}")
print(f"Trucks: {object_counts[7]}")

print(f"Processed video saved as: {output_path}")



0: 480x640 2 cars, 280.1ms
Speed: 1.3ms preprocess, 280.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 car, 345.2ms
Speed: 2.5ms preprocess, 345.2ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 car, 303.4ms
Speed: 1.3ms preprocess, 303.4ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 car, 311.6ms
Speed: 1.3ms preprocess, 311.6ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 car, 287.2ms
Speed: 1.5ms preprocess, 287.2ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 cars, 280.9ms
Speed: 1.1ms preprocess, 280.9ms inference, 1.2ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 3 cars, 405.4ms
Speed: 1.4ms preprocess, 405.4ms inference, 1.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 4 cars, 513.5ms
Speed: 1.6ms preprocess, 513.5ms inference, 2.4ms postprocess per image at shape (1, 3, 480, 6