In [2]:
pip install ultralytics opencv-python numpy

Collecting ultralytics
  Downloading ultralytics-8.4.14-py3-none-any.whl.metadata (39 kB)
Collecting ultralytics-thop>=2.0.18 (from ultralytics)
  Downloading ultralytics_thop-2.0.18-py3-none-any.whl.metadata (14 kB)
Downloading ultralytics-8.4.14-py3-none-any.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m29.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.18-py3-none-any.whl (28 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.4.14 ultralytics-thop-2.0.18


In [4]:
from google.colab.patches import cv2_imshow

In [9]:
import cv2
from ultralytics import YOLO
from IPython.display import Video, display

# Load YOLO model
model = YOLO("yolov8n.pt")

video_path = "amazon_video.mp4"  # change if needed
cap = cv2.VideoCapture(video_path)

width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps    = int(cap.get(cv2.CAP_PROP_FPS))

output_path = "final_output.mp4"
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

line_y = height // 2

prev_y_positions = {}
counted_ids = set()
total_count = 0

while True:
    ret, frame = cap.read()
    if not ret:
        break

    results = model.track(frame, persist=True, conf=0.4)

    if results[0].boxes.id is not None:

        boxes = results[0].boxes.xyxy.cpu().numpy()
        ids = results[0].boxes.id.cpu().numpy()

        for box, track_id in zip(boxes, ids):
            x1, y1, x2, y2 = map(int, box)
            cx = (x1 + x2) // 2
            cy = (y1 + y2) // 2

            # Draw bounding box
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0,255,0), 2)
            cv2.circle(frame, (cx, cy), 4, (255,0,0), -1)
            cv2.putText(frame, f"ID {int(track_id)}", (x1, y1-5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 2)

            # Check crossing
            if track_id in prev_y_positions:
                prev_y = prev_y_positions[track_id]

                # Crossed from top to bottom
                if prev_y < line_y and cy >= line_y:
                    if track_id not in counted_ids:
                        total_count += 1
                        counted_ids.add(track_id)

            prev_y_positions[track_id] = cy

    # Draw red counting line
    cv2.line(frame, (0, line_y), (width, line_y), (0,0,255), 3)

    # Show total count
    cv2.putText(frame, f"TOTAL PASSED: {total_count}", (30,60),
                cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0,255,0), 3)

    out.write(frame)

cap.release()
out.release()

print("✅ Finished Counting")
display(Video(output_path))



0: 640x384 1 cake, 9.9ms
Speed: 2.1ms preprocess, 9.9ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 (no detections), 8.3ms
Speed: 2.1ms preprocess, 8.3ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 (no detections), 6.8ms
Speed: 2.8ms preprocess, 6.8ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 (no detections), 7.3ms
Speed: 2.5ms preprocess, 7.3ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 (no detections), 7.6ms
Speed: 2.1ms preprocess, 7.6ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 (no detections), 7.3ms
Speed: 2.6ms preprocess, 7.3ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 (no detections), 7.5ms
Speed: 2.2ms preprocess, 7.5ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 (no detections), 7.3ms
Speed: 2.5ms preprocess, 7.3ms inference, 0.6ms postproce