In [1]:
from ultralytics import YOLO
import cv2
import yt_dlp

In [None]:
url = "https://www.youtube.com/watch?v=KMJS66jBtVQ"
yt = YouTube(url)

# Download best mp4 stream
stream = yt.streams.filter(file_extension="mp4").get_highest_resolution()
stream.download(filename="video.mp4")

'/home/leonelmaia/LM_repo/repo/decathlon/video.mp4'

In [2]:
# Load YOLOv8 pretrained model
model = YOLO("yolov8n.pt")  # small & fast, good for testing
#model = YOLO("yolov8x.pt") 
#model = YOLO("yolov8l.pt")

# Track people (COCO class 0) with ByteTrack and save annotated video
results = model.track(
    source="video.mp4",    # path to your local video file
    classes=[0],                 # 0 = person
    tracker="bytetrack.yaml",    # built-in tracker
    conf=0.20,                   # lower conf to help with edge detections
    iou=0.50,                    # NMS threshold
    save=True,                   # writes annotated MP4
    vid_stride=1                 # process every frame (increase for speed)
)

[31m[1mrequirements:[0m Ultralytics requirement ['lap>=0.5.12'] not found, attempting AutoUpdate...
Collecting lap>=0.5.12
  Downloading lap-0.5.12-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: lap
Successfully installed lap-0.5.12

[31m[1mrequirements:[0m AutoUpdate success ✅ 2.4s




[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m



inference results will accumulate in RAM unless `stream=True` is passed, causing potential out-of-memory
errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

video 1/1 (frame 1/1283) /home/leonelmaia/LM_repo/repo/decathlon/video.mp4: 384x640 10 persons, 574.7ms
video 1/1 (frame 2/1283) /home/leonelmaia/LM_repo/repo/decathlon/video.mp4: 384x640 10 persons, 453.8ms
video 1/1 (frame 3/1283) /home/leonelmaia/LM_repo/repo/decathlon/video.mp4: 384x640 10 persons, 432.9ms
video 1/1 (frame 4/1283) /home/leonelmaia/LM_repo/repo/decathlon/video.mp4: 384x640 10 persons, 306.9ms
video 1/1 (frame 5/1283) /home/leonelmaia/LM_repo/re

KeyboardInterrupt: 

In [3]:
VIDEO_IN  = "video.mp4"
VIDEO_OUT = "output_tracked.mp4"

# 1) Load a larger YOLO model for better edge performance
#model = YOLO("yolov8x.pt")  # or yolov8l.pt for less GPU memory
model = YOLO("yolov8n.pt")  # small & fast, good for testing
# 2) Open the video
cap = cv2.VideoCapture(VIDEO_IN)
if not cap.isOpened():
    raise RuntimeError(f"Cannot open {VIDEO_IN}")

fps    = cap.get(cv2.CAP_PROP_FPS) or 30
width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

fourcc = cv2.VideoWriter_fourcc(*"mp4v")
out    = cv2.VideoWriter(VIDEO_OUT, fourcc, fps, (width, height))

# 3) Running set of unique IDs
unique_ids = set()

# Optional: small padding to help with border detections (comment out if not needed)
PADDING = 32

def pad_frame(img, pad=PADDING):
    return cv2.copyMakeBorder(img, pad, pad, pad, pad, cv2.BORDER_CONSTANT, value=(0,0,0))

def unpad_boxes(boxes, pad=PADDING):
    # Shift boxes back after padding: x1,y1,x2,y2 -> minus pad
    if boxes is None: return None
    boxes[:, [0,2]] -= pad
    boxes[:, [1,3]] -= pad
    return boxes

print("🚀 Processing…")
while True:
    ret, frame = cap.read()
    if not ret:
        break

    # 4) (Optional) pad frame
    frame_in = pad_frame(frame) if PADDING > 0 else frame

    # 5) Track with ByteTrack; detect only people (class 0)
    # conf lowered to keep edge/partial detections
    results = model.track(
        source=frame_in,
        classes=[0],
        tracker="bytetrack.yaml",
        conf=0.20,
        iou=0.50,
        persist=True,        # keep tracker state across calls
        verbose=False
    )

    r = results[0]
    annotated = r.plot()  # draw boxes + track IDs on the (padded) frame

    # 6) Update unique IDs
    if r.boxes is not None and r.boxes.id is not None:
        ids = r.boxes.id.cpu().numpy().astype(int)
        for i in ids:
            unique_ids.add(int(i))

    # 7) Remove padding from drawn frame (crop back to original size)
    if PADDING > 0:
        annotated = annotated[PADDING:-PADDING, PADDING:-PADDING]

    # 8) Overlay counts (per-frame + unique so far)
    per_frame_count = 0 if r.boxes is None else len(r.boxes)

    # Get frame height to anchor bottom-left
    h, w, _ = annotated.shape  

    # Box size smaller (width=220, height=60)
    x1, y1 = 10, h - 70      # top-left corner of box
    x2, y2 = 230, h - 10     # bottom-right corner of box

    # Draw semi-transparent rectangle
    cv2.rectangle(annotated, (x1, y1), (x2, y2), (0, 0, 0), -1)

    # Add texts (smaller font)
    cv2.putText(annotated, f"Now: {per_frame_count}", (x1 + 10, y1 + 25),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
    cv2.putText(annotated, f"Unique: {len(unique_ids)}", (x1 + 10, y1 + 50),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

    # 9) Save
    out.write(annotated)

cap.release()
out.release()
print(f"✅ Saved annotated video to {VIDEO_OUT}")
print(f"👥 Total unique customers seen: {len(unique_ids)}")


🚀 Processing…
✅ Saved annotated video to output_tracked.mp4
👥 Total unique customers seen: 164
