In [2]:
import cv2
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors

from collections import defaultdict

# Instance segmentation + object tracking on a video file.
#
# Reads frames from the input video, runs the YOLO segmentation model in
# tracking mode on each frame, outlines every tracked instance with a colour
# derived from its track id, and both displays the frame and appends it to an
# MJPG-encoded .avi file. Press "q" in the preview window to stop early.

# Not read or written anywhere else in this cell; kept in case other cells
# reference the name.
track_history = defaultdict(list)

model = YOLO("yolov8n-seg.pt")   # segmentation model
cap = cv2.VideoCapture("sihung.mp4")
if not cap.isOpened():
    # Fail loudly instead of creating a 0x0 writer and reporting that
    # processing "completed" when the source could not be opened at all.
    cap.release()
    raise IOError("Could not open video source 'sihung.mp4'")

w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Keep the frame rate as a float: truncating e.g. 29.97 to 29 makes the written
# video play back at the wrong speed. Some sources report 0 for this property;
# 30.0 is an arbitrary fallback so the writer still gets a usable rate.
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

out = cv2.VideoWriter('instance-segmentation-object-tracking.avi', cv2.VideoWriter_fourcc(*'MJPG'), fps, (w, h))

try:
    while True:
        ret, im0 = cap.read()
        if not ret:
            print("Video frame is empty or video processing has been successfully completed.")
            break

        # NOTE(review): the annotator presumably draws in place on im0, since
        # im0 itself is what gets written and shown below - confirm.
        annotator = Annotator(im0, line_width=2)

        # persist=True is passed so tracking state carries over between calls.
        results = model.track(im0, persist=True)

        # Frames with no assigned track ids or no masks are written unannotated.
        if results[0].boxes.id is not None and results[0].masks is not None:
            masks = results[0].masks.xy
            track_ids = results[0].boxes.id.int().cpu().tolist()

            for mask, track_id in zip(masks, track_ids):
                annotator.seg_bbox(mask=mask,
                                   mask_color=colors(track_id, True),
                                   track_label=str(track_id))

        out.write(im0)
        cv2.imshow("instance-segmentation-object-tracking", im0)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always finalize the output file and free the capture/window handles,
    # including when the loop is interrupted or a frame raises.
    out.release()
    cap.release()
    cv2.destroyAllWindows()

0: 448x640 1 car, 140.2ms
Speed: 3.3ms preprocess, 140.2ms inference, 29.8ms postprocess per image at shape (1, 3, 448, 640)
0: 448x640 1 car, 106.8ms
Speed: 2.1ms preprocess, 106.8ms inference, 26.5ms postprocess per image at shape (1, 3, 448, 640)
0: 448x640 2 cars, 100.4ms
Speed: 1.0ms preprocess, 100.4ms inference, 27.5ms postprocess per image at shape (1, 3, 448, 640)
0: 448x640 2 cars, 98.5ms
Speed: 2.1ms preprocess, 98.5ms inference, 34.1ms postprocess per image at shape (1, 3, 448, 640)
0: 448x640 3 cars, 96.1ms
Speed: 2.1ms preprocess, 96.1ms inference, 22.4ms postprocess per image at shape (1, 3, 448, 640)
0: 448x640 3 cars, 91.3ms
Speed: 1.1ms preprocess, 91.3ms inference, 25.6ms postprocess per image at shape (1, 3, 448, 640)
0: 448x640 2 cars, 1 bus, 94.9ms
Speed: 2.1ms preprocess, 94.9ms inference, 23.7ms postprocess per image at shape (1, 3, 448, 640)
0: 448x640 3 cars, 90.5ms
Speed: 2.3ms preprocess, 90.5ms inference, 22.3ms postprocess per image at shape (1, 3, 448, 64

In [3]:
# Display the value `results` was last assigned by model.track(...) in the
# frame loop of the previous cell. The name only exists if that loop ran
# model.track at least once.
results

[ultralytics.engine.results.Results object with attributes:
 
 boxes: ultralytics.engine.results.Boxes object
 keypoints: None
 masks: ultralytics.engine.results.Masks object
 names: {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 