# In [None]:
from ultralytics import YOLO
import cv2
import cvzone
import time
import math
import numpy as np
from sort import *

# Input video that detection and tracking are run on
FILE_PATH = "traffic_video.mp4"

# Display labels, looked up by the integer class id of each detection box.
# The order is significant (the list is indexed positionally); the thematic
# grouping below is for readability only.
classNames = [
    # people and vehicles
    "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
    # street furniture
    "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    # animals
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
    # carried items
    "backpack", "umbrella", "handbag", "tie", "suitcase",
    # sports gear
    "frisbee", "skis", "snowboard", "sports ball", "kite",
    "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket",
    # tableware
    "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
    # food
    "banana", "apple", "sandwich", "orange", "broccoli",
    "carrot", "hot dog", "pizza", "donut", "cake",
    # furniture
    "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet",
    # electronics
    "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
    # appliances
    "microwave", "oven", "toaster", "sink", "refrigerator",
    # miscellaneous indoor objects
    "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
]

# Tracker that is fed the per-frame detections further down
tracker = Sort(max_age=20, min_hits=3, iou_threshold=0.3)

cap = cv2.VideoCapture(FILE_PATH)
if not cap.isOpened():
    # Fail fast: with an unopened capture the read loop ends immediately and
    # only an empty output file is produced, with no hint of what went wrong.
    raise OSError(f"Could not open input video: {FILE_PATH}")

# NOTE(review): "yolov81.pt" may be a typo for "yolov8l.pt" (letter L) -
# confirm against the weights file actually available before changing it.
model = YOLO("yolov81.pt")

# Video writer for the annotated output. Frame size and frame rate are taken
# from the input so the result plays back at the source speed.
frame_size = (
    int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
    int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
)
fps = cap.get(cv2.CAP_PROP_FPS)
if not math.isfinite(fps) or fps <= 0:
    # The source did not report a usable frame rate; keep the previous default.
    fps = 20.0
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec
out = cv2.VideoWriter('output_video.mp4', fourcc, fps, frame_size)

# Main processing loop: detect objects in each frame, track them across frames,
# draw the annotations and append the frame to the output video.
# The try/finally guarantees the capture and writer are released (and the
# output file finalized) even if inference, tracking or drawing raises.
try:
    while True:
        ret, img = cap.read()
        if not ret:
            # End of stream or read failure
            break

        # Rows of [x1, y1, x2, y2, conf] for the detections kept in this frame
        rows = []

        for r in model(img, stream=True):
            for box in r.boxes:
                x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])

                # Class index, used to look up the label in classNames
                cls = int(box.cls[0])

                # Confidence score, rounded up to two decimals
                conf = math.ceil(box.conf[0] * 100) / 100

                if conf <= 0.5:
                    continue

                cvzone.putTextRect(img, f'{classNames[cls]} {conf}', (x2, y2), scale=1, thickness=1, colorR=(0, 0, 255))
                rows.append([x1, y1, x2, y2, conf])

        # Build the detection array once per frame; keep the (0, 5) shape when
        # nothing passed the confidence threshold.
        detections = np.array(rows) if rows else np.empty((0, 5))

        resultTracker = tracker.update(detections)

        for res in resultTracker:
            # Each tracker row is unpacked as a box followed by a track id
            x1, y1, x2, y2, track_id = (int(v) for v in res)
            w, h = x2 - x1, y2 - y1
            cvzone.putTextRect(img, f'ID: {track_id}', (x1, y1), scale=1, thickness=1, colorR=(0, 0, 255))
            cvzone.cornerRect(img, (x1, y1, w, h), l=9, rt=1, colorR=(255, 0, 255))

        out.write(img)
finally:
    cap.release()
    out.release()
    cv2.destroyAllWindows()

# Display the processed video in the notebook.
# The Video object is left as a bare trailing expression; presumably this is
# the last statement of the cell so that the notebook renders it as output.
# NOTE(review): the output is written with the 'mp4v' codec; confirm that the
# notebook front end / browser can actually play it back.
from IPython.display import Video
Video("output_video.mp4", embed=True)