In [23]:
!pip install deep-sort-realtime



In [18]:
import cv2
import datetime
from ultralytics import YOLO
import cv2
from deep_sort_realtime.deepsort_tracker import DeepSort

In [19]:
def create_video_writer(video_cap, output_filename, default_fps=30.0):
    """Create a ``cv2.VideoWriter`` mirroring a capture's frame size and rate.

    Args:
        video_cap: Capture object whose ``get(prop_id)`` is queried for the
            frame width, frame height and frames-per-second.
        output_filename: Path of the video file the writer will produce.
        default_fps: Frame rate used when the capture does not report a
            positive one (zero, negative or NaN).

    Returns:
        The ``cv2.VideoWriter`` configured with the 'mp4v' FourCC, the chosen
        frame rate and a ``(width, height)`` frame size.
    """
    frame_width = int(video_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(video_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Keep the frame rate as a float: truncating e.g. 29.97 to 29 makes the
    # written video play at a different speed than the source.
    fps = video_cap.get(cv2.CAP_PROP_FPS)
    if not fps > 0:  # also true for NaN, which compares False with everything
        fps = default_fps

    # Lowercase 'mp4v' is the tag used for .mp4 output; with 'MP4V' the FFmpeg
    # backend reports the tag as unsupported and falls back to 'mp4v' anyway.
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    writer = cv2.VideoWriter(output_filename, fourcc, fps,
                             (frame_width, frame_height))

    return writer

In [24]:
# Detect people in each frame with YOLO, track them with DeepSORT, draw the
# track boxes/ids plus an FPS overlay, and write the annotated frames out.

CONFIDENCE_THRESHOLD = 0.8  # detections scoring below this are ignored
GREEN = (0, 255, 0)
WHITE = (255, 255, 255)
PERSON_CLASS_ID = 0  # Class ID for "person"

VIDEO_PATH = "/content/test2.mp4"
OUTPUT_PATH = "output.mp4"

video_cap = cv2.VideoCapture(VIDEO_PATH)
if not video_cap.isOpened():
    # Fail loudly: otherwise the loop below would exit on the first failed
    # read and nothing useful would be written, with no hint about the cause.
    video_cap.release()
    raise RuntimeError(f"Could not open video source: {VIDEO_PATH}")
writer = create_video_writer(video_cap, OUTPUT_PATH)

try:
    # load the pre-trained YOLOv5 model
    model = YOLO("yolov5n.pt")
    tracker = DeepSort(max_age=50)

    while True:
        start = datetime.datetime.now()

        ret, frame = video_cap.read()

        if not ret:
            break

        detections = model(frame)[0]

        # Build the tracker input: one
        # [[left, top, width, height], confidence, class_id] entry per kept box.
        results = []
        for data in detections.boxes.data.tolist():  # every detection
            confidence = data[4]
            class_id = int(data[5])

            if float(confidence) < CONFIDENCE_THRESHOLD or class_id != PERSON_CLASS_ID:
                continue

            # Bounding box corners, converted to left/top/width/height below.
            xmin, ymin, xmax, ymax = int(data[0]), int(data[1]), int(data[2]), int(data[3])
            results.append([[xmin, ymin, xmax - xmin, ymax - ymin], confidence, class_id])

        # update the tracker with the new detections
        tracks = tracker.update_tracks(results, frame=frame)
        for track in tracks:  # every track
            if not track.is_confirmed():
                continue

            # get the track id and the bounding box
            track_id = track.track_id
            ltrb = track.to_ltrb()

            xmin, ymin, xmax, ymax = int(ltrb[0]), int(ltrb[1]), int(ltrb[2]), int(ltrb[3])

            # Track outline, a filled label patch above its top-left corner,
            # and the track id drawn on that patch.
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), GREEN, 2)
            cv2.rectangle(frame, (xmin, ymin - 20), (xmin + 20, ymin), GREEN, -1)
            cv2.putText(frame, str(track_id), (xmin + 5, ymin - 8),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, WHITE, 2)

        end = datetime.datetime.now()
        elapsed = (end - start).total_seconds()
        print(f"Time to process 1 frame: {elapsed * 1000:.0f} milliseconds")

        # A zero-length interval would make 1 / elapsed raise ZeroDivisionError.
        fps = f"FPS: {1 / elapsed:.2f}" if elapsed > 0 else "FPS: n/a"
        cv2.putText(frame, fps, (50, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, GREEN, 3)

        writer.write(frame)
finally:
    # Release the capture and finalize the output file even if model loading
    # or any frame's processing raised.
    video_cap.release()
    writer.release()
    cv2.destroyAllWindows()

PRO TIP 💡 Replace 'model=yolov5n.pt' with new 'model=yolov5nu.pt'.
YOLOv5 'u' models are trained with https://github.com/ultralytics/ultralytics and feature improved performance vs standard YOLOv5 models trained with https://github.com/ultralytics/yolov5.


0: 416x640 13 persons, 4 cars, 1 traffic light, 1 backpack, 53.0ms
Speed: 2.5ms preprocess, 53.0ms inference, 1.2ms postprocess per image at shape (1, 3, 416, 640)
Time to process 1 frame: 1366 milliseconds

0: 416x640 13 persons, 4 cars, 1 traffic light, 1 backpack, 40.4ms
Speed: 1.7ms preprocess, 40.4ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)
Time to process 1 frame: 98 milliseconds

0: 416x640 13 persons, 4 cars, 1 traffic light, 1 backpack, 40.3ms
Speed: 1.8ms preprocess, 40.3ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)
Time to process 1 frame: 97 milliseconds

0: 416x640 14 persons, 4 cars, 1 traffic light, 1 backpack, 40.1ms
Speed: 1.8ms preprocess, 40.1ms inference, 1.1ms postpr