### YOLO V8 Plane Detector: Object detection & tracking

Features
* process a video
* using YOLO v8 model
* using deep_sort for Object Tracking (https://github.com/levan92/deep_sort_realtime)

Added features:
* the YOLO model is encapsulated in the ObjectDetector class
* only every n-th frame is processed (to speed things up)

In [1]:
import math
import os

import cv2
import numpy as np
from tqdm import tqdm

# added for tracking with DeepSort
from deep_sort_realtime.deepsort_tracker import DeepSort

from object_detector import ObjectDetector

In [2]:
# Configuration cell: every value a reader may want to tune lives here.

# path to the custom-trained YOLO weights (switched to the "large" model)
# NOTE(review): absolute, machine-specific path; adjust it for your environment
MODEL_PATH = "/home/datascience/yolov8/runs/detect/train12/weights/best.pt"

# Input video
# (alternative clips, e.g. a formation of jets)
# VIDEO_PATH = "mixkit-jet1.mp4"
# VIDEO_PATH = "mixkit-crossing.mp4"
VIDEO_INPUT = "mixkit-military.mp4"

CODEC = "mp4v"
FOURCC = cv2.VideoWriter_fourcc(*CODEC)

# name of the output video (with BB + Id): input name without extension + "_bb"
# splitext strips only the final extension, so names containing other dots
# and paths such as "./clips/a.mp4" keep their full stem
ONLY_NAME = os.path.splitext(VIDEO_INPUT)[0]
VIDEO_OUTPUT = f"{ONLY_NAME}_bb.mp4"

# settings for the BB that will be added
# remember: OpenCV is BGR
COLOR = (0, 0, 255)  # red

TEXT_COLOR = (0, 0, 0)  # black
# line thickness (in pixels) of the box and of the text
TICKNESS = 1
TEXT_TICKNESS = 2
FONT = cv2.FONT_HERSHEY_SIMPLEX
FONT_SIZE = 0.6

# vertical shift (in pixels) of the Id text above the BB
TEXT_OFFSET = 10

# DeepSort parameters
# passed to DeepSort as max_age; presumably counted in tracker updates, so
# 60 is 2 seconds at fps=30 only if every frame is processed, and about
# 2 * PROCESS_EVERY seconds otherwise (TODO confirm against deep_sort_realtime docs)
MAX_AGE = 60

# to speed up you can choose to process only one frame every PROCESS_EVERY
PROCESS_EVERY = 2

In [3]:
# open the original (input) video
cap = cv2.VideoCapture(VIDEO_INPUT)

# fail early with a clear message instead of reporting meaningless metadata
if not cap.isOpened():
    raise IOError(f"Cannot open input video: {VIDEO_INPUT}")

n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
# rounded up to an integer (e.g. 29.97 becomes 30)
fps = math.ceil(cap.get(cv2.CAP_PROP_FPS))
# guard against a reported fps of 0, which would raise ZeroDivisionError
duration = round(n_frames / fps, 1) if fps > 0 else 0.0

print()
print("Input video analysis:")
print()
print(f"Video duration: {duration} sec.")
print(f"Number of frames: {n_frames}")
print(f"Fps: {fps}")
print()


Input video analysis:

Video duration: 26.4 sec.
Number of frames: 792
Fps: 30



#### Apply YOLO + Deepsort to single frames

In [4]:
# Run YOLO + DeepSort on one frame every PROCESS_EVERY and write the
# annotated frames (bounding box + track Id) to VIDEO_OUTPUT.
print(f"Processing input video {VIDEO_INPUT}...")
print()

# read the first frame; it provides the frame size for the output video
ret, frame = cap.read()

if not ret or frame is None:
    # nothing to process: free the capture before failing with a clear message
    cap.release()
    raise RuntimeError(f"Cannot read any frame from input video: {VIDEO_INPUT}")

# take height, width from the first frame (works with or without a channel axis)
height, width = frame.shape[:2]

# only one frame every PROCESS_EVERY is written, so the output fps is
# reduced by the same factor
new_fps = math.ceil(fps / PROCESS_EVERY)

# load and init YOLO
object_detector = ObjectDetector(model_path=MODEL_PATH)

# init DeepSort
tracker = DeepSort(max_age=MAX_AGE)

# for stats: both counters are incremented once per track per processed frame
n_track_confirmed = 0
n_track_not_confirmed = 0
frame_counter = 1

# the annotated (output) video
video = cv2.VideoWriter(VIDEO_OUTPUT, FOURCC, new_fps, (width, height))

# try/finally guarantees that input and output are released even if
# detection or tracking raises in the middle of the video
try:
    if not video.isOpened():
        # otherwise every write below would be silently discarded
        raise RuntimeError(f"Cannot open output video for writing: {VIDEO_OUTPUT}")

    # process frame by frame
    # tqdm for progress bar
    with tqdm(total=n_frames) as pbar:
        while ret:
            # process every PROCESS_EVERY frame
            if frame_counter % PROCESS_EVERY == 0:
                # calls the YOLO model
                detections = object_detector.detect(frame)
                # detections is a list with tuples of this kind
                # ([l, t, w, h], conf, v_cls)

                # apply deepsort
                tracks = tracker.update_tracks(detections, frame=frame)

                # make a copy to add bb
                new_image = frame.copy()

                # add bb + id to image
                for track in tracks:
                    if not track.is_confirmed():
                        # skip
                        n_track_not_confirmed += 1
                        continue

                    n_track_confirmed += 1

                    # get BB from tracker
                    ltrb = track.to_ltrb().astype(int)

                    # check if bb is correctly formed
                    if len(ltrb) != 4:
                        continue

                    # plain Python ints for the OpenCV drawing calls
                    left, top, right, bottom = (int(value) for value in ltrb)

                    # add BB to image
                    new_image = cv2.rectangle(
                        new_image,
                        (left, top),
                        (right, bottom),
                        COLOR,
                        TICKNESS,
                    )
                    # add text with Id (tracker); putText requires a string
                    cv2.putText(
                        new_image,
                        str(track.track_id),
                        (left, top - TEXT_OFFSET),
                        FONT,
                        FONT_SIZE,
                        TEXT_COLOR,
                        TEXT_TICKNESS,
                    )

                # write the annotated image in the video
                video.write(new_image)

            # update the progress bar
            pbar.update(1)

            # read next frame from video
            ret, frame = cap.read()
            frame_counter += 1
finally:
    # close the output
    video.release()

    # close the input
    cap.release()

Processing input video mixkit-military.mp4...

Loading YOLO v8 model..
Loaded YOLO v8 model..



100%|██████████| 792/792 [00:54<00:00, 14.61it/s]


In [5]:
# Final report. The two counters are incremented once per track for every
# processed frame, so they are cumulative observations and not the number
# of distinct tracks.
summary_lines = [
    "",
    "Process video with YOLO v8 Model + Deep Sort correctly terminated.",
    "",
    "Tracker stats:",
    f"# track not confirmed: {n_track_not_confirmed}",
    f"# track confirmed: {n_track_confirmed}",
]
print("\n".join(summary_lines))


Process video with YOLO v8 Model + Deep Sort correctly terminated.

Tracker stats:
# track not confirmed: 39
# track confirmed: 2452
