## Before you start

Let's make sure that we have access to a GPU. We can use the `nvidia-smi` command to do that.

In [1]:
# Shell escape: ask the NVIDIA driver which GPUs are visible before any model is loaded.
!nvidia-smi

Fri Dec 29 19:33:55 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 546.33                 Driver Version: 546.33       CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                     TCC/WDDM  | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 3070      WDDM  | 00000000:01:00.0  On |                  N/A |
|  0%   39C    P5              19W / 240W |   1039MiB /  8192MiB |     12%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

## Set up paths

In [2]:
import os

In [3]:
# Use the directory the notebook kernel is running in as the project root;
# the video and result folders defined later are created beneath it.
HOME = os.getcwd()
print(HOME)

c:\Github\YOLOv8_ByteTrack_Supervision


In [4]:
# Input videos are read from VIDEOS_DIR, annotated videos are written to RESULTS_DIR.
# Both keep a trailing "/" because file names are appended to them directly.
VIDEOS_DIR = f"{HOME}/videos/"
RESULTS_DIR = f"{HOME}/results/"

# Create both folders up front; exist_ok makes re-running this cell harmless.
for _directory in (VIDEOS_DIR, RESULTS_DIR):
    os.makedirs(_directory, exist_ok=True)

In [5]:
# The three available source videos, all stored directly inside VIDEOS_DIR
# (VIDEOS_DIR already ends with a path separator).
SOURCE_VIDEO_PATH_EDIBLE_VS_REGULER = VIDEOS_DIR + "EdibleN_VS_RegulerN.mp4"
SOURCE_VIDEO_PATH_EDIBLE_VS_REJECT = VIDEOS_DIR + "EdibleN_VS_RejectN.mp4"
SOURCE_VIDEO_PATH_REGULER_VS_REJECT = VIDEOS_DIR + "RegulerN_VS_RejectN.mp4"

## Check YOLOv8 Installation

In [6]:
from IPython import display
# clear any output already shown for this cell before running the checks
display.clear_output()

import ultralytics
# print an environment summary (library/Python/torch versions, device, CPU/RAM/disk)
# to confirm the installation works
ultralytics.checks()

Ultralytics YOLOv8.0.231 🚀 Python-3.10.11 torch-2.1.2+cu118 CUDA:0 (NVIDIA GeForce RTX 3070, 8192MiB)
Setup complete ✅ (20 CPUs, 31.8 GB RAM, 399.3/930.8 GB disk)


## Check Roboflow Supervision Installation

In [7]:
from IPython import display
# wipe any output already shown for this cell
display.clear_output()

import supervision as sv
# report the installed supervision release; the calls used later in this
# notebook were written against the version printed here
print(f"supervision.__version__: {sv.__version__}")

supervision.__version__: 0.17.1


## Load pre-trained YOLOv8 model

In [8]:
# Weights file handed to YOLO() in the next cell; the relative path is resolved
# against the kernel's working directory.
MODEL = "./models/KopraV6_result_YOLOv8m/weights/best.pt"

In [9]:
from ultralytics import YOLO

# load the weights referenced by MODEL
model = YOLO(MODEL)
# fuse the model's layers; the cell output then reports the fused model summary
model.fuse()

Model summary (fused): 218 layers, 25843234 parameters, 0 gradients, 78.7 GFLOPs


## Predict and annotate whole video

In [10]:
import supervision as sv
import numpy as np

In [11]:
# mapping from class_id to class_name, as stored on the loaded model
CLASS_NAMES_DICT = model.model.names

# class_ids of interest: ids 0 through 5; detections of any other class are dropped
selected_classes = list(range(6))

In [42]:
# end points of the counting line, in pixel coordinates: a horizontal line
# at y=500 running from x=150 to x=1810
LINE_START = sv.Point(x=150, y=500)
LINE_END = sv.Point(x=1810, y=500)

In [47]:
# change this to change the source video; pick one of the
# SOURCE_VIDEO_PATH_* constants defined in the setup section
SOURCE_VIDEO_PATH = SOURCE_VIDEO_PATH_REGULER_VS_REJECT

In [48]:
# no need to edit this cell: the output path is derived from the selected source
# video as "<RESULTS_DIR><source file name without extension>_result.mp4".
# Any source video works, not only the three predefined ones, so
# TARGET_VIDEO_PATH is always (re)assigned when this cell runs and can never be
# left undefined or pointing at the result of a previously selected video.
_source_stem = os.path.splitext(os.path.basename(SOURCE_VIDEO_PATH))[0]
TARGET_VIDEO_PATH = f"{RESULTS_DIR}{_source_stem}_result.mp4"

In [49]:
# do not change this: shows the selected video's metadata (resolution, fps, frame count)
sv.VideoInfo.from_video_path(SOURCE_VIDEO_PATH)

VideoInfo(width=1920, height=1080, fps=30, total_frames=3760)

In [50]:
# create VideoInfo instance first, so the tracker can be configured from the
# source video's real frame rate
video_info = sv.VideoInfo.from_video_path(SOURCE_VIDEO_PATH)

# create BYTETracker instance; frame_rate follows the source video instead of a
# hard-coded value, so the tracker stays consistent if a video with a different
# fps is selected (see the supervision ByteTrack docs for the other thresholds)
byte_tracker = sv.ByteTrack(
    track_thresh=0.25,
    track_buffer=30,
    match_thresh=0.8,
    frame_rate=video_info.fps,
)

# create frame generator; it is not consumed by the sv.process_video call in
# this cell (that call is given the source path directly)
generator = sv.get_video_frames_generator(SOURCE_VIDEO_PATH)

# create LineZone instance (previously called LineCounter)
line_zone = sv.LineZone(start=LINE_START, end=LINE_END)

# create instance of BoxAnnotator
box_annotator = sv.BoxAnnotator(thickness=4, text_thickness=4, text_scale=2)

# create instance of TraceAnnotator
trace_annotator = sv.TraceAnnotator(thickness=4, trace_length=50)

# create LineZoneAnnotator instance (previously called LineCounterAnnotator)
line_zone_annotator = sv.LineZoneAnnotator(thickness=4, text_thickness=4, text_scale=2)

# define callback function to be used in video processing
def callback(frame: np.ndarray, index: int) -> np.ndarray:
    """Detect, track, annotate and count objects on a single video frame.

    Args:
        frame: Image of the current frame, as handed over by `sv.process_video`.
        index: Position of the frame in the video. Not used here, but part of
            the callback signature that `sv.process_video` calls.

    Returns:
        The annotated image, drawn on a copy of `frame`: object traces,
        labelled bounding boxes and the line-zone counter.

    Side effects:
        Updates the notebook-level `byte_tracker` and `line_zone` on every
        call, so both accumulate state across frames.
    """
    # model prediction on single frame and conversion to supervision Detections
    results = model(frame, verbose=False)[0]
    detections = sv.Detections.from_ultralytics(results)
    # only consider class ids from selected_classes defined above
    detections = detections[np.isin(detections.class_id, selected_classes)]
    # tracking detections
    detections = byte_tracker.update_with_detections(detections)

    # Build one "#<tracker id> <class name> <confidence>" label per detection.
    # The named attributes are zipped instead of unpacking the tuples yielded by
    # iterating `detections`: the length of those tuples differs between
    # supervision releases, while the attributes are stable.
    labels = [
        f"#{tracker_id} {CLASS_NAMES_DICT[class_id]} {confidence:0.2f}"
        for confidence, class_id, tracker_id
        in zip(detections.confidence, detections.class_id, detections.tracker_id)
    ]

    # draw on a copy so the frame passed in is left untouched
    annotated_frame = trace_annotator.annotate(
        scene=frame.copy(),
        detections=detections,
    )
    annotated_frame = box_annotator.annotate(
        scene=annotated_frame,
        detections=detections,
        labels=labels,
    )

    # update line counter
    line_zone.trigger(detections)
    # return frame with box and line annotated result
    return line_zone_annotator.annotate(annotated_frame, line_counter=line_zone)

# process the whole video: every frame of SOURCE_VIDEO_PATH is passed through
# `callback`, with TARGET_VIDEO_PATH given as the output location
sv.process_video(source_path=SOURCE_VIDEO_PATH, target_path=TARGET_VIDEO_PATH, callback=callback)