In [1]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.2.29-py3-none-any.whl (780 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/780.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m286.7/780.5 kB[0m [31m8.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m780.5/780.5 kB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
Collecting ultralytics-thop>=0.2.5 (from ultralytics)
  Downloading ultralytics_thop-0.2.7-py3-none-any.whl (25 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Using cached

In [2]:
from ultralytics import YOLO
import cv2
import os

### The output video has flickering boxes because detection runs on only a few frames per second (`frames_per_second` is set low), and boxes are drawn only on those frames.

In [8]:
def process_video(video_path, model_path, frames_per_second=2, save_path='output.mp4', show_labels=False):
    """Detect vehicles in a video with a YOLO model and save an annotated copy.

    Detection runs on roughly ``frames_per_second`` frames for every second of
    source video; all other frames are written to the output unchanged. Boxes
    are therefore drawn only on the sampled frames. The default is deliberately
    low so the video can be processed on a slow machine; raise it (e.g. to 24
    or 30) on faster hardware.

    Args:
        video_path: Path or URL passed to ``cv2.VideoCapture``.
        model_path: Weights file passed to ``YOLO``.
        frames_per_second: How many frames per second of video to run
            detection on. Must be greater than 0.
        save_path: Where the annotated video is written.
        show_labels: If True, also draw the class name and confidence
            (in percent) at the top-left corner of each box.

    Raises:
        ValueError: If ``frames_per_second`` is not greater than 0.
        RuntimeError: If the input video or the output file cannot be opened.
    """
    if frames_per_second <= 0:
        raise ValueError("frames_per_second must be greater than 0")

    # Class id -> label for the detections that get drawn.
    # NOTE(review): presumably COCO class ids as used by the stock YOLOv8
    # weights - confirm if a custom-trained model is passed in.
    vehicle_classes = {2: "car", 3: "motorcycle", 5: "bus", 7: "truck"}

    # Load the model
    model = YOLO(model_path)

    # Open the video file
    cap = cv2.VideoCapture(video_path)
    out = None
    try:
        if not cap.isOpened():
            raise RuntimeError(f"Could not open video: {video_path}")

        # Keep the FPS as a float: truncating e.g. 29.97 to 29 would change
        # the playback speed of the output.
        source_fps = cap.get(cv2.CAP_PROP_FPS)
        if not source_fps > 0:
            # Some sources report 0 (or NaN) FPS; fall back to a common value
            # so the writer and the sampling interval stay valid.
            source_fps = 30.0
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # 'mp4v' matches the .mp4 container of the default save_path
        # ('XVID' is an AVI codec tag).
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(save_path, fourcc, source_fps, (width, height))
        if not out.isOpened():
            raise RuntimeError(f"Could not open output video for writing: {save_path}")

        # Run detection on every `frame_interval`-th frame. Clamp to 1 so a
        # request above the source FPS means "every frame" rather than a
        # modulo-by-zero.
        frame_interval = max(1, int(source_fps // frames_per_second))

        frame_count = 0
        while cap.isOpened():
            success, img = cap.read()
            if not success:
                break

            if frame_count % frame_interval == 0:
                for r in model(img):
                    for box in r.boxes:
                        class_name = vehicle_classes.get(int(box.cls[0]))
                        if class_name is None:
                            continue  # not a vehicle class we draw

                        x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])

                        # Draw the bounding box on this frame
                        cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 255), 3)

                        if show_labels:
                            confidence = float(box.conf[0]) * 100
                            cv2.putText(
                                img,
                                f"{class_name} {confidence:.2f}",
                                (x1, y1),
                                cv2.FONT_HERSHEY_SIMPLEX,
                                0.6,
                                (255, 0, 0),
                                2,
                            )

            # Write the (possibly annotated) frame to the video
            out.write(img)
            frame_count += 1
    finally:
        # Always release the handles, even if inference or drawing fails,
        # so the output file is finalized and the capture is closed.
        cap.release()
        if out is not None:
            out.release()

    print(f"Processed video saved as: {save_path}")

In [9]:
# Demo run: annotate a video using the `yolov8x.pt` weights.
video_path = 'https://docs.google.com/uc?export=download&confirm=&id=1pz68D1Gsx80MoPg-_q-IbEdESEmyVLm-'  # swap in your own video path or URL
model_path = 'yolov8x.pt'  # swap in your own weights file
process_video(video_path=video_path, model_path=model_path)


0: 384x640 3 cars, 1 truck, 50.4ms
Speed: 2.8ms preprocess, 50.4ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 cars, 1 truck, 49.6ms
Speed: 3.5ms preprocess, 49.6ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 2 trucks, 62.0ms
Speed: 6.8ms preprocess, 62.0ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 cars, 2 trucks, 62.1ms
Speed: 5.3ms preprocess, 62.1ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 6 cars, 2 trucks, 62.6ms
Speed: 3.3ms preprocess, 62.6ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 5 cars, 2 trucks, 62.0ms
Speed: 5.8ms preprocess, 62.0ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 2 trucks, 62.1ms
Speed: 4.6ms preprocess, 62.1ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 1 truck, 62.1ms
Speed: 3.5ms prepr