In [1]:
import os
import cv2
from ultralytics import YOLO

In [2]:
def video_processing(video_path, output_path, detection_threshold):
    input_path = video_path
    output_path = output_path

    cap = cv2.VideoCapture(input_path)

    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))    

    out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))

    MODEL_PATH = '.pyenv/runs/detect/train4/weights/last.pt'

    model = YOLO(MODEL_PATH)

    DETECTION_THRESHOLD = detection_threshold

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        results = model(frame)[0]

        for result in results.boxes.data.tolist():
            x1, y1, x2, y2, score, class_id = result
            if score > DETECTION_THRESHOLD:
                label = model.names[int(class_id)].upper()
                cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 4)
                cv2.putText(frame, label, (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 255, 0), 3, cv2.LINE_AA)

        out.write(frame)
    
    cap.release()
    out.release()
    cv2.destroyAllWindows()

    print(f"Processed video saved to: {output_path}")

In [3]:
video_processing('/home/ubuntu/video/dog.mp4', '/home/ubuntu/video_output/dog.mp4', 0.2)


0: 640x384 1 human_body, 1 dog, 87.0ms
Speed: 7.9ms preprocess, 87.0ms inference, 171.7ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 human_body, 1 dog, 9.7ms
Speed: 3.4ms preprocess, 9.7ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 human_body, 1 dog, 9.7ms
Speed: 2.4ms preprocess, 9.7ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 human_body, 1 dog, 9.8ms
Speed: 2.3ms preprocess, 9.8ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 human_body, 1 dog, 9.8ms
Speed: 2.3ms preprocess, 9.8ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 human_body, 1 dog, 9.7ms
Speed: 2.2ms preprocess, 9.7ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 human_body, 1 dog, 9.5ms
Speed: 2.3ms preprocess, 9.5ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 human_body, 1 dog, 9.3ms
Speed: 2.0ms

In [7]:
video_processing('/home/ubuntu/video/pedestrian1.mp4', '/home/ubuntu/video_output/pedestrian1_detect.mp4', 0.2)


0: 384x640 8 human_bodys, 10.1ms
Speed: 2.3ms preprocess, 10.1ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 human_bodys, 9.0ms
Speed: 2.1ms preprocess, 9.0ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 human_bodys, 9.0ms
Speed: 2.1ms preprocess, 9.0ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 human_bodys, 9.1ms
Speed: 1.9ms preprocess, 9.1ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 human_bodys, 8.9ms
Speed: 2.0ms preprocess, 8.9ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 human_bodys, 9.0ms
Speed: 1.8ms preprocess, 9.0ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 human_bodys, 8.9ms
Speed: 2.0ms preprocess, 8.9ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 human_bodys, 9.0ms
Speed: 1.9ms preprocess, 9.0ms inference, 1.1ms postprocess pe

In [10]:
video_processing('/home/ubuntu/video/chicago.mp4', '/home/ubuntu/video_output/chicago_detect.mp4', 0.2)


0: 384x640 2 cars, 1 truck, 9.6ms
Speed: 2.3ms preprocess, 9.6ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 1 truck, 10.4ms
Speed: 2.1ms preprocess, 10.4ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 1 truck, 9.1ms
Speed: 2.0ms preprocess, 9.1ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 1 truck, 9.3ms
Speed: 2.0ms preprocess, 9.3ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 1 truck, 9.4ms
Speed: 2.1ms preprocess, 9.4ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 1 truck, 9.2ms
Speed: 1.9ms preprocess, 9.2ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 1 truck, 9.0ms
Speed: 2.3ms preprocess, 9.0ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 1 truck, 10.5ms
Speed: 2.0ms preprocess, 10.5ms inference, 1.

In [11]:
video_processing('/home/ubuntu/video/car.mp4', '/home/ubuntu/video_output/car_detect.mp4', 0.2)


0: 384x640 3 cars, 9.7ms
Speed: 2.6ms preprocess, 9.7ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 cars, 9.3ms
Speed: 2.6ms preprocess, 9.3ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 cars, 9.4ms
Speed: 2.5ms preprocess, 9.4ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 9.4ms
Speed: 2.5ms preprocess, 9.4ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 9.5ms
Speed: 2.5ms preprocess, 9.5ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 9.6ms
Speed: 2.6ms preprocess, 9.6ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 cars, 9.7ms
Speed: 2.5ms preprocess, 9.7ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 9.6ms
Speed: 2.5ms preprocess, 9.6ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 cars, 9.4m

In [12]:
video_processing('/home/ubuntu/bus.mp4', '/home/ubuntu/video_output/bus_detect.mp4', 0.2)


0: 384x640 1 human_body, 1 bus, 9.5ms
Speed: 2.0ms preprocess, 9.5ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 human_body, 1 bus, 9.1ms
Speed: 2.4ms preprocess, 9.1ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 human_body, 1 bus, 8.9ms
Speed: 1.9ms preprocess, 8.9ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 human_body, 1 bus, 9.0ms
Speed: 1.9ms preprocess, 9.0ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 human_body, 1 bus, 9.7ms
Speed: 1.9ms preprocess, 9.7ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 human_body, 1 bus, 8.9ms
Speed: 1.8ms preprocess, 8.9ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 human_body, 1 bus, 9.6ms
Speed: 2.8ms preprocess, 9.6ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 human_body, 1 bus, 10.5ms
Speed: 1.9ms pr