In [8]:
import os
from ultralytics import YOLO
import cv2

# Paths and Configuration
VIDEOS_DIR = '../raw_custom_data/mov'
OUTPUT_DIR = '../processed_videos_2'
os.makedirs(OUTPUT_DIR, exist_ok=True)

video_name = 'IMG_7153'
file_extension = 'mov'
video_path = os.path.join(VIDEOS_DIR, f'{video_name}.{file_extension}')
video_path_out = os.path.join(OUTPUT_DIR, f'{video_name}_out.{file_extension}')

# Video Properties
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise ValueError(f"Error opening video file: {video_path}")

ret, frame = cap.read()
if not ret:
    raise ValueError("Failed to read the first frame. Check the video file.")

H, W, _ = frame.shape
fps = int(cap.get(cv2.CAP_PROP_FPS))
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))


# Output Codec Selection Based on Extension
codec_map = {
    'mp4': 'MP4V',  # Codec for .mp4
    'mov': 'MJPG'   # Codec for .mov
}

# Output Video Writer
fourcc = cv2.VideoWriter_fourcc(*codec_map.get(file_extension.lower(), 'MJPG'))  # Codec for output video
out = cv2.VideoWriter(video_path_out, fourcc, fps, (W, H))

# Load YOLO model
model_path = os.path.join('..', 'runs', 'detect', 'train', 'weights', 'best.pt')
if not os.path.exists(model_path):
    raise FileNotFoundError(f"Model file not found: {model_path}")

# Load a model
model = YOLO(model_path)  # load a custom model

threshold = 0.3
frame_count = 0

class_colors = {
    0: (0, 255, 0),  # Green for class 0
    1: (0, 0, 255)   # Red for class 1
}

while ret:

    results = model(frame)[0]


    for result in results.boxes.data.tolist():
        x1, y1, x2, y2, score, class_id = result

        if score > threshold:
            # Select color based on class_id
            color = class_colors.get(int(class_id), (255, 255, 255))  # Default to white if class_id is not found
            
            # Draw bounding box with the selected color
            cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), color, 2)
            
            # Add label with class name and confidence
            label = f"{model.names[int(class_id)]} {score:.2f}"
            cv2.putText(frame, label, (int(x1), int(y1 - 10)),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.3, color, 3, cv2.LINE_AA)

    out.write(frame)
    frame_count += 1
    print(f"Processed frame {frame_count}/{total_frames}")

    ret, frame = cap.read()

cap.release()
out.release()
cv2.destroyAllWindows()

print(f"Output video saved to {video_path_out}")

OpenCV: FFMPEG: tag 0x47504a4d/'MJPG' is not supported with codec id 7 and format 'mov / QuickTime / MOV'
OpenCV: FFMPEG: fallback to use tag 0x6765706a/'jpeg'



0: 640x384 1 Oskar, 243.3ms
Speed: 2.5ms preprocess, 243.3ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 384)
Processed frame 1/410

0: 640x384 1 Oskar, 233.8ms
Speed: 3.5ms preprocess, 233.8ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 384)
Processed frame 2/410

0: 640x384 1 Oskar, 456.1ms
Speed: 2.1ms preprocess, 456.1ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 384)
Processed frame 3/410

0: 640x384 1 Dog, 271.6ms
Speed: 1.7ms preprocess, 271.6ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 384)
Processed frame 4/410

0: 640x384 1 Dog, 297.5ms
Speed: 2.8ms preprocess, 297.5ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 384)
Processed frame 5/410

0: 640x384 1 Dog, 305.8ms
Speed: 2.3ms preprocess, 305.8ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 384)
Processed frame 6/410

0: 640x384 1 Dog, 264.7ms
Speed: 3.0ms preprocess, 264.7ms inference, 1.3ms postprocess per image at shape (1, 

In [2]:
import os
from ultralytics import YOLO
import cv2

# Paths and Configuration
VIDEOS_DIR = '../raw_custom_data/mov'
OUTPUT_DIR = '../processed_videos_2'
os.makedirs(OUTPUT_DIR, exist_ok=True)

video_name = 'IMG_7153'
file_extension = 'mov'
video_path = os.path.join(VIDEOS_DIR, f'{video_name}.{file_extension}')
video_path_out = os.path.join(OUTPUT_DIR, f'{video_name}_out.{file_extension}')

# Video Properties
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise ValueError(f"Error opening video file: {video_path}")


# H, W, _ = frame.shape
# fps = int(cap.get(cv2.CAP_PROP_FPS))
# total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))


# Output Codec Selection Based on Extension
codec_map = {
    'mp4': 'MP4V',  # Codec for .mp4
    'mov': 'MJPG'   # Codec for .mov
}

# Output Video Writer
# fourcc = cv2.VideoWriter_fourcc(*codec_map.get(file_extension.lower(), 'MJPG'))  # Codec for output video
# out = cv2.VideoWriter(video_path_out, fourcc, fps, (W, H))

# Load YOLO model
model_path = os.path.join('..', 'runs', 'detect', 'train', 'weights', 'best.pt')
if not os.path.exists(model_path):
    raise FileNotFoundError(f"Model file not found: {model_path}")

# Load a model
model = YOLO(model_path)  # load a custom model

threshold = 0.3
frame_count = 0

class_colors = {
    0: (0, 255, 0),  # Green for class 0
    1: (0, 0, 255)   # Red for class 1
}

# read frames
ret = True
while ret:

    ret, frame = cap.read()
    if not ret:
        raise ValueError("Failed to read the first frame. Check the video file.")
    
    # detect objects
    results = model.track(frame, persist=True)

    # plot results
    frame_ = results[0].plot()

    # visualize
    cv2.imshow('frame', frame_)
    if cv2.waitKey(25) & 0xFF == ord('q'):
        break


0: 640x384 1 Oskar, 298.3ms
Speed: 2.1ms preprocess, 298.3ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 Oskar, 261.4ms
Speed: 2.1ms preprocess, 261.4ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 Dog, 260.0ms
Speed: 2.0ms preprocess, 260.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 Dog, 258.2ms
Speed: 2.2ms preprocess, 258.2ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 384)



2025-01-03 14:06:29.776 python[78188:5103835] +[IMKClient subclass]: chose IMKClient_Modern
2025-01-03 14:06:29.776 python[78188:5103835] +[IMKInputSession subclass]: chose IMKInputSession_Modern


0: 640x384 1 Dog, 280.3ms
Speed: 2.3ms preprocess, 280.3ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 Dog, 262.3ms
Speed: 1.9ms preprocess, 262.3ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 Dog, 288.7ms
Speed: 3.3ms preprocess, 288.7ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 Dog, 285.9ms
Speed: 3.7ms preprocess, 285.9ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 Dog, 268.4ms
Speed: 2.4ms preprocess, 268.4ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 Dog, 314.7ms
Speed: 1.9ms preprocess, 314.7ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 Dog, 275.7ms
Speed: 2.8ms preprocess, 275.7ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 Dog, 262.5ms
Speed: 2.7ms preprocess, 262.5ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 384)



KeyboardInterrupt: 