In [3]:
import os
import cv2
import random
from ultralytics import YOLO
from tqdm import tqdm

Utility Functions

In [9]:
def get_random_frame(video_path):
    """Return one randomly selected frame from a video file.

    The frame index is drawn uniformly from ``0 .. frame_count - 1`` where
    ``frame_count`` is the value reported by ``cv2.CAP_PROP_FRAME_COUNT``.

    Args:
        video_path: Path of the video to sample from.

    Returns:
        The frame returned by ``cv2.VideoCapture.read`` for the chosen index.

    Raises:
        ValueError: If the video cannot be opened or reports no frames.
        OSError: If the chosen frame cannot be read.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise ValueError(f"Cannot open video: {video_path}")

        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if frame_count <= 0:
            raise ValueError(f"Video reports no frames: {video_path}")

        random_frame_number = random.randint(0, frame_count - 1)
        cap.set(cv2.CAP_PROP_POS_FRAMES, random_frame_number)
        ret, frame = cap.read()
    finally:
        # Release the capture on every path, including the error paths above.
        cap.release()

    # read() reports failure through `ret`; surface it here instead of handing
    # None to the caller, where it would fail later with an unrelated error.
    if not ret or frame is None:
        raise OSError(
            f"Failed to read frame {random_frame_number} from video: {video_path}"
        )

    return frame

def resize_frame(frame):
    """Shrink a frame by a random factor drawn uniformly from [0.3, 0.6].

    Both dimensions are scaled by the same factor, so the aspect ratio is
    preserved up to integer truncation.

    Args:
        frame: Image array whose first two ``shape`` entries are
            (height, width); a trailing channel axis is optional.

    Returns:
        The result of ``cv2.resize`` with ``cv2.INTER_AREA`` interpolation.
    """
    # Slice instead of unpacking three values so 2-D (grayscale) frames work.
    height, width = frame.shape[:2]
    scale = random.uniform(0.3, 0.6)
    # cv2.resize expects (width, height); clamp to 1 pixel because very small
    # inputs would otherwise truncate to a zero-sized target.
    new_size = (max(1, int(width * scale)), max(1, int(height * scale)))
    return cv2.resize(frame, new_size, interpolation=cv2.INTER_AREA)

def insert_frame(base_frame, insert_frame):
    """Paste ``insert_frame`` at a random position inside ``base_frame``.

    Args:
        base_frame: Background image array; it is copied, not modified.
        insert_frame: Image array to paste. It must not exceed the base frame
            in height or width.

    Returns:
        A tuple ``(combined_frame, bbox)`` where ``combined_frame`` is a copy
        of ``base_frame`` with the insert pasted in, and ``bbox`` is
        ``(x, y, width, height)`` in pixels, with ``(x, y)`` the top-left
        corner of the pasted region.

    Raises:
        ValueError: If the insert is larger than the base frame.
    """
    # Slice instead of unpacking three values so 2-D (grayscale) frames work.
    h, w = base_frame.shape[:2]
    ih, iw = insert_frame.shape[:2]

    # Frames from different videos can differ in resolution; fail with a
    # readable message instead of randint's "empty range" error.
    if iw > w or ih > h:
        raise ValueError(
            f"Insert frame ({iw}x{ih}) does not fit inside base frame ({w}x{h})"
        )

    x = random.randint(0, w - iw)
    y = random.randint(0, h - ih)

    combined_frame = base_frame.copy()
    combined_frame[y:y + ih, x:x + iw] = insert_frame

    bbox = (x, y, iw, ih)

    return combined_frame, bbox

def save_image_and_annotation(image, bbox, output_image, output_label, index):
    """Write an image and its matching label file.

    The image is saved as ``image_{index}.jpg`` in ``output_image`` and the
    label as ``image_{index}.txt`` in ``output_label``. The label holds one
    line ``class_id center_x center_y width height`` with the box values
    normalised by the image size, or is empty when ``bbox`` is ``None``.

    Args:
        image: Image array to save; its first two ``shape`` entries are
            (height, width).
        bbox: ``(x, y, width, height)`` in pixels with ``(x, y)`` the top-left
            corner, or ``None`` for an image without any box.
        output_image: Existing directory that receives the image.
        output_label: Existing directory that receives the label file.
        index: Value used to build both file names.

    Raises:
        OSError: If the image cannot be written.
    """
    image_path = os.path.join(output_image, f'image_{index}.jpg')
    annotation_path = os.path.join(output_label, f'image_{index}.txt')

    # cv2.imwrite reports failure through its return value; check it before
    # writing the label so no label is left without its image.
    if not cv2.imwrite(image_path, image):
        raise OSError(f"Failed to write image: {image_path}")

    lines = []
    if bbox is not None:
        x, y, w, h = bbox
        # Slice instead of unpacking three values so 2-D (grayscale) images work.
        img_h, img_w = image.shape[:2]
        center_x = (x + w / 2) / img_w
        center_y = (y + h / 2) / img_h
        width = w / img_w
        height = h / img_h

        # The dataset has a single class.
        class_id = 0
        lines.append(f"{class_id} {center_x} {center_y} {width} {height}\n")

    # With no box, `lines` is empty and an empty label file is created.
    with open(annotation_path, 'w') as f:
        f.writelines(lines)

def process_videos(input_folder, output_folder, dataset_size, proc_chance):
    """Generate a synthetic dataset of frames with another frame pasted in.

    For each of ``dataset_size`` samples a random frame is taken from one
    video. With probability ``proc_chance`` it is saved unchanged with an
    empty label file. Otherwise a randomly shrunken frame from a second,
    different video is pasted at a random position and the pasted region is
    saved as the bounding box.

    Images go to ``output_folder/images`` and labels to
    ``output_folder/labels``; files are named by the sample index.

    Args:
        input_folder: Directory containing ``.mp4``, ``.avi`` or ``.mkv``
            videos (extension matched case-insensitively).
        output_folder: Directory that receives the ``images`` and ``labels``
            sub-folders; created if missing.
        dataset_size: Number of samples to generate.
        proc_chance: Probability in [0, 1] that a sample is saved without a
            pasted frame.

    Raises:
        ValueError: If fewer than two videos are found in ``input_folder``.
    """
    video_extensions = ('.mp4', '.avi', '.mkv')
    # Sort so that a seeded run does not depend on directory listing order.
    video_files = sorted(
        os.path.join(input_folder, f)
        for f in os.listdir(input_folder)
        if f.lower().endswith(video_extensions)
    )

    # Two distinct videos are sampled per image; fail early and clearly.
    if len(video_files) < 2:
        raise ValueError(
            f"Need at least 2 video files in {input_folder!r}, found {len(video_files)}"
        )

    labels = os.path.join(output_folder, "labels")
    images = os.path.join(output_folder, "images")

    # exist_ok lets the notebook cell be re-run; files with the same index
    # are overwritten.
    os.makedirs(labels, exist_ok=True)
    os.makedirs(images, exist_ok=True)

    for i in tqdm(range(dataset_size)):
        video_file1, video_file2 = random.sample(video_files, 2)

        frame1 = get_random_frame(video_file1)

        if random.random() < proc_chance:
            # Sample without a pasted frame: image plus empty label file.
            save_image_and_annotation(frame1, None, images, labels, i)
        else:
            frame2 = get_random_frame(video_file2)
            resized_frame2 = resize_frame(frame2)
            combined_frame, bbox = insert_frame(frame1, resized_frame2)
            save_image_and_annotation(combined_frame, bbox, images, labels, i)


Dataset creation

In [5]:
def create_dataset(input_folder, output_folder, train_size, val_size, test_size, proc_chance=0.5):
    """Generate the train, validation and test splits of the dataset.

    Each split is produced by ``process_videos`` from the same
    ``input_folder`` and written to its own sub-folder of ``output_folder``:
    ``train``, ``valid`` and ``test``, in that order.

    Args:
        input_folder: Directory passed to ``process_videos`` as video source.
        output_folder: Parent directory of the three split folders.
        train_size: Number of samples for the ``train`` split.
        val_size: Number of samples for the ``valid`` split.
        test_size: Number of samples for the ``test`` split.
        proc_chance: Forwarded unchanged to ``process_videos`` for every split.
    """
    split_sizes = (
        ("train", train_size),
        ("valid", val_size),
        ("test", test_size),
    )
    for split_name, sample_count in split_sizes:
        split_dir = os.path.join(output_folder, split_name)
        process_videos(input_folder, split_dir, sample_count, proc_chance)


YOLO training

In [None]:
# Folder containing the source videos, and folder that receives the dataset.
# Both are placeholders: fill them in before running this cell.
INPUT = ""
OUTPUT = ""

# Number of generated samples per split.
TRAIN_SIZE = 20000
VAL_SIZE = 3000
TEST_SIZE = 1000

# Seed for Python's `random` module so the random choices made while
# generating the dataset are repeatable between runs.
SEED = 42

# An empty INPUT would otherwise fail inside os.listdir with a less helpful
# error.
if not INPUT:
    raise ValueError("Set INPUT to the folder containing the source videos")

random.seed(SEED)
create_dataset(INPUT, OUTPUT, TRAIN_SIZE, VAL_SIZE, TEST_SIZE)

In [None]:
# Load the yolov8n.pt weights as the starting point for training.
model = YOLO("yolov8n.pt")

# Dataset description file expected inside the generated dataset folder.
# NOTE(review): no cell visible in this notebook writes data.yaml; presumably
# it has to be created by hand before training - confirm.
data_config = os.path.join(OUTPUT, "data.yaml")

# Train for 200 epochs on the CPU.
results = model.train(data=data_config, epochs=200, device="cpu")

# Validate the trained model; this rebinds `results` to the validation output.
results = model.val()