In [None]:
import os
import shutil
import random
from collections import defaultdict

In [None]:
# Folder holding the videos that have not been split yet.
dataset_path = "dataset"

# Destination folder of every split, also available through the `output` mapping.
train_path, test_path, valid_path = "train", "test", "validation"
output = {"train": train_path, "test": test_path, "validation": valid_path}

# Class tags looked up in file names.
classes = [
    "A",
    "B1", "B2", "B4", "B5", "B6",
    "G",
]

# Fraction of each class that goes into every split.
train, validation, test = 0.7, 0.15, 0.15

In [None]:
def extract_labels(file, class_names=None):
    """Build a multi-hot label vector from the tags embedded in a file name.

    The tags are read from the text that follows the ``label_`` marker and are
    separated by ``-``, e.g. ``clip_label_A-B2.mp4`` carries the tags ``A``
    and ``B2``.

    Args:
        file: File name (or path) of the video.
        class_names: Ordered class tags to encode. Defaults to the
            notebook-level ``classes`` list.

    Returns:
        A list with one int per class: 1 when the tag is present, 0 otherwise.
        All zeros when the name contains no ``label_`` marker.
    """
    if class_names is None:
        class_names = classes

    # Strip the actual extension instead of blindly dropping four characters,
    # so names such as "x_label_A.mpeg" or "x_label_A" keep their last tag intact.
    stem, _ext = os.path.splitext(file)

    parts = stem.split("label_")
    if len(parts) < 2:
        return [0] * len(class_names)

    tags = set(parts[1].split("-"))
    return [1 if cls in tags else 0 for cls in class_names]

In [None]:
# Group every .mp4 of the dataset folder under its first recognised class tag.
videos_by_class = defaultdict(list)
for file in os.listdir(dataset_path):
    if not file.endswith('.mp4'):
        continue

    labels = extract_labels(file)
    if 1 not in labels:
        # No "label_" marker or no known tag: calling .index(1) here would
        # raise ValueError and abort the whole scan, so skip the file instead.
        print(f"Skipping {file}: no recognised class label")
        continue

    # A file tagged with several classes is filed under the first match only.
    cls = classes[labels.index(1)]
    videos_by_class[cls].append(file)

In [None]:
# Copy the videos of every class into the train / validation / test folders.

# shutil.copy does not create missing parent folders, so make them up front.
for split_dir in output.values():
    os.makedirs(split_dir, exist_ok=True)

# Fixed seed: re-running the cell on the same files reproduces the same split.
split_rng = random.Random(42)

# Classes are visited in sorted order so the shared RNG is consumed identically on every run.
for cls, videos in sorted(videos_by_class.items()):
    total = len(videos)

    # If the class has less than 75 videos, put all into the train set
    if total < 75:
        train_videos, val_videos, test_videos = videos, [], []
    else:
        # The incoming order is whatever os.listdir returned; sort for a stable
        # starting point, then shuffle so the split is not tied to file naming.
        shuffled = sorted(videos)
        split_rng.shuffle(shuffled)

        train_count = int(total * train)
        val_count = int(total * validation)

        train_videos = shuffled[:train_count]
        val_videos = shuffled[train_count:train_count + val_count]
        # The test split takes everything that is left, so rounding never drops a video.
        test_videos = shuffled[train_count + val_count:]

    for split_name, split_videos in (
        ("train", train_videos),
        ("validation", val_videos),
        ("test", test_videos),
    ):
        for video in split_videos:
            shutil.copy(os.path.join(dataset_path, video), os.path.join(output[split_name], video))


In [None]:
import os
import shutil
import cv2

# Class tags and how many clips per class go into each held-out split.
classes = ["A", "B1", "B2", "B4", "B5", "B6", "G"]
num_test, num_val = 15, 10

# Source folders that are scanned for candidate clips.
video_dirs = [
    r"C:\Users\Olenka\Downloads\1-1004",
    r"C:\Users\Olenka\Downloads\1005-2004",
    r"C:\Users\Olenka\Downloads\2005-2804",
    r"C:\Users\Olenka\Downloads\2805-3319",
    r"C:\Users\Olenka\Downloads\3320-3954",
]

# Training folder (only read below) followed by the two destination folders.
train_dir = r"C:\Users\Olenka\PycharmProjects\violence_detection\train"
output_test_dir, output_val_dir = (
    r"C:\Users\Olenka\PycharmProjects\violence_detection\test",
    r"C:\Users\Olenka\PycharmProjects\violence_detection\validation",
)

# Make sure both destination folders exist before anything is copied into them.
for _held_out_dir in (output_test_dir, output_val_dir):
    os.makedirs(_held_out_dir, exist_ok=True)


def get_video_duration_sec(video_path: str) -> float:
    """Return the duration of a video in seconds.

    The duration is computed as frame count divided by frames per second, both
    read from the capture's metadata.

    Args:
        video_path: Path of the video file to inspect.

    Returns:
        The duration in seconds, or ``0.0`` when the file cannot be opened or
        its metadata does not give a positive frame count and frame rate.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f"Warning: Cannot open video {video_path}")
            return 0.0
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    finally:
        # Release the capture on every path, including the failed-open one.
        cap.release()

    # Written as a negated "> 0" test so that NaN metadata is rejected too;
    # this keeps the result from ever being negative or NaN.
    if not (fps > 0 and frame_count > 0):
        return 0.0
    return frame_count / fps


def clean_filename(filename: str) -> str:
    """Return ``filename`` with every ``#`` character removed."""
    kept_chars = [char for char in filename if char != "#"]
    return "".join(kept_chars)

# Names of the regular files in the training folder, with "#" stripped.
train_filenames_clean = {
    clean_filename(entry)
    for entry in os.listdir(train_dir)
    if os.path.isfile(os.path.join(train_dir, entry))
}

# One independent, initially empty candidate list per class.
videos_by_class = dict((cls, []) for cls in classes)

# Gather single-label clips that are not already part of the training set.
print("Collecting videos from source folders...")
for dir_path in video_dirs:
    for fname in os.listdir(dir_path):
        full_path = os.path.join(dir_path, fname)
        if not os.path.isfile(full_path):
            continue

        labels = extract_labels(fname)
        # Only clips carrying exactly one class tag are eligible.
        if sum(labels) != 1:
            continue

        # Names are compared with "#" removed on both sides.
        if clean_filename(fname) in train_filenames_clean:
            continue

        duration = get_video_duration_sec(full_path)
        if not duration > 0:
            # A zero duration marks a clip that could not be read. Candidates
            # are later sorted by duration, so keeping it would place the
            # unreadable clip first and get it picked for test/validation.
            print(f"Skipping {fname}: duration could not be determined")
            continue

        cls = classes[labels.index(1)]
        videos_by_class[cls].append((full_path, duration, fname))

for cls in classes:
    # Shortest clips first. File name and path break ties so that the order does
    # not depend on the arbitrary order in which the folders were listed.
    videos_by_class[cls].sort(key=lambda x: (x[1], x[2], x[0]))

for cls in classes:
    # Take enough videos of this class to cover both test and validation
    needed_videos = num_test + num_val
    selected = videos_by_class[cls][:needed_videos]

    if len(selected) < needed_videos:
        print(f"Warning: Only found {len(selected)} videos for class {cls} to fill test and validation")

    # Test is filled first; validation gets what remains of the selection.
    test_videos = selected[:num_test]
    val_videos = selected[num_test:num_test + num_val]

    for split_name, dst_dir, split_videos in (
        ("test", output_test_dir, test_videos),
        ("validation", output_val_dir, val_videos),
    ):
        for src_path, _, fname in split_videos:
            dst_path = os.path.join(dst_dir, fname)
            shutil.copy(src_path, dst_path)
            print(f"Copied {fname} to {split_name}")
