In [None]:
import os
import random
import shutil

def create_subset(source_path, dest_path, num_samples, random_state=42):
    """Copy a reproducible random subset of image/label pairs to ``dest_path``.

    ``source_path`` must contain an ``images`` directory with ``.jpg`` files
    and a ``labels`` directory with ``.txt`` files. Every image is paired with
    the label file that shares its base name (``0001.jpg`` <-> ``0001.txt``).

    Args:
        source_path: Dataset root containing ``images/`` and ``labels/``.
        dest_path: Output root; ``images/`` and ``labels/`` are created
            inside it when missing.
        num_samples: Number of image/label pairs to copy.
        random_state: Seed for the sampling. The same seed applied to the
            same source contents always selects the same files.

    Raises:
        ValueError: If the image and label counts differ, if an image has no
            label with the same base name, or if ``num_samples`` is negative
            or larger than the number of available pairs.
    """
    images_path = os.path.join(source_path, 'images')
    labels_path = os.path.join(source_path, 'labels')

    # os.listdir returns entries in arbitrary order; sorting makes the
    # seeded selection reproducible across runs and filesystems.
    image_files = sorted(
        f for f in os.listdir(images_path) if f.endswith('.jpg')
    )
    annotation_files = [
        f for f in os.listdir(labels_path) if f.endswith('.txt')
    ]

    # Explicit raise instead of assert: asserts are stripped under ``-O``.
    if len(image_files) != len(annotation_files):
        raise ValueError("Количество изображений и аннотаций должно совпадать")

    # Pair by base name rather than by list position, so an image can never
    # be copied together with another image's annotation.
    label_by_stem = {os.path.splitext(f)[0]: f for f in annotation_files}
    unlabeled = [
        f for f in image_files
        if os.path.splitext(f)[0] not in label_by_stem
    ]
    if unlabeled:
        raise ValueError(
            f"Не найдены аннотации для изображений: {unlabeled[:5]}"
        )

    if not 0 <= num_samples <= len(image_files):
        raise ValueError(
            f"num_samples={num_samples} вне допустимого диапазона "
            f"0..{len(image_files)}"
        )

    # A private generator keeps the seeding local and leaves the global
    # ``random`` module state untouched for the rest of the program.
    rng = random.Random(random_state)
    selected_images = rng.sample(image_files, num_samples)

    # Create the output tree only after the input has been validated.
    dest_images = os.path.join(dest_path, 'images')
    dest_labels = os.path.join(dest_path, 'labels')
    os.makedirs(dest_images, exist_ok=True)
    os.makedirs(dest_labels, exist_ok=True)

    for image_file in selected_images:
        annotation_file = label_by_stem[os.path.splitext(image_file)[0]]
        shutil.copy(
            os.path.join(images_path, image_file),
            os.path.join(dest_images, image_file),
        )
        shutil.copy(
            os.path.join(labels_path, annotation_file),
            os.path.join(dest_labels, annotation_file),
        )

    print(f"Создана подвыборка из {num_samples} элементов в {dest_path}")

# Example usage.
# Locations of the source train_set and validation_set directories.
train_set_path = "/path/to/train_set"
validation_set_path = "/path/to/validation_set"

# Destination directories for the generated subsets.
train_subset_path = "/path/to/train_subset"
validation_subset_path = "/path/to/validation_subset"

# Build both subsets with the same seed: 1000 samples from the train set,
# 200 samples from the validation set.
create_subset(
    source_path=train_set_path,
    dest_path=train_subset_path,
    num_samples=1000,
    random_state=42,
)
create_subset(
    source_path=validation_set_path,
    dest_path=validation_subset_path,
    num_samples=200,
    random_state=42,
)
