In [None]:
import os
import shutil
import tempfile

import splitfolders
import yaml

In [None]:
def _stage_image_label_pairs(images_folder, labels_folder, staging_folder):
    """Copy every ``.jpg`` image that has a matching ``.txt`` label into staging.

    Images go to ``<staging_folder>/images`` and labels to
    ``<staging_folder>/labels``. Images without a label file are skipped.

    Returns:
        list[tuple[str, str]]: the ``(image_name, label_name)`` pairs staged.
    """
    staged_images = os.path.join(staging_folder, 'images')
    staged_labels = os.path.join(staging_folder, 'labels')
    os.makedirs(staged_images, exist_ok=True)
    os.makedirs(staged_labels, exist_ok=True)

    pairs = []
    # Sorted so the staging order does not depend on the filesystem.
    for img_file in sorted(os.listdir(images_folder)):
        img_path = os.path.join(images_folder, img_file)
        # Only real '.jpg' files are images; anything else (e.g. a stray
        # '.txt' that happens to exist in both folders) must not be staged.
        if not img_file.endswith('.jpg') or not os.path.isfile(img_path):
            continue

        # Swap only the final extension. str.replace('.jpg', '.txt') would
        # also rewrite an earlier '.jpg' inside the name ('a.jpg.aug.jpg').
        label_file = os.path.splitext(img_file)[0] + '.txt'
        label_path = os.path.join(labels_folder, label_file)
        if not os.path.isfile(label_path):
            continue

        shutil.copy(img_path, os.path.join(staged_images, img_file))
        shutil.copy(label_path, os.path.join(staged_labels, label_file))
        pairs.append((img_file, label_file))
    return pairs


def _find_split_mismatches(output_folder, pairs, splits):
    """Return ``<split>/<image>`` entries whose image and label are not together.

    For each split, a staged pair is consistent when the image and its label
    are either both present or both absent in that split's folders.
    """
    problems = []
    for split in splits:
        images_dir = os.path.join(output_folder, split, 'images')
        labels_dir = os.path.join(output_folder, split, 'labels')
        for img_file, label_file in pairs:
            has_image = os.path.exists(os.path.join(images_dir, img_file))
            has_label = os.path.exists(os.path.join(labels_dir, label_file))
            if has_image != has_label:
                problems.append(os.path.join(split, img_file))
    return problems


def split_dataset(images_folder, labels_folder, output_folder, ratio=(.6, .2, .2)):
    """Split paired ``.jpg`` images and ``.txt`` labels into train/val/test folders.

    Every ``.jpg`` in ``images_folder`` that has a same-named ``.txt`` in
    ``labels_folder`` is copied into a temporary staging directory laid out as
    ``images/`` and ``labels/``. That directory is passed to
    ``splitfolders.ratio`` with a fixed seed of 1337, and the staging directory
    is removed afterwards, including when an error occurs.

    Args:
        images_folder: Directory containing the ``.jpg`` images.
        labels_folder: Directory containing the ``.txt`` label files.
        output_folder: Directory that receives the split dataset.
        ratio: Ratio tuple forwarded unchanged to ``splitfolders.ratio``.

    Raises:
        RuntimeError: If, after splitting, a staged image and its label are
            not in the same split of ``output_folder``.
    """
    splits = ('train', 'val', 'test')

    # Use a fresh, uniquely named staging directory: a fixed "temp_dataset" in
    # the working directory could still hold files from an interrupted run,
    # and those would silently become part of the split.
    temp_folder = tempfile.mkdtemp(prefix="temp_dataset_")
    try:
        pairs = _stage_image_label_pairs(images_folder, labels_folder, temp_folder)

        # Split the dataset
        splitfolders.ratio(temp_folder, output=output_folder, seed=1337, ratio=ratio, group_prefix=None)
    finally:
        # Always clean up; ignore_errors keeps a cleanup failure from masking
        # the exception that is already propagating.
        shutil.rmtree(temp_folder, ignore_errors=True)

    # Make sure every split has images/ and labels/ and move any loose files
    # found directly under a split folder into them.
    # NOTE(review): splitfolders is expected to already write
    # <output>/<split>/images and <output>/<split>/labels (one folder per
    # staged sub-folder), in which case the move below is a no-op safety net.
    for split in splits:
        split_root = os.path.join(output_folder, split)
        split_images_folder = os.path.join(split_root, 'images')
        split_labels_folder = os.path.join(split_root, 'labels')

        os.makedirs(split_images_folder, exist_ok=True)
        os.makedirs(split_labels_folder, exist_ok=True)

        for file in os.listdir(split_root):
            if file.endswith('.jpg'):
                shutil.move(os.path.join(split_root, file), split_images_folder)
            elif file.endswith('.txt'):
                shutil.move(os.path.join(split_root, file), split_labels_folder)

    # images/ and labels/ were handed to splitfolders as two separate folders
    # with group_prefix=None, so the call itself does not tie an image to its
    # label. Verify the pairing instead of assuming it.
    mismatched = _find_split_mismatches(output_folder, pairs, splits)
    if mismatched:
        raise RuntimeError(
            "{} image/label pair(s) ended up in different splits under {!r}, e.g. {}".format(
                len(mismatched), output_folder, mismatched[:5]
            )
        )

# Input folders (augmented images and their label files) and the folder that
# receives the train/val/test split.
# NOTE(review): these are absolute, machine-specific Windows paths.
images_folder = 'c:\\repos\\python\\Bacteria_counter\\validation_testing_etc\\augmentation\\augmented_images'
labels_folder = 'c:\\repos\\python\\Bacteria_counter\\validation_testing_etc\\augmentation\\augmented_labels'
output_folder = 'c:\\repos\\python\\Bacteria_counter\\validation_testing_etc\\Train_Test_Split\\dataset_train-test\\COF'

# Run the split; no ratio is passed, so split_dataset's default applies.
split_dataset(
    images_folder=images_folder,
    labels_folder=labels_folder,
    output_folder=output_folder,
)

In [None]:
import os
import yaml

def create_yaml_file(output_folder, class_names):
    """Write ``data.yaml`` into ``output_folder`` describing the split dataset.

    The file holds the ``train``/``val``/``test`` image directories (with
    backslashes turned into forward slashes), the number of classes (``nc``)
    and an index -> name mapping (``names``).

    Args:
        output_folder: Folder that contains the split dataset. The file is
            written here; the folder is not created by this function.
        class_names: Sequence of class names; position gives the class index.
    """
    # Index -> class name, in the order the names were given.
    indexed_names = dict(enumerate(class_names))

    # One "<output_folder>/<split>/images" entry per split, always with '/'.
    relative_image_dirs = {
        'train': 'train/images',
        'val': 'val/images',
        'test': 'test/images',
    }
    config = {
        split: os.path.join(output_folder, rel_dir).replace("\\", "/")
        for split, rel_dir in relative_image_dirs.items()
    }
    config['nc'] = len(class_names)
    config['names'] = indexed_names

    yaml_path = os.path.join(output_folder, 'data.yaml')
    with open(yaml_path, 'w') as handle:
        yaml.dump(config, handle, default_flow_style=False)

# Folder holding the split dataset; data.yaml is written into it.
# NOTE(review): this rebinds `output_folder` to a path relative to the current
# working directory, while the split cell above uses an absolute path for what
# looks like the same folder - confirm both resolve to the same place.
output_folder = 'validation_testing_etc/Train_Test_Split/dataset_train-test/COF'

# Class names in index order (adjust to match your dataset).
class_names = ['colony']

create_yaml_file(output_folder=output_folder, class_names=class_names)
