In [25]:
import os
import yaml
from random import randrange
import shutil

In [26]:
# Relative locations of the train/val split folders for images and labels,
# plus the folder that holds the raw, unsplit dataset.
images_train_dir, images_val_dir = [
    os.path.join("", "images", part) for part in ("train", "val")
]
labels_train_dir, labels_val_dir = [
    os.path.join("", "labels", part) for part in ("train", "val")
]
dataset_dir = os.path.join("", "dataset")

# Create every folder; folders that already exist are left untouched.
for _folder in (
    images_train_dir,
    images_val_dir,
    labels_train_dir,
    labels_val_dir,
    dataset_dir,
):
    os.makedirs(_folder, exist_ok=True)
del _folder  # keep the notebook namespace free of the loop variable

In [27]:
def create_yolo_training_config(path, product_specific=False, product_type_id=None, names=None):
    """Split the dataset, filter the label files and write ``data.yaml``.

    Steps, in order:
      1. create ``path`` and the working folders ``images/{train,val}``,
         ``labels/{train,val}`` and ``dataset`` (the working folders are
         relative to the current working directory, not to ``path``);
      2. call ``split_data()`` to distribute the dataset files;
      3. call ``update_label()`` on the validation and training label files;
      4. write ``<path>/data.yaml``.

    Args:
        path: Directory that receives ``data.yaml``; created when missing. It
            is also used as the prefix of the ``train``/``val`` YAML entries.
        product_specific: When true and ``product_type_id`` is not ``None``,
            the id is stored in the YAML under ``product_type_id``.
        product_type_id: Product type id forwarded to ``update_label``.
        names: Ignored. Kept only so existing callers keep working; the class
            names are always rebuilt from the label files.

    Returns:
        None. The result is the files written to disk and two status lines
        printed to stdout.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(path, exist_ok=True)

    # Working folders, relative to the current working directory.
    images_train_dir = os.path.join('images', 'train')
    images_val_dir = os.path.join('images', 'val')
    labels_train_dir = os.path.join('labels', 'train')
    labels_val_dir = os.path.join('labels', 'val')
    dataset_dir = os.path.join("", "dataset")

    for folder in (images_train_dir, images_val_dir, labels_train_dir, labels_val_dir, dataset_dir):
        os.makedirs(folder, exist_ok=True)

    labels_names_val, labels_names_train = split_data()

    # update_label appends one entry per matching label line, so the collected
    # list normally contains the same class name many times.
    collected_names = []
    collected_names = update_label(labels_names_val, labels_val_dir, product_type_id, collected_names)
    collected_names = update_label(labels_names_train, labels_train_dir, product_type_id, collected_names)

    # 'nc' must be the number of distinct classes: drop duplicates while
    # keeping the order in which each class name was first seen.
    names = list(dict.fromkeys(collected_names or []))

    # NOTE(review): 'train'/'val' point below `path`, but split_data() copies
    # the files into images/... relative to the working directory, and the
    # 'labels' entries are working-directory relative as well. The entries only
    # resolve to the populated folders when `path` is the working directory.
    yaml_content = {
        'train': os.path.join(path, 'images', 'train'),
        'val': os.path.join(path, 'images', 'val'),
        'labels': {
            'train': labels_train_dir,
            'val': labels_val_dir
        },
        'nc': len(names),
        'names': names
    }
    if product_specific and product_type_id is not None:
        yaml_content['product_type_id'] = product_type_id

    # Write the YAML file
    yaml_path = os.path.join(path, 'data.yaml')

    with open(yaml_path, 'w') as yaml_file:
        yaml.dump(yaml_content, yaml_file, default_flow_style=False)
    print(f'YOLO training configuration created at {yaml_path}')
    print(f'Folder structure created under {path}')

In [28]:
def count_jpg_images(folder_path, format=".jpg"):
    """Count the entries of ``folder_path`` whose name ends with ``format``.

    Despite its name the function works for any suffix; ``".jpg"`` is only the
    default. The comparison is case-sensitive, so ``"photo.JPG"`` is not
    counted for ``".jpg"``.

    Args:
        folder_path: Directory to scan (not recursive).
        format: File-name suffix to look for, e.g. ``".jpg"`` or ``".txt"``.

    Returns:
        int: Number of matching entries.

    Raises:
        OSError: If ``folder_path`` cannot be listed.
    """
    return sum(1 for file_name in os.listdir(folder_path) if file_name.endswith(format))


In [29]:
def get_jpg_image_names(folder_path, format=".jpg"):
    """Return the sorted names in ``folder_path`` that end with ``format``.

    Despite its name the function works for any suffix; ``".jpg"`` is only the
    default. The comparison is case-sensitive.

    The result is sorted because ``os.listdir`` returns entries in arbitrary
    order; sorting makes the output reproducible across runs and platforms.

    Args:
        folder_path: Directory to scan (not recursive).
        format: File-name suffix to look for, e.g. ``".jpg"`` or ``".txt"``.

    Returns:
        list[str]: Matching file names (without the directory part).

    Raises:
        OSError: If ``folder_path`` cannot be listed.
    """
    return sorted(
        file_name for file_name in os.listdir(folder_path) if file_name.endswith(format)
    )


In [30]:
def copy_image(src_folder, dest_folder, image_name):
    """Copy ``image_name`` from ``src_folder`` into ``dest_folder``.

    ``dest_folder`` is created first when it does not exist yet. The copy
    keeps the same file name.
    """
    source = os.path.join(src_folder, image_name)
    target = os.path.join(dest_folder, image_name)

    # The destination folder must exist before shutil.copy can write into it.
    os.makedirs(dest_folder, exist_ok=True)
    shutil.copy(source, target)

In [31]:
def copy_file(src_folder, dest_folder, file_name):
    """Copy ``file_name`` from ``src_folder`` into ``dest_folder``.

    ``dest_folder`` is created first when it does not exist yet. The copy
    keeps the same file name.
    """
    origin = os.path.join(src_folder, file_name)
    destination = os.path.join(dest_folder, file_name)

    # The destination folder must exist before shutil.copy can write into it.
    os.makedirs(dest_folder, exist_ok=True)
    shutil.copy(origin, destination)

In [32]:
def split_data():
    """Randomly split the files in ``dataset`` into train and validation folders.

    Every ``.jpg`` file in ``dataset`` is paired with the ``.txt`` file that
    has the same base name. Each pair draws ``randrange(10)``: a draw of 0 or 9
    sends it to the validation folders (about 2 pairs in 10), any other draw
    sends it to the training folders. Files are copied, so ``dataset`` itself
    is left unchanged.

    Images without a matching label file are reported and skipped; ``.txt``
    files without a matching image are ignored.

    NOTE(review): files copied by an earlier run are not removed, so running
    the split twice can leave the same pair in both train and val.

    Returns:
        tuple[list[str], list[str]]: ``(labels_names_val, labels_names_train)``,
        the label file names copied to the validation and training folders.
    """
    # Source and destination folders, relative to the working directory.
    dataset_dir = 'dataset'
    image_train_dir = 'images/train'
    image_val_dir = 'images/val'
    labels_train_dir = 'labels/train'
    labels_val_dir = 'labels/val'

    # Draws that select the validation set.
    validation_draws = (0, 9)

    # Pair by base name rather than by list position: the two listings are not
    # guaranteed to have the same length or the same order.
    label_by_stem = {
        os.path.splitext(label_name)[0]: label_name
        for label_name in get_jpg_image_names(dataset_dir, ".txt")
    }

    labels_names_train = []
    labels_names_val = []

    for image_name in sorted(get_jpg_image_names(dataset_dir)):
        label_name = label_by_stem.get(os.path.splitext(image_name)[0])
        if label_name is None:
            print(f'Skipping {image_name}: no matching label file in {dataset_dir}')
            continue

        if randrange(10) in validation_draws:
            copy_image(dataset_dir, image_val_dir, image_name)
            copy_file(dataset_dir, labels_val_dir, label_name)
            labels_names_val.append(label_name)
        else:
            copy_image(dataset_dir, image_train_dir, image_name)
            copy_file(dataset_dir, labels_train_dir, label_name)
            labels_names_train.append(label_name)

    return labels_names_val, labels_names_train


In [33]:
def update_label(label_names, path, product_type_id=0, names=None):
    """Rewrite label files in place, keeping only lines of one product type.

    Each line is split on whitespace: token 0 is collected as a name, token 1
    is parsed as an integer product type id, and the remaining tokens are the
    payload. For every line whose id equals ``product_type_id`` the name is
    appended to ``names`` and the payload is kept. Each file is then
    overwritten with its kept payloads, one per line (no trailing newline); a
    file without any matching line becomes empty.

    The rewritten files no longer contain the name and id columns, so the
    function is meant to run once per file.

    Args:
        label_names: File names of the label files inside ``path``.
        path: Directory containing the label files.
        product_type_id: Product type id to keep.
        names: List that receives the collected names; it is modified in
            place. A new list is created when omitted.

    Returns:
        list: ``names`` with one entry appended per matching line (duplicates
        are kept).

    Raises:
        ValueError: If the second token of a line is not an integer.
        OSError: If a label file cannot be read or written.
    """
    if names is None:
        names = []

    for label_name in label_names:
        label_path = os.path.join(path, label_name)

        # Collected per file, so a file without matches cannot inherit the
        # content of the previously processed file.
        kept_lines = []
        with open(label_path, "r") as fileref:
            for line in fileref:
                tokens = line.split()
                # Blank or truncated lines carry no product type id.
                if len(tokens) < 2:
                    continue
                if product_type_id == int(tokens[1]):
                    names.append(tokens[0])
                    kept_lines.append(" ".join(tokens[2:]))

        # The file is only rewritten after it was read completely, so a parse
        # error above leaves it untouched.
        with open(label_path, 'w') as file:
            file.write("\n".join(kept_lines))
    print(names)
    return names

In [34]:
# Product type ids, keyed by product name.
products = dict(Toilet=0, Bathtub=1, Sink=2)
# Empty list reserved for class names.
names = list()
print(products["Sink"])

2


In [35]:
# Product type ids, keyed by product name.
products = dict(Toilet=0, Bathtub=1, Sink=2)

# Example usage: build the split and data.yaml for the "Sink" product type.
create_yolo_training_config(
    "path/to/directory",
    product_specific=True,
    product_type_id=products["Sink"],
)



['Sink1', 'Sink1', 'Sink5', 'Sink1', 'Sink1', 'Sink5', 'Sink1', 'Sink1']
['Sink1', 'Sink1', 'Sink5', 'Sink1', 'Sink1', 'Sink5', 'Sink1', 'Sink1', 'Sink5', 'Sink1', 'Sink1', 'Sink1', 'Sink1', 'Sink5', 'Sink5', 'Sink5']
YOLO training configuration created at path/to/directory/data.yaml
Folder structure created under path/to/directory
