In [163]:
import os
import re
import yaml
from random import randrange
import shutil

In [None]:
# Relative locations of the train/validation splits for images and labels,
# plus the folder that holds the raw, unsplit dataset.
images_train_dir = os.path.join("", 'images', 'train')
images_val_dir = os.path.join("", 'images', 'val')
labels_train_dir = os.path.join("", 'labels', 'train')
labels_val_dir = os.path.join("", 'labels', 'val')
dataset_dir = os.path.join("", "dataset")

# Create every folder up front; folders that already exist are left untouched.
for _folder in (
    images_train_dir,
    images_val_dir,
    labels_train_dir,
    labels_val_dir,
    dataset_dir,
):
    os.makedirs(_folder, exist_ok=True)

In [165]:
def create_yolo_training_config(path, product_specific=False, product_type_id=None, names=None):
    """Split the dataset and write a YOLO ``data.yaml`` file into ``path``.

    Steps performed:
      1. Make sure ``path`` and the working folders (``images/train``,
         ``images/val``, ``labels/train``, ``labels/val``, ``dataset``) exist.
      2. Call ``split_data`` to distribute the dataset over the train and
         validation folders.
      3. Write ``data.yaml`` inside ``path``.

    Args:
        path: Folder that receives ``data.yaml``. An empty string means the
            current working directory.
        product_specific: Forwarded to ``split_data``; together with
            ``product_type_id`` it also adds ``product_type_id`` to the YAML.
        product_type_id: Product-type id forwarded to ``split_data``.
        names: Class names forwarded to ``split_data``; also used for the
            ``nc`` and ``names`` entries of the YAML.

    NOTE(review): ``split_data`` writes into ``images/...`` and ``labels/...``
    relative to the working directory, while the ``train``/``val`` entries
    below point underneath ``path``. Unless ``path`` is the working directory
    these refer to different folders -- confirm which one is intended.

    NOTE(review): ``copy_file_names`` numbers classes after stripping
    non-letters from ``names`` and removing duplicates, whereas ``nc`` and
    ``names`` below use the raw list -- confirm that the two agree for the
    names you pass in.
    """
    # exist_ok replaces the racy "check, then create" pair. The guard keeps an
    # empty path (meaning "here") from crashing in os.makedirs("").
    if path:
        os.makedirs(path, exist_ok=True)

    # Working folders, relative to the current working directory.
    images_train_dir = os.path.join('images', 'train')
    images_val_dir = os.path.join('images', 'val')
    labels_train_dir = os.path.join('labels', 'train')
    labels_val_dir = os.path.join('labels', 'val')
    dataset_dir = os.path.join("", "dataset")  # raw data lives in "dataset"

    for folder in (images_train_dir, images_val_dir, labels_train_dir, labels_val_dir, dataset_dir):
        os.makedirs(folder, exist_ok=True)

    # Distribute images/labels over the train and validation folders.
    split_data(product_specific, product_type_id, names)

    # Create the YAML content
    yaml_content = {
        'train': os.path.join(path, 'images', 'train'),
        'val': os.path.join(path, 'images', 'val'),
        'labels': {
            'train': labels_train_dir,
            'val': labels_val_dir
        },
        'nc': len(names) if names else 0,
        'names': names if names else []
    }
    if product_specific and product_type_id is not None:
        yaml_content['product_type_id'] = product_type_id

    # Write the YAML file
    yaml_path = os.path.join(path, 'data.yaml')

    with open(yaml_path, 'w') as yaml_file:
        yaml.dump(yaml_content, yaml_file, default_flow_style=False)
    print(f'YOLO training configuration created at {yaml_path}')
    print(f'Folder structure created under {path}')

In [166]:
def count_jpg_images(folder_path, format = ".jpg"):
    """Count the entries of ``folder_path`` whose name ends with ``format``.

    Args:
        folder_path: Directory to scan (not recursive).
        format: File-name suffix to look for, e.g. ``".jpg"`` or ``".txt"``.
            Any suffix is accepted; the comparison is case-sensitive.

    Returns:
        The number of matching directory entries.
    """
    # One suffix-driven pass replaces the per-extension branches, so an
    # extension other than ".jpg"/".txt" is counted instead of yielding None.
    return sum(1 for file_name in os.listdir(folder_path) if file_name.endswith(format))

In [167]:
def get_jpg_image_names(folder_path, format = ".jpg"):
    """List the entries of ``folder_path`` whose name ends with ``format``.

    Args:
        folder_path: Directory to scan (not recursive).
        format: File-name suffix to look for, e.g. ``".jpg"`` or ``".txt"``.
            Any suffix is accepted; the comparison is case-sensitive.

    Returns:
        The matching file names (without the folder), sorted alphabetically.
    """
    # Filter on the requested suffix itself: previously every format other
    # than ".jpg" returned the ".txt" files. Sorting makes the result
    # independent of the arbitrary order in which os.listdir reports entries.
    return sorted(file_name for file_name in os.listdir(folder_path) if file_name.endswith(format))

In [168]:
def copy_image(src_folder, dest_folder, image_name):
    """Copy the file ``image_name`` from ``src_folder`` into ``dest_folder``.

    ``dest_folder`` is created first when it does not exist yet. The copy
    keeps the same file name.
    """
    os.makedirs(dest_folder, exist_ok=True)

    # Same file name on both sides; only the containing folder differs.
    source_file, target_file = (
        os.path.join(folder, image_name) for folder in (src_folder, dest_folder)
    )
    shutil.copy(source_file, target_file)

In [169]:
def copy_file_group(src_folder, dest_folder, label_name, product_type_id):
    """Write a filtered, re-indexed copy of a label file into ``dest_folder``.

    Each line of the source file is split on whitespace. A line is kept when
    its second token is a non-negative integer equal to ``product_type_id``.
    For kept lines the first two tokens are dropped, the third token is
    decremented by one, and the remaining tokens (the coordinates) are
    appended unchanged.

    Args:
        src_folder: Folder containing the source label file.
        dest_folder: Folder receiving the filtered file (created if missing).
        label_name: File name of the label file, used on both sides.
        product_type_id: Integer the second token has to equal.

    Raises:
        ValueError: If the third token of a matching line is not an integer.
    """
    # Ensure the destination folder exists
    os.makedirs(dest_folder, exist_ok=True)

    # Define the full file paths
    src_path = os.path.join(src_folder, label_name)
    dest_path = os.path.join(dest_folder, label_name)

    matching_lines = []

    # Read the source file and find lines with the specific product type ID
    with open(src_path, 'r') as fileref:
        for line in fileref:
            parts = line.split()
            # Three tokens are needed (name, product type, class index); a
            # shorter line used to raise IndexError on parts[2] and is now
            # skipped like any other non-matching line.
            if len(parts) < 3 or not parts[1].isdigit():
                continue
            if int(parts[1]) != product_type_id:
                continue
            # Shift the index stored in the third token down by one.
            shifted_index = str(int(parts[2]) - 1)
            # Keep the rest of the line, the coordinates
            matching_lines.append(shifted_index + " " + " ".join(parts[3:]) + "\n")

    # Write the matching lines to the destination file
    with open(dest_path, 'w') as file:
        file.writelines(matching_lines)


In [None]:
def copy_file_names(src_folder, dest_folder, label_name, names):
    """Write a copy of a label file that keeps only lines for the given names.

    Every entry of ``names`` is reduced to its ASCII letters; duplicates and
    entries that become empty are dropped. Each remaining name receives a
    class index following the order of its first appearance in ``names``
    (0, 1, 2, ...). A source line is kept when its first token contains one of
    the cleaned names; the first token is then replaced by that name's index
    and the rest of the line is kept as is. When several names match, the one
    appearing first in ``names`` wins.

    Args:
        src_folder: Folder containing the source label file.
        dest_folder: Folder receiving the filtered file (created if missing).
        label_name: File name of the label file, used on both sides.
        names: Iterable of class names to keep.
    """
    # Ensure the destination folder exists
    os.makedirs(dest_folder, exist_ok=True)

    # Define the full file paths
    src_path = os.path.join(src_folder, label_name)
    dest_path = os.path.join(dest_folder, label_name)

    # Strip everything but letters. An empty result is dropped because the
    # empty string is a substring of every token and would match every line.
    cleaned_names = (re.sub(r'[^A-Za-z]', '', name) for name in names)
    # dict.fromkeys removes duplicates while preserving first-seen order, so
    # the class indices are stable between runs (enumerating a set was not).
    unique_names = dict.fromkeys(name for name in cleaned_names if name)
    names_with_indices = {name: str(index) for index, name in enumerate(unique_names)}

    matching_lines = []

    # Read the source file and find lines with the specific names
    with open(src_path, 'r') as fileref:
        for line in fileref:
            parts = line.split()
            if not parts:
                continue
            name_part = parts[0]
            for name, name_label in names_with_indices.items():
                if name in name_part:
                    # Replace the name token by its class index, keep the rest.
                    matching_lines.append(name_label + " " + " ".join(parts[1:]) + "\n")
                    break

    # Write the matching lines to the destination file
    with open(dest_path, 'w') as file:
        file.writelines(matching_lines)

In [170]:
def split_data(product_specific=False, product_type_id=None, names=None):
    """Randomly distribute the files in ``dataset`` over train and validation.

    For every ``.jpg`` in ``dataset`` a number in ``range(10)`` is drawn; the
    values 0 and 9 send the image to ``images/val`` (2 out of 10 outcomes),
    everything else to ``images/train``. The label file of an image is the
    ``.txt`` file with the same base name (this assumes labels are named after
    their image, as YOLO expects -- the labels are written out under their
    original names). An image without such a file is still copied, and a
    message is printed.

    Label files are written to ``labels/val`` or ``labels/train`` by:
      * ``copy_file_group`` when ``product_specific`` is true and
        ``product_type_id`` is given;
      * ``copy_file_names`` when ``names`` is given.
    Both helpers write to the same destination file, so when both conditions
    hold the output of ``copy_file_names`` is what remains on disk. When
    neither holds, no label file is written.

    Args:
        product_specific: Enables filtering by ``product_type_id``.
        product_type_id: Product-type id passed to ``copy_file_group``.
        names: Class names passed to ``copy_file_names``.
    """
    dataset_dir = 'dataset'

    image_train_dir = 'images/train'
    image_val_dir = 'images/val'

    labels_train_dir = 'labels/train'
    labels_val_dir = 'labels/val'

    image_names = get_jpg_image_names(dataset_dir)
    # Labels are looked up by name rather than by list position: pairing the
    # two listings index-by-index mismatches (or raises IndexError) as soon as
    # one label is missing or the listings come back in a different order.
    available_labels = set(get_jpg_image_names(dataset_dir, ".txt"))

    for image_name in image_names:
        in_validation = randrange(10) in (0, 9)
        image_dest = image_val_dir if in_validation else image_train_dir
        labels_dest = labels_val_dir if in_validation else labels_train_dir

        copy_image(dataset_dir, image_dest, image_name)

        label_name = os.path.splitext(image_name)[0] + ".txt"
        if label_name not in available_labels:
            print(f'No label file {label_name} found for {image_name}; image copied without labels')
            continue

        if product_specific and product_type_id is not None:  # if group and ID is given
            copy_file_group(dataset_dir, labels_dest, label_name, product_type_id)
        if names is not None:  # if names is given
            copy_file_names(dataset_dir, labels_dest, label_name, names)

In [171]:
"""label_names = get_jpg_image_names(dataset_dir, ".txt")

with open(dataset_dir + "/" + label_names[0], "r") as fileref:
  for i in fileref:
    print(i.split()[2:])

st = " ".join(i.split()[2:])"""

'label_names = get_jpg_image_names(dataset_dir, ".txt")\n\nwith open(dataset_dir + "/" + label_names[0], "r") as fileref:\n  for i in fileref:\n    print(i.split()[2:])\n\nst = " ".join(i.split()[2:])'

In [172]:
# not needed as label files do not need to be updated
# 
# def update_label(label_names, path, product_type_id=0):
#   st = ""
#   for index in range(len(label_names)):
#     with open(path + "/" + label_names[index], "r") as fileref:
#       for i in fileref:
#         if product_type_id == int(i.split()[1]):
#           st = " ".join(i.split()[2:])


#     with open(path + "/" + label_names[index], 'w') as file:
#       file.write(st)

In [173]:
#update_label(labels_names_val, labels_val_dir)
# Product name -> numeric product-type id; show the id assigned to "Sink".
products = dict(zip(("Toilet", "Bathtub", "Sink"), range(3)))
print(products["Sink"])

2


In [174]:
# Product name -> numeric product-type id.
products = {
    "Toilet": 0,
    "Bathtub": 1,
    "Sink": 2,
    "Vanity": 3,
    "Faucet": 4,
}

# Example usage
create_yolo_training_config(
    "path/to/directory",
    product_specific=True,
    product_type_id=products["Sink"],
    names=["Toilet1", "Toilet2", "Toilet3", "Toilet4"],
)



YOLO training configuration created at path/to/directory/data.yaml
Folder structure created under path/to/directory
