In [14]:
import os
import shutil
import pandas as pd

# Dataset layout: the raw splits live under base_dir, converted output under yolo_dir.
base_dir = "classification-data"
yolo_dir = os.path.join(base_dir, "yolo")
splits = ["train", "valid", "test"]

# Class names; a name's position in this list is its numeric class id.
classes = ['Implant', 'Fillings', 'Impacted Tooth', 'Cavity']
class_to_id = dict(zip(classes, range(len(classes))))

# Conversion function
def convert_to_yolo_format(row, img_width, img_height):
    """Render one bounding-box annotation as a single label line.

    Args:
        row: Record indexable by 'xmin', 'xmax', 'ymin', 'ymax' (box corners)
            and 'class' (a key of ``class_to_id``).
        img_width: Divisor applied to the horizontal centre and width.
        img_height: Divisor applied to the vertical centre and height.

    Returns:
        The string "<class_id> <x_center> <y_center> <width> <height>",
        where the four box values are divided by the image dimensions.

    Raises:
        KeyError: If the row's class is not present in ``class_to_id``.
    """
    left, right = row['xmin'], row['xmax']
    top, bottom = row['ymin'], row['ymax']

    # Box centre, scaled by the matching image dimension.
    cx = (left + right) / 2 / img_width
    cy = (top + bottom) / 2 / img_height

    # Box extent, scaled the same way.
    box_w = (right - left) / img_width
    box_h = (bottom - top) / img_height

    label = class_to_id[row['class']]
    return f"{label} {cx} {cy} {box_w} {box_h}"

# Create the YOLO output root up front; exist_ok=True keeps the cell safe to re-run.
os.makedirs(yolo_dir, exist_ok=True)

def rename_image(filename):
    """Shorten an image filename to the text before its first '_jpg' marker.

    Everything from the first '_jpg' onward is dropped and '_.jpg' is
    appended. A name that contains no '_jpg' is kept whole and still
    receives the '_.jpg' suffix.
    """
    stem, _, _ = filename.partition('_jpg')
    return f"{stem}_.jpg"

# Convert every split: copy each annotated image into <yolo_dir>/<split>/images
# and write one label file per image into <yolo_dir>/<split>/labels.
for split in splits:
    split_dir = os.path.join(base_dir, split)
    annotations_file = os.path.join(split_dir, "_annotations.csv")
    annotations = pd.read_csv(annotations_file)

    # YOLO-specific directories
    yolo_split_dir = os.path.join(yolo_dir, split)
    images_dir = os.path.join(yolo_split_dir, "images")
    labels_dir = os.path.join(yolo_split_dir, "labels")
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)

    # The CSV stores one bounding box per row, so a filename can repeat.
    # Grouping by filename lets us copy the image once and write ALL of its
    # boxes together; opening the label file in "w" mode once per row would
    # leave only the last box. sort=False keeps the CSV's first-seen order.
    for filename, image_rows in annotations.groupby('filename', sort=False):
        # Rename image
        new_filename = rename_image(filename)

        # Copy image to YOLO images folder with new name
        src_image_path = os.path.join(split_dir, filename)
        dst_image_path = os.path.join(images_dir, new_filename)
        shutil.copy(src_image_path, dst_image_path)

        # The label file must share the image's stem. splitext strips only the
        # final extension, so stems that themselves contain dots stay intact.
        label_stem = os.path.splitext(new_filename)[0]
        label_file = os.path.join(labels_dir, f"{label_stem}.txt")

        # One label line per bounding box of this image.
        yolo_lines = [
            convert_to_yolo_format(row, row['width'], row['height'])
            for _, row in image_rows.iterrows()
        ]
        with open(label_file, "w") as f:
            f.write("\n".join(yolo_lines) + "\n")


In [8]:
import pandas as pd

# Load the training annotations and list the distinct class labels they contain.
train_annotations_path = "classification-data/train/_annotations.csv"
df = pd.read_csv(train_annotations_path)
df['class'].unique()

array(['Implant', 'Fillings', 'Impacted Tooth', 'Cavity'], dtype=object)