In [None]:
import os
import xml.etree.ElementTree as ET
from sklearn.model_selection import train_test_split

# Define paths
ROOT_DIR = "pets"
ANNOTATIONS_DIR = os.path.join(ROOT_DIR, "Annotation")
IMAGES_DIR = os.path.join(ROOT_DIR, "Images")
YOLO_LABELS_DIR = os.path.join(ROOT_DIR, "yolo-labels")

# Create labels directory if not exists
os.makedirs(YOLO_LABELS_DIR, exist_ok=True)

CLASSES = []
print(os.listdir(ANNOTATIONS_DIR))
folders = os.listdir(ANNOTATIONS_DIR)

for folder in folders:
    if folder == ".DS_Store": continue
    files = os.listdir(os.path.join(ANNOTATIONS_DIR, folder))
    for file in files:
        tree = ET.parse(os.path.join(ANNOTATIONS_DIR, folder, file))
        root = tree.getroot()
        for obj in root.findall("object"):
            name = obj.find("name").text
            if name not in CLASSES:
                CLASSES.append(name)
print(CLASSES)
print(len(CLASSES)) # 120

['n02097658-silky_terrier', 'n02092002-Scottish_deerhound', 'n02099849-Chesapeake_Bay_retriever', 'n02091244-Ibizan_hound', 'n02095314-wire-haired_fox_terrier', 'n02091831-Saluki', 'n02102318-cocker_spaniel', 'n02104365-schipperke', 'n02090622-borzoi', 'n02113023-Pembroke', 'n02105505-komondor', 'n02093256-Staffordshire_bullterrier', 'n02113799-standard_poodle', 'n02109961-Eskimo_dog', 'n02089973-English_foxhound', 'n02099601-golden_retriever', 'n02095889-Sealyham_terrier', 'n02085782-Japanese_spaniel', '.DS_Store', 'n02097047-miniature_schnauzer', 'n02110063-malamute', 'n02105162-malinois', 'n02086079-Pekinese', 'n02097130-giant_schnauzer', 'n02113978-Mexican_hairless', 'n02107142-Doberman', 'n02097209-standard_schnauzer', 'n02115913-dhole', 'n02106662-German_shepherd', 'n02106382-Bouvier_des_Flandres', 'n02110185-Siberian_husky', 'n02094258-Norwich_terrier', 'n02093991-Irish_terrier', 'n02094114-Norfolk_terrier', 'n02109525-Saint_Bernard', 'n02093754-Border_terrier', 'n02105251-briar

'\nfor annotation in os.listdir(ANNOTATIONS_DIR):\n    print(annotation)\n    for folder in os.listdir(os.path.join(ANNOTATIONS_DIR, annotation)):\n        print(folder)\n        for file in os.listdir(os.path.join(ANNOTATIONS_DIR, annotation, folder)):\n            print(file)\n\n            #if file.endswith(".xml"):\n            tree = ET.parse(os.path.join(ANNOTATIONS_DIR, annotation, folder, file))\n            root = tree.getroot()\n            for obj in root.findall("object"):\n                name = obj.find("name").text\n                if name not in CLASSES:\n                    CLASSES.append(name)'

In [None]:
def convert_annotation(xml_file, class_name, img_width, img_height):
    """Convert a single XML annotation to YOLO format."""
    tree = ET.parse(xml_file)
    root = tree.getroot()
    
    label_file_content = []
    for obj in root.findall("object"):
        object_name = obj.find("name").text
        if object_name != class_name:  # Skip objects that don't match the current class
            continue
        
        class_id = CLASSES.index(object_name)  # Get the class index
        bbox = obj.find("bndbox")
        
        xmin = int(bbox.find("xmin").text)
        ymin = int(bbox.find("ymin").text)
        xmax = int(bbox.find("xmax").text)
        ymax = int(bbox.find("ymax").text)
        
        # Convert to YOLO format
        x_center = ((xmin + xmax) / 2) / img_width
        y_center = ((ymin + ymax) / 2) / img_height
        width = (xmax - xmin) / img_width
        height = (ymax - ymin) / img_height
        
        label_file_content.append(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}")
    
    return "\n".join(label_file_content)

# Process annotations
image_paths = []
for class_folder in os.listdir(ANNOTATIONS_DIR):
    class_folder_path = os.path.join(ANNOTATIONS_DIR, class_folder)
    
    if not os.path.isdir(class_folder_path):
        continue
    
    # Iterate through XML files in each class folder
    for xml_filename in os.listdir(class_folder_path):
        #if not xml_filename.endswith(".xml"): continue
        
        xml_path = os.path.join(class_folder_path, xml_filename)
        class_name = class_folder.split("-")[1]  # Assuming class folder name follows 'n02085620-Chihuahua' format
        txt_filename = xml_filename.replace(".xml", ".txt")
        txt_path = os.path.join(YOLO_LABELS_DIR, txt_filename)
        
        # Extract image size from XML
        tree = ET.parse(xml_path)
        root = tree.getroot()
        img_width = int(root.find("size/width").text)
        img_height = int(root.find("size/height").text)
        
        # Convert and save
        yolo_data = convert_annotation(xml_path, class_name, img_width, img_height)
        with open(txt_path, "w") as f:
            f.write(yolo_data)
        
        # Store image path for splitting
        image_name = xml_filename.replace(".xml", ".jpg")  # Assuming image format is .jpg
        image_paths.append(os.path.join(IMAGES_DIR, class_folder, image_name))

# Split dataset into training and validation sets
train_images, val_images = train_test_split(image_paths, test_size=0.2, random_state=42)

# Write image paths to train.txt and val.txt
with open(os.path.join(ROOT_DIR, "train.txt"), "w") as f:
    f.writelines(f"{img}\n" for img in train_images)
with open(os.path.join(ROOT_DIR, "val.txt"), "w") as f:
    f.writelines(f"{img}\n" for img in val_images)

# Create data.yaml
import yaml
with open(os.path.join(ROOT_DIR, "data.yaml"), "w") as f:
    yaml_content = {
        'train': os.path.join(ROOT_DIR, "train.txt"),
        'val': os.path.join(ROOT_DIR, "val.txt"),
        'nc': len(CLASSES),
        'names': CLASSES
    }
    yaml.dump(yaml_content, f)

print("Dataset preparation complete!")

Dataset preparation complete!


In [12]:
import os
from sklearn.model_selection import train_test_split

ROOT_DIR = "pets"
IMAGES_DIR = os.path.join(ROOT_DIR, "Images")

# Initialize an empty list to hold image paths
image_paths = []

# Loop through class folders and their images
for class_folder in os.listdir(IMAGES_DIR):
    class_folder_path = os.path.join(IMAGES_DIR, class_folder)
    
    # Make sure it's a directory (class folder)
    if os.path.isdir(class_folder_path):
        # Get all image paths for the current class
        for img_filename in os.listdir(class_folder_path):
            if img_filename.endswith(".jpg"):
                img_path = os.path.join(class_folder_path, img_filename)
                image_paths.append(img_path)

# Split into train and val sets
train_images, val_images = train_test_split(image_paths, test_size=0.2, random_state=42)

# Write paths to train.txt and val.txt
with open(os.path.join(ROOT_DIR, "train.txt"), "w") as f:
    f.writelines(f"{p}\n" for p in train_images)
with open(os.path.join(ROOT_DIR, "val.txt"), "w") as f:
    f.writelines(f"{p}\n" for p in val_images)

print("train.txt and val.txt created successfully!")

train.txt and val.txt created successfully!
