In [8]:
import os
import random
import shutil
import xml.etree.ElementTree as ET

import cv2
from IPython.display import display
from PIL import Image

In [9]:
# Dataset locations, given relative to the notebook's working directory.
image_dir = "data/pcb_dataset/images/"           # source images, read per class sub-folder
output_dir = "data/pcb_dataset/resized_images/"  # destination for the resized copies

# Defect class names. A name's position in this list is the class index written
# into the YOLO label files, so the order must not change once labels exist.
classes = [
    "Missing_hole",     # 0
    "Mouse_bite",       # 1
    "Open_circuit",     # 2
    "Short",            # 3
    "Spur",             # 4
    "Spurious_copper",  # 5
]

In [10]:
# Create the root folder for resized images; exist_ok=True means no error is
# raised when the folder is already there (e.g. when this cell is re-run).
os.makedirs(output_dir, exist_ok=True)

In [5]:
# Resize every readable image found under image_dir/<class>/ and write it to
# output_dir/<class>/ under the same file name.
#
# The image is stretched to the target size, so the aspect ratio is not preserved.
target_size = (640, 640)  # (width, height) handed to cv2.resize
saved_count = 0
failed_writes = []

# sorted() makes the processing (and log) order independent of the filesystem.
for class_name in sorted(os.listdir(image_dir)):
    class_path = os.path.join(image_dir, class_name)

    # Only sub-folders are treated as classes; loose files at this level are ignored.
    if not os.path.isdir(class_path):
        continue

    output_class_dir = os.path.join(output_dir, class_name)
    os.makedirs(output_class_dir, exist_ok=True)  # Create class subfolder in output

    for img_name in sorted(os.listdir(class_path)):
        img_path = os.path.join(class_path, img_name)
        img = cv2.imread(img_path)

        # cv2.imread signals an unreadable file by returning None, not by raising.
        if img is None:
            print(f"Skipping {img_name} (unable to read)")
            continue

        img_resized = cv2.resize(img, target_size)

        # cv2.imwrite reports failure through its boolean return value; checking it
        # keeps a failed write from being reported as success below.
        resized_path = os.path.join(output_class_dir, img_name)
        if cv2.imwrite(resized_path, img_resized):
            saved_count += 1
        else:
            failed_writes.append(resized_path)
            print(f"Failed to write {resized_path}")

if failed_writes:
    print(f"⚠️ Dataset preprocessing finished with {len(failed_writes)} failed write(s); "
          f"{saved_count} image(s) saved.")
else:
    print("✅ Dataset preprocessing complete! All images are resized and saved.")

✅ Dataset preprocessing complete! All images are resized and saved.


In [12]:
def convert_voc_to_yolo(xml_file, output_dir, class_names=None):
    """Convert one Pascal VOC XML annotation into a YOLO-format label file.

    The label file is written to ``output_dir`` and named after the image
    referenced by the XML's ``<filename>`` element, with the extension replaced
    by ``.txt`` (whatever the image extension is). Each known object becomes one
    line: ``<class_idx> <x_center> <y_center> <width> <height>``, with all four
    geometry values normalised by the image size given in ``<size>``.

    Args:
        xml_file: Path to the Pascal VOC ``.xml`` file.
        output_dir: Existing directory that receives the ``.txt`` label file.
        class_names: Ordered list of class names; an object's class index is its
            position in this list. Defaults to the notebook-level ``classes``.

    Raises:
        ValueError: If the annotation declares a non-positive image width or height.

    Objects whose ``<name>`` is not in ``class_names`` are skipped. An existing
    label file of the same name is overwritten.
    """
    if class_names is None:
        class_names = classes  # fall back to the notebook-level class list

    root = ET.parse(xml_file).getroot()

    # Prefer the image name stored in the XML; fall back to the XML's own name.
    filename_node = root.find("filename")
    if filename_node is not None and filename_node.text and filename_node.text.strip():
        image_name = filename_node.text.strip()
    else:
        image_name = os.path.basename(xml_file)

    image_size = root.find("size")
    # float() accepts both "600" and "600.0"; results are identical for integer text.
    img_w = float(image_size.find("width").text)
    img_h = float(image_size.find("height").text)
    if img_w <= 0 or img_h <= 0:
        raise ValueError(f"{xml_file}: invalid image size {img_w}x{img_h}")

    # splitext swaps whatever extension the image has (.jpg, .JPG, .png, ...),
    # so the label never ends up carrying an image extension.
    label_stem = os.path.splitext(os.path.basename(image_name))[0]
    yolo_label_path = os.path.join(output_dir, label_stem + ".txt")

    with open(yolo_label_path, "w") as yolo_file:
        for obj in root.findall("object"):
            class_name = (obj.find("name").text or "").strip()
            if class_name not in class_names:
                continue  # Skip unknown classes

            class_idx = class_names.index(class_name)
            bbox = obj.find("bndbox")

            x_min, y_min, x_max, y_max = (
                float(bbox.find(tag).text) for tag in ("xmin", "ymin", "xmax", "ymax")
            )

            # Clamp to the image so the normalised values stay inside [0, 1].
            x_min, x_max = (min(max(v, 0.0), img_w) for v in (x_min, x_max))
            y_min, y_max = (min(max(v, 0.0), img_h) for v in (y_min, y_max))

            x_center = (x_min + x_max) / (2.0 * img_w)
            y_center = (y_min + y_max) / (2.0 * img_h)
            width = (x_max - x_min) / img_w
            height = (y_max - y_min) / img_h

            yolo_file.write(f"{class_idx} {x_center} {y_center} {width} {height}\n")


In [13]:
# Root folder that is scanned for sub-folders containing Pascal VOC .xml annotations.
xml_dir = "data/pcb_dataset/Annotations/"
# Root folder that receives the converted YOLO .txt labels.
output_root_dir = "data/pcb_dataset/yolo_labels/"

In [15]:
# Convert the VOC annotations folder by folder: every sub-folder of xml_dir gets a
# same-named sub-folder under output_root_dir holding the generated YOLO labels.
for class_name in os.listdir(xml_dir):
    class_dir = os.path.join(xml_dir, class_name)

    # Anything that is not a directory at this level is ignored.
    if not os.path.isdir(class_dir):
        continue

    output_class_dir = os.path.join(output_root_dir, class_name)
    os.makedirs(output_class_dir, exist_ok=True)

    # Only files ending in ".xml" are treated as annotations.
    xml_names = [entry for entry in os.listdir(class_dir) if entry.endswith(".xml")]
    for xml_file in xml_names:
        convert_voc_to_yolo(os.path.join(class_dir, xml_file), output_class_dir)

print("✅ All Pascal VOC annotations successfully converted to YOLO format!")

✅ All Pascal VOC annotations successfully converted to YOLO format!


In [18]:
# Destination roots for the train/validation image split; the split cell creates
# one sub-folder per class under each of them.
train_dir = "data/pcb_dataset/train/"
val_dir = "data/pcb_dataset/val/"

In [20]:
# Split each class's resized images into train (80%) and validation (20%) and
# move them from output_dir/<class>/ into train_dir/<class>/ or val_dir/<class>/.
#
# NOTE(review): only images are moved here; the YOLO label files written under
# output_root_dir are not split alongside them — confirm that is intended.
train_fraction = 0.8
split_rng = random.Random(42)  # private, seeded RNG: same split on every run

for class_name in classes:
    train_class_dir = os.path.join(train_dir, class_name)
    val_class_dir = os.path.join(val_dir, class_name)
    os.makedirs(train_class_dir, exist_ok=True)
    os.makedirs(val_class_dir, exist_ok=True)

    source_class_dir = os.path.join(output_dir, class_name)
    if not os.path.isdir(source_class_dir):
        print(f"Skipping {class_name} (no folder at {source_class_dir})")
        continue

    # Sort before shuffling: os.listdir order is arbitrary, so the seed alone
    # would not make the split reproducible.
    class_files = sorted(f for f in os.listdir(source_class_dir) if f.lower().endswith('.jpg'))
    split_rng.shuffle(class_files)
    train_count = int(len(class_files) * train_fraction)

    for position, file_name in enumerate(class_files):
        if position < train_count:
            dest_dir, other_dir = train_class_dir, val_class_dir
        else:
            dest_dir, other_dir = val_class_dir, train_class_dir

        # Drop a copy left in the opposite split by an earlier run, so an image
        # can never sit in both train and val.
        stale_copy = os.path.join(other_dir, file_name)
        if os.path.isfile(stale_copy):
            os.remove(stale_copy)

        # Passing the full destination file path (not just the folder) lets a
        # re-run replace an existing file instead of raising shutil.Error.
        shutil.move(os.path.join(source_class_dir, file_name), os.path.join(dest_dir, file_name))

print("✅ Data split into training and validation sets successfully!")


✅ Data split into training and validation sets successfully!
