In [22]:
import cv2
import os
from IPython.display import display
from PIL import Image
import xml.etree.ElementTree as ET

In [51]:
image_dir = "data/pcb_dataset/images/"
output_dir = "data/pcb_dataset/resized_images/"
classes = ['missing_hole', 'mouse_bite', 'open_circuit', 'short', 'spur', 'spurious_copper']

In [52]:
os.makedirs(output_dir, exist_ok=True)

In [53]:
# Loop through each defect class folder
for class_name in os.listdir(image_dir):
    class_path = os.path.join(image_dir, class_name)
    
    # Ensure it's a directory
    if os.path.isdir(class_path):
        output_class_dir = os.path.join(output_dir, class_name)
        os.makedirs(output_class_dir, exist_ok=True)  # Create class subfolder in output
        
        # Process each image inside the class folder
        for img_name in os.listdir(class_path):
            img_path = os.path.join(class_path, img_name)
            img = cv2.imread(img_path)
            
            if img is None:
                print(f"Skipping {img_name} (unable to read)")
                continue
            
            # Resize image to 640x640 for YOLO
            img_resized = cv2.resize(img, (640, 640))
            
            # Save resized image in the corresponding output class folder
            cv2.imwrite(os.path.join(output_class_dir, img_name), img_resized)

print("✅ Dataset preprocessing complete! All images are resized and saved.")

Skipping labels (unable to read)
✅ Dataset preprocessing complete! All images are resized and saved.


In [54]:
import os

train_images_dir = 'data/pcb_dataset/resized_images/train'
train_labels_dir = 'data/pcb_dataset/train/labels'

# Check if labels exist for each image in train
for image_name in os.listdir(train_images_dir):
    label_name = image_name.replace('.jpg', '.txt')
    label_path = os.path.join(train_labels_dir, label_name)
    
    if not os.path.exists(label_path):
        print(f"Missing label for image: {image_name}")

In [55]:
import os
import xml.etree.ElementTree as ET

# List of classes to match class names from VOC to YOLO indices
classes = ["mouse_bite", "missing_hole", "spur", "open_circuit", "short", "other_class"]

def convert_voc_to_yolo(xml_file, output_dir):
    tree = ET.parse(xml_file)
    root = tree.getroot()

    image_name = root.find("filename").text
    image_size = root.find("size")
    img_w, img_h = int(image_size.find("width").text), int(image_size.find("height").text)

    print(f"Processing: {xml_file}, Image: {image_name}, Size: {img_w}x{img_h}")  # Debugging

    # Create corresponding label file
    yolo_label_path = os.path.join(output_dir, image_name.replace(".jpg", ".txt"))

    with open(yolo_label_path, "w") as yolo_file:
        for obj in root.findall("object"):
            class_name = obj.find("name").text
            if class_name not in classes:
                print(f"Skipping {class_name}, not in class list.")  # Debugging
                continue  # Skip unknown classes
            
            class_idx = classes.index(class_name)
            bbox = obj.find("bndbox")

            x_min, y_min, x_max, y_max = map(int, [
                bbox.find("xmin").text, bbox.find("ymin").text, 
                bbox.find("xmax").text, bbox.find("ymax").text
            ])

            print(f"Found class: {class_name}, bbox: {x_min}, {y_min}, {x_max}, {y_max}")  # Debugging

            x_center = (x_min + x_max) / (2.0 * img_w)
            y_center = (y_min + y_max) / (2.0 * img_h)
            width = (x_max - x_min) / img_w
            height = (y_max - y_min) / img_h

            yolo_file.write(f"{class_idx} {x_center} {y_center} {width} {height}\n")


In [56]:
xml_dir = "data/pcb_dataset/Annotations/"  # Path to your XML files
output_dir = "data/pcb_dataset/train/labels/"  # Path to where you want your YOLO labels

# Loop through each class folder
for class_name in os.listdir(xml_dir):
    class_dir = os.path.join(xml_dir, class_name)

    if os.path.isdir(class_dir):  # Ensure it's a directory
        output_class_dir = os.path.join(output_root_dir, class_name)
        os.makedirs(output_class_dir, exist_ok=True)

        for xml_file in os.listdir(class_dir):
            if xml_file.endswith(".xml"):
                convert_voc_to_yolo(os.path.join(class_dir, xml_file), output_class_dir)

print("✅ YOLO Labels generation complete!")

Processing: data/pcb_dataset/Annotations/Mouse_bite/06_mouse_bite_09.xml, Image: 06_mouse_bite_09.jpg, Size: 2868x2316
Found class: mouse_bite, bbox: 415, 762, 490, 803
Found class: mouse_bite, bbox: 806, 705, 846, 788
Found class: mouse_bite, bbox: 968, 1259, 1017, 1360
Found class: mouse_bite, bbox: 307, 1380, 352, 1454
Found class: mouse_bite, bbox: 524, 1871, 565, 1963
Processing: data/pcb_dataset/Annotations/Mouse_bite/05_mouse_bite_06.xml, Image: 05_mouse_bite_06.jpg, Size: 2544x2156
Found class: mouse_bite, bbox: 1217, 618, 1263, 696
Found class: mouse_bite, bbox: 710, 804, 788, 849
Found class: mouse_bite, bbox: 1603, 1374, 1667, 1426
Found class: mouse_bite, bbox: 2270, 1523, 2333, 1570
Found class: mouse_bite, bbox: 2183, 1810, 2266, 1860
Processing: data/pcb_dataset/Annotations/Mouse_bite/01_mouse_bite_20.xml, Image: 01_mouse_bite_20.jpg, Size: 3034x1586
Found class: mouse_bite, bbox: 1339, 1366, 1382, 1410
Found class: mouse_bite, bbox: 995, 1312, 1040, 1351
Found class: mo

In [44]:
xml_dir = "data/pcb_dataset/Annotations/"
output_root_dir = "data/pcb_dataset/yolo_labels/"

In [59]:
import os
import random
import shutil

# Define directories
image_dir = "data/pcb_dataset/resized_images/"
label_dir = "data/pcb_dataset/yolo_labels/"
train_image_dir = "data/pcb_dataset/train/images/"
val_image_dir = "data/pcb_dataset/val/images/"
train_label_dir = "data/pcb_dataset/train/labels/"
val_label_dir = "data/pcb_dataset/val/labels/"

# Create the necessary directories if not already created
os.makedirs(train_image_dir, exist_ok=True)
os.makedirs(val_image_dir, exist_ok=True)
os.makedirs(train_label_dir, exist_ok=True)
os.makedirs(val_label_dir, exist_ok=True)

# List all classes
classes = ['Missing_hole', 'Mouse_bite', 'Open_circuit', 'Short', 'Spur', 'Spurious_copper']

# Split the data into train and validation sets (80% train, 20% validation)
for class_name in classes:
    # Get the list of images and labels
    image_class_dir = os.path.join(image_dir, class_name)
    label_class_dir = os.path.join(label_dir, class_name)

    # List all images in the class directory
    images = [f for f in os.listdir(image_class_dir) if f.endswith('.jpg')]
    
    # Shuffle the list of images for random splitting
    random.shuffle(images)
    
    # Calculate the split index for 80% train, 20% val
    split_index = int(0.8 * len(images))
    
    # Split the images and move them to the respective directories
    for image_name in images[:split_index]:
        # Move image to train folder
        shutil.move(os.path.join(image_class_dir, image_name), os.path.join(train_image_dir, image_name))
        
        # Move corresponding label file to train folder
        label_name = image_name.replace('.jpg', '.txt')
        shutil.move(os.path.join(label_class_dir, label_name), os.path.join(train_label_dir, label_name))
    
    for image_name in images[split_index:]:
        # Move image to val folder
        shutil.move(os.path.join(image_class_dir, image_name), os.path.join(val_image_dir, image_name))
        
        # Move corresponding label file to val folder
        label_name = image_name.replace('.jpg', '.txt')
        shutil.move(os.path.join(label_class_dir, label_name), os.path.join(val_label_dir, label_name))

print("✅ Data split into train and validation sets successfully!")



✅ Data split into train and validation sets successfully!
