In [None]:
# Mount Google Drive inside the Colab VM at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# NOTE(review): not referenced by any other visible cell; presumably a local
# extraction target for the dataset - confirm it is still needed.
EXTRACT_PATH = "/content/PCB_Dataset"

In [None]:
import os
# Root folder of the PCB dataset on the mounted Drive.
DATASET_ROOT = "/content/drive/MyDrive/PCB_Dataset/PCB_DATASET"

# Annotation XMLs, defective images and template images live in three
# sub-folders of the dataset root.
ANNOTATION_PATH, DEFECT_IMAGE_PATH, TEMPLATE_PATH = (
    os.path.join(DATASET_ROOT, sub_dir)
    for sub_dir in ("Annotations", "images", "PCB_USED")
)

# Echo the resolved locations.
print(f"Annotations: {ANNOTATION_PATH}")
print(f"Defect Images: {DEFECT_IMAGE_PATH}")
print(f"Template Images: {TEMPLATE_PATH}")

Annotations: /content/drive/MyDrive/PCB_Dataset/PCB_DATASET/Annotations
Defect Images: /content/drive/MyDrive/PCB_Dataset/PCB_DATASET/images
Template Images: /content/drive/MyDrive/PCB_Dataset/PCB_DATASET/PCB_USED


In [None]:
import cv2
import os
import numpy as np
import glob                        # To list image and XML files
import cv2                         # OpenCV for image processing
import numpy as np                 # Numerical operations
import xml.etree.ElementTree as ET # To parse XML annotations
from google.colab.patches import cv2_imshow # Colab specific for displaying images

In [None]:
# Destinations on Drive for the generated artefacts:
# binary masks, cropped defect ROIs and annotated visualisations.
OUTPUT_MASK_PATH = "/content/drive/MyDrive/PCB_Dataset/output_defect_masks"
OUTPUT_ROI_PATH = "/content/drive/MyDrive/PCB_Dataset/output_defect_rois"
OUTPUT_VIS_PATH = "/content/drive/MyDrive/PCB_Dataset/output_visualization"

# Make sure every destination exists (no error if it already does).
for _output_dir in (OUTPUT_MASK_PATH, OUTPUT_ROI_PATH, OUTPUT_VIS_PATH):
    os.makedirs(_output_dir, exist_ok=True)

In [None]:
# Sanity check: show the dataset locations used by the cells below.
print(f"DATASET ROOT: {DATASET_ROOT}")
print(f"Annotations: {ANNOTATION_PATH}")
print(f"Images: {DEFECT_IMAGE_PATH}")
print(f"Templates: {TEMPLATE_PATH}")

DATASET ROOT: /content/drive/MyDrive/PCB_Dataset/PCB_DATASET
Annotations: /content/drive/MyDrive/PCB_Dataset/PCB_DATASET/Annotations
Images: /content/drive/MyDrive/PCB_Dataset/PCB_DATASET/images
Templates: /content/drive/MyDrive/PCB_Dataset/PCB_DATASET/PCB_USED


In [None]:
!pip install matplotlib
import matplotlib.pyplot as plt



In [None]:
# Map PCB id (file name up to its first dot, e.g. "01" for "01.JPG")
# to the template image loaded as grayscale.
template_dict = {}

for template_file in glob.glob(os.path.join(TEMPLATE_PATH, "*.*")):
    board_id = os.path.basename(template_file).split(".")[0]
    gray_template = cv2.imread(template_file, cv2.IMREAD_GRAYSCALE)

    # cv2.imread returns None instead of raising when a file cannot be decoded.
    if gray_template is not None:
        template_dict[board_id] = gray_template
    else:
        print("Warning: Could not load:", template_file)


print("Loaded templates:", sorted(template_dict.keys()))


Loaded templates: ['01', '04', '05', '06', '07', '08', '09', '10', '11', '12']


In [None]:
def process_defective_image(test_img_path, template_img, defect_type, save_name):
    """Find defects in one PCB image by subtracting its template.

    The defective image is read as grayscale, the template is resized to the
    same size, and their absolute difference is binarised with Otsu's
    threshold. The mask is cleaned with one erosion and two dilations, then
    every external contour with an area of at least 100 is cropped out as an
    ROI and boxed/labelled on a BGR copy of the defective image.

    Files written (sub-directories are created as needed):
        OUTPUT_VIS_PATH/<save_name>_input.jpg
            defective image and resized template side by side
        OUTPUT_MASK_PATH/<defect_type>/<save_name>_mask.png
            Otsu mask as produced by the threshold, before morphology
        OUTPUT_ROI_PATH/<defect_type>/<save_name>_roi_<k>.jpg
            one grayscale crop per kept contour, k starting at 1
        OUTPUT_VIS_PATH/<defect_type>/<save_name>_defect.jpg
            annotated visualisation

    Args:
        test_img_path: Path of the defective image to read.
        template_img: Grayscale template image (NumPy array); it is resized
            to the defective image's width and height before comparison.
        defect_type: Defect class name; used as sub-directory name and as the
            text label drawn next to each box.
        save_name: File-name stem shared by all outputs for this image.

    Returns:
        None. If the defective image cannot be read, a message is printed and
        nothing is written.
    """
    # 1. Load defective image
    test_img = cv2.imread(test_img_path, cv2.IMREAD_GRAYSCALE)
    if test_img is None:
        print("Could not load:", test_img_path)
        return

    # 2. Resize template to the test image (cv2.resize expects (width, height))
    height, width = test_img.shape[:2]
    template_resized = cv2.resize(template_img, (width, height))

    # 3. Save input comparison (test image on the left, template on the right)
    cv2.imwrite(
        os.path.join(OUTPUT_VIS_PATH, f"{save_name}_input.jpg"),
        np.hstack([test_img, template_resized])
    )

    # 4. Subtraction
    diff_img = cv2.absdiff(test_img, template_resized)

    # 5. OTSU threshold (the 0 passed as threshold is ignored; Otsu picks it)
    _, defect_mask = cv2.threshold(
        diff_img,
        0, 255,
        cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )

    # Save the raw mask, before morphological clean-up
    mask_dir = os.path.join(OUTPUT_MASK_PATH, defect_type)
    os.makedirs(mask_dir, exist_ok=True)
    cv2.imwrite(
        os.path.join(mask_dir, f"{save_name}_mask.png"),
        defect_mask
    )

    # 6. Morphology
    # `iterations` must be passed by keyword: the third positional parameter
    # of cv2.erode / cv2.dilate is `dst`, so a positional count is silently
    # ignored and both calls would run with the default single iteration.
    kernel = np.ones((3, 3), np.uint8)
    defect_mask = cv2.erode(defect_mask, kernel, iterations=1)
    defect_mask = cv2.dilate(defect_mask, kernel, iterations=2)

    # 7. Contours
    contours, _ = cv2.findContours(
        defect_mask,
        cv2.RETR_EXTERNAL,
        cv2.CHAIN_APPROX_SIMPLE
    )

    # The ROI directory does not depend on the contour, so create it once.
    class_dir = os.path.join(OUTPUT_ROI_PATH, defect_type)
    os.makedirs(class_dir, exist_ok=True)

    visual = cv2.cvtColor(test_img, cv2.COLOR_GRAY2BGR)
    roi_id = 0
    for cnt in contours:
        # Ignore small blobs
        if cv2.contourArea(cnt) < 100:
            continue

        x, y, w, h = cv2.boundingRect(cnt)
        roi_id += 1

        # Save ROI
        cv2.imwrite(
            os.path.join(class_dir, f"{save_name}_roi_{roi_id}.jpg"),
            test_img[y:y + h, x:x + w]
        )

        # Draw box
        cv2.rectangle(visual, (x, y), (x + w, y + h), (0, 255, 0), 2)

        # Label: normally just above the box; clamp the baseline so the text
        # is not drawn outside the image for boxes touching the top edge.
        cv2.putText(
            visual, defect_type, (x, max(y - 5, 15)),
            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2
        )

    # Save visualization
    vis_dir = os.path.join(OUTPUT_VIS_PATH, defect_type)
    os.makedirs(vis_dir, exist_ok=True)
    cv2.imwrite(
        os.path.join(vis_dir, f"{save_name}_defect.jpg"),
        visual
    )

In [None]:
# Walk every defect-class folder under ANNOTATION_PATH, pair each annotated
# image with the template of the same PCB id and run the subtraction pipeline.
# Listings are sorted so the processing order and the log are reproducible.
for defect_type in sorted(os.listdir(ANNOTATION_PATH)):

    ann_dir = os.path.join(ANNOTATION_PATH, defect_type)
    img_dir = os.path.join(DEFECT_IMAGE_PATH, defect_type)

    # Only directories are defect classes; skip stray files.
    if not os.path.isdir(ann_dir):
        continue

    xml_files = sorted(glob.glob(os.path.join(ann_dir, "*.xml")))

    print(f"{defect_type}: {len(xml_files)} files")

    for xml_file in xml_files:

        # A single malformed annotation should not abort the whole run.
        try:
            root = ET.parse(xml_file).getroot()
        except ET.ParseError as err:
            print("Invalid XML:", xml_file, "-", err)
            continue

        # Defective image name; findtext returns None when the tag is absent,
        # and the tag may also be present but empty.
        filename = (root.findtext("filename") or "").strip()
        if not filename:
            print("No <filename> in:", xml_file)
            continue

        defect_img_path = os.path.join(img_dir, filename)

        if not os.path.exists(defect_img_path):
            print("Missing image:", defect_img_path)
            continue

        # ============================
        # GET PCB ID FROM NAME
        # ============================

        # The part of the file name before the first underscore, e.g. "01"
        pcb_id = filename.split("_")[0]

        if pcb_id not in template_dict:
            print("Template not found for:", pcb_id)
            continue

        template_img = template_dict[pcb_id]

        # splitext removes only the extension, so stems containing dots
        # are kept intact.
        save_name = f"{defect_type}_{os.path.splitext(filename)[0]}"

        process_defective_image(
            defect_img_path,
            template_img,
            defect_type,
            save_name
        )


print("\nProcessing complete.")

Spurious_copper: 116 files
Spur: 115 files
Short: 116 files
Open_circuit: 116 files
Missing_hole: 115 files
Mouse_bite: 115 files

Processing complete.
