# PCB Defect Detection with YOLOv8 🔍

**Before running:** Go to `Runtime` → `Change runtime type` → Select `T4 GPU`


In [None]:
# Step 1: Check GPU
# Shell escape: runs nvidia-smi so you can confirm a GPU is attached to the
# runtime before starting the (GPU-configured) training step further down.
!nvidia-smi


In [None]:
# Step 2: Install dependencies
# ultralytics provides the YOLO class used for training/evaluation below;
# kagglehub is used to download the dataset. -q is pip's "quiet" flag.
!pip install -q ultralytics kagglehub


In [None]:
# Step 3: Setup Kaggle credentials
from google.colab import files
import os

# Create .kaggle directory
kaggle_dir = '/root/.kaggle'
os.makedirs(kaggle_dir, exist_ok=True)

# Upload your kaggle.json file (download from https://www.kaggle.com/settings)
# files.upload() returns a dict mapping each uploaded file name to its bytes.
print("Upload your kaggle.json file:")
uploaded = files.upload()

# Stop here instead of reporting success when nothing was uploaded.
if not uploaded:
    raise RuntimeError("No file was uploaded; kaggle.json is required.")

# Install the credentials from the uploaded bytes rather than moving a file
# called exactly "kaggle.json": the upload may carry another name (for
# example a browser-renamed "kaggle (1).json"), and a failing shell `mv`
# would not have stopped the cell.
uploaded_name, uploaded_bytes = next(iter(uploaded.items()))
credentials_path = os.path.join(kaggle_dir, 'kaggle.json')
with open(credentials_path, 'wb') as credentials_file:
    credentials_file.write(uploaded_bytes)

# Restrict the API key to the owner (read/write only).
os.chmod(credentials_path, 0o600)

# Do not leave a second copy of the API key in the working directory.
if os.path.exists(uploaded_name):
    os.remove(uploaded_name)

print("✅ Kaggle credentials configured!")


In [None]:
# Step 4: Download the PCB Defects dataset
import kagglehub

# Fetch the dataset through kagglehub; the returned location is kept in
# dataset_path because the conversion step builds its input paths from it.
DATASET_HANDLE = "akhatova/pcb-defects"
dataset_path = kagglehub.dataset_download(DATASET_HANDLE)
print(f"Dataset downloaded to: {dataset_path}")


In [None]:
# Step 5: Convert VOC annotations to YOLO format
import xml.etree.ElementTree as ET
from pathlib import Path
import shutil
import random

# Input (downloaded dataset) and output (YOLO-layout dataset) locations
DATA_DIR = Path(dataset_path).joinpath("PCB_DATASET")
OUTPUT_DIR = Path("/content/yolo_dataset")

# Make sure images/<split> and labels/<split> exist for every split
for split in ('train', 'val', 'test'):
    for kind in ('images', 'labels'):
        (OUTPUT_DIR / kind / split).mkdir(parents=True, exist_ok=True)

# Defect class names; a name's position in this list is its class index
CLASSES = [
    "missing_hole",
    "mouse_bite",
    "open_circuit",
    "short",
    "spur",
    "spurious_copper",
]
CLASS_TO_IDX = dict(zip(CLASSES, range(len(CLASSES))))

def _read_int(node, tag):
    """Return the text of child *tag* of *node* as an int.

    Accepts both integer ("12") and float-formatted ("12.0") values, which a
    plain int() call would reject; fractional values are rounded.
    """
    return round(float(node.find(tag).text))


def parse_voc_annotation(xml_path):
    """Parse one VOC-style XML annotation file.

    Args:
        xml_path: Path (or open file object) of the annotation XML.

    Returns:
        A tuple ``(filename, width, height, objects)`` where ``filename`` is
        the text of the ``<filename>`` element, ``width``/``height`` are the
        image size in pixels as ints, and ``objects`` is a list of dicts with
        keys ``name`` (lower-cased, whitespace-stripped class name) and
        ``xmin``/``ymin``/``xmax``/``ymax`` (int pixel coordinates).

    Raises:
        AttributeError: If a required element is missing from the XML.
        ValueError: If a size or coordinate value is not numeric.
    """
    root = ET.parse(xml_path).getroot()
    filename = root.find("filename").text
    size = root.find("size")
    width = _read_int(size, "width")
    height = _read_int(size, "height")

    objects = []
    for obj in root.findall("object"):
        bbox = obj.find("bndbox")
        objects.append({
            # Strip stray whitespace so e.g. " Short\n" still yields "short".
            "name": obj.find("name").text.strip().lower(),
            "xmin": _read_int(bbox, "xmin"),
            "ymin": _read_int(bbox, "ymin"),
            "xmax": _read_int(bbox, "xmax"),
            "ymax": _read_int(bbox, "ymax"),
        })
    return filename, width, height, objects

def convert_to_yolo(obj, img_width, img_height):
    """Convert one parsed VOC object into a YOLO label line.

    Args:
        obj: Dict with keys ``name``, ``xmin``, ``ymin``, ``xmax``, ``ymax``
            (pixel coordinates), as produced by ``parse_voc_annotation``.
        img_width: Image width in pixels.
        img_height: Image height in pixels.

    Returns:
        ``"<class_idx> <x_center> <y_center> <width> <height>"`` with the four
        box values normalized by the image size and printed with six
        decimals, or ``None`` when the class is not in ``CLASS_TO_IDX``, the
        image size is not positive, or the box has no area inside the image.
    """
    # Class names may contain spaces; the mapping uses underscores.
    class_idx = CLASS_TO_IDX.get(obj["name"].replace(" ", "_"))
    if class_idx is None or img_width <= 0 or img_height <= 0:
        return None

    # Put the corners in order, then clamp them to the image so every
    # normalized value stays within [0, 1].
    xmin, xmax = sorted((obj["xmin"], obj["xmax"]))
    ymin, ymax = sorted((obj["ymin"], obj["ymax"]))
    xmin, xmax = max(xmin, 0), min(xmax, img_width)
    ymin, ymax = max(ymin, 0), min(ymax, img_height)
    if xmax <= xmin or ymax <= ymin:
        return None

    x_center = (xmin + xmax) / 2 / img_width
    y_center = (ymin + ymax) / 2 / img_height
    width = (xmax - xmin) / img_width
    height = (ymax - ymin) / img_height
    return f"{class_idx} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}"

# Collect all (image, annotation) pairs.
# Directory listings are sorted because iterdir()/glob() yield entries in
# arbitrary order; without a stable starting order the seeded shuffle below
# would not produce the same split on every run, and a re-run could copy the
# same image into two different splits.
samples = []
n_missing_images = 0
for class_dir in sorted((DATA_DIR / "Annotations").iterdir()):
    if not class_dir.is_dir():
        continue
    for xml_file in sorted(class_dir.glob("*.xml")):
        img_path = DATA_DIR / "images" / class_dir.name / (xml_file.stem + ".jpg")
        if img_path.exists():
            samples.append((img_path, xml_file))
        else:
            n_missing_images += 1

print(f"Found {len(samples)} samples")
if n_missing_images:
    # Make skipped annotations visible instead of dropping them silently.
    print(f"Skipped {n_missing_images} annotations without a matching .jpg image")

# Shuffle and split: 70% train, 20% val, remainder test
random.seed(42)
random.shuffle(samples)
n_train = int(len(samples) * 0.7)
n_val = int(len(samples) * 0.2)

splits = [
    ("train", samples[:n_train]),
    ("val", samples[n_train:n_train + n_val]),
    ("test", samples[n_train + n_val:]),
]

for split_name, split_samples in splits:
    for img_path, xml_path in split_samples:
        _, width, height, objects = parse_voc_annotation(xml_path)
        # convert_to_yolo returns None for objects it cannot convert.
        yolo_lines = [
            line
            for line in (convert_to_yolo(obj, width, height) for obj in objects)
            if line
        ]

        # Prefix with the class directory name so stems from different
        # class folders cannot collide in the flat output directory.
        new_name = f"{xml_path.parent.name}_{xml_path.stem}"
        shutil.copy(img_path, OUTPUT_DIR / "images" / split_name / f"{new_name}.jpg")
        label_path = OUTPUT_DIR / "labels" / split_name / f"{new_name}.txt"
        label_path.write_text("\n".join(yolo_lines), encoding="utf-8")
    print(f"{split_name}: {len(split_samples)} images")

print("✅ Dataset converted!")


In [None]:
# Step 6: Create YOLO config file
# Dataset description: root path, per-split image folders, and the
# index -> class-name table (same order as CLASSES in the conversion step).
yaml_content = (
    "# PCB Defects Dataset\n"
    "path: /content/yolo_dataset\n"
    "train: images/train\n"
    "val: images/val\n"
    "test: images/test\n"
    "\n"
    "names:\n"
    "  0: missing_hole\n"
    "  1: mouse_bite\n"
    "  2: open_circuit\n"
    "  3: short\n"
    "  4: spur\n"
    "  5: spurious_copper\n"
    "\n"
    "nc: 6\n"
)

with open("/content/pcb_defects.yaml", "w") as config_file:
    config_file.write(yaml_content)

print("‚úÖ Config file created!")


In [None]:
# Step 7: Train YOLOv8 🚀
from ultralytics import YOLO

# Start from the pretrained yolov8s.pt weights
model = YOLO("yolov8s.pt")

# Run configuration: dataset, schedule, hardware and output location
run_settings = dict(
    data="/content/pcb_defects.yaml",
    epochs=50,
    imgsz=640,
    batch=16,  # Can use larger batch on T4
    patience=15,
    device=0,  # Use GPU
    project="/content/runs",
    name="pcb_yolov8s",
    exist_ok=True,
    plots=True,
)

# Augmentation settings passed to the trainer
augmentation_settings = dict(
    mosaic=1.0,
    mixup=0.1,
    degrees=10.0,
    scale=0.5,
    fliplr=0.5,
)

# Train
results = model.train(**run_settings, **augmentation_settings)


In [None]:
# Step 8: Evaluate on test set
# Reload the best weights from the pcb_yolov8s run directory
model = YOLO("/content/runs/pcb_yolov8s/weights/best.pt")

# Validate on the "test" split declared in the dataset YAML
val_settings = {
    "data": "/content/pcb_defects.yaml",
    "split": "test",
    "plots": True,
}
metrics = model.val(**val_settings)

# Print the box metrics between two 50-character rules, four decimals each
box = metrics.box
rule = f"{'='*50}"
print(f"\n{rule}")
print("Test Results")
print(rule)
print(f"mAP50: {box.map50:.4f}")
print(f"mAP50-95: {box.map:.4f}")
print(f"Precision: {box.mp:.4f}")
print(f"Recall: {box.mr:.4f}")


In [None]:
# Step 9: Download trained model
from google.colab import files

# Download the best weights from the pcb_yolov8s run directory
best_weights_path = "/content/runs/pcb_yolov8s/weights/best.pt"
files.download(best_weights_path)
print("‚úÖ Model downloaded!")


In [None]:
# Optional: Test on a sample image
from pathlib import Path
import matplotlib.pyplot as plt

# Get a test image; sorted so the same sample is shown on every run
# (glob() yields files in arbitrary order).
test_images = sorted(Path("/content/yolo_dataset/images/test").glob("*.jpg"))
if test_images:
    results = model.predict(test_images[0], conf=0.25)

    # Results.plot() returns the annotated image as a BGR array, while
    # matplotlib's imshow expects RGB; reverse the channel axis so the
    # colours are not swapped in the displayed figure.
    annotated_rgb = results[0].plot()[..., ::-1]

    # Display result
    plt.figure(figsize=(12, 8))
    plt.imshow(annotated_rgb)
    plt.axis('off')
    plt.title('PCB Defect Detection')
    plt.show()
