In [None]:
!pip install -q transformers torch torchvision pycocotools xml.etree.ElementTree numpy scikit-learn matplotlib
import os
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
from transformers import DetrImageProcessor, DetrForObjectDetection
from PIL import Image
import xml.etree.ElementTree as ET
import numpy as np
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

[31mERROR: Could not find a version that satisfies the requirement xml.etree.ElementTree (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for xml.etree.ElementTree[0m[31m
[0m

In [None]:
from google.colab import drive
drive.mount('/content/drive')

# Single source of truth for the dataset location; change this one constant to
# point the notebook at a different copy of the dataset.
DATASET_ROOT = '/content/drive/My Drive/PCB/PCB_DATASET'

# One sub-folder per defect type exists under both `images/` and `Annotations/`.
DEFECT_FOLDERS = ['Missing_hole', 'Mouse_bite', 'Open_circuit', 'Short', 'Spurious_copper', 'Spur']

# defect folder name -> directory holding that defect's images
data_paths = {folder: f'{DATASET_ROOT}/images/{folder}' for folder in DEFECT_FOLDERS}

# defect folder name -> directory holding that defect's Pascal-VOC XML files
annotation_paths = {folder: f'{DATASET_ROOT}/Annotations/{folder}' for folder in DEFECT_FOLDERS}

# Define classes
# NOTE(review): these display names differ from the folder names above
# ('Missing Hole' vs 'Missing_hole', 'Short Circuit' vs 'Short'). Any code that
# looks up an annotation's <name> in `label2id` by exact string match will only
# find names spelled exactly as listed here - confirm against the XML files.
CLASSES = ['Missing Hole', 'Open Circuit', 'Short Circuit', 'Spur', 'Spurious Copper', 'Mouse Bite']
NUM_CLASSES = len(CLASSES)
id2label = {i: label for i, label in enumerate(CLASSES)}
label2id = {label: i for i, label in enumerate(CLASSES)}

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Custom Dataset
class PCBDataset(Dataset):
    """PCB defect images with Pascal-VOC XML annotations, encoded for DETR.

    Every image that has a same-named ``.xml`` file in the matching annotation
    directory becomes one sample. Annotations are converted to COCO-style
    dicts (``bbox`` as ``[x, y, width, height]``) and handed to the image
    processor together with the image when a sample is fetched.

    Args:
        data_paths: Mapping of defect type -> directory containing images.
        annotation_paths: Mapping of defect type -> directory containing the
            XML files; must have the same keys as ``data_paths``.
        processor: Callable accepting ``images=``, ``annotations=`` and
            ``return_tensors=`` (a ``DetrImageProcessor`` in this notebook).
        label_map: Optional mapping of class name -> integer id. Defaults to
            the notebook-level ``label2id``.
    """

    # File extensions (lower-case) treated as images.
    IMAGE_EXTENSIONS = ('.jpg', '.png')

    # Normalised annotation name -> normalised class name, for names that do not
    # match a class even after normalisation.
    # NOTE(review): assumes the XML files label short circuits as "short"
    # (mirroring the 'Short' folder name) - confirm against the dataset.
    LABEL_ALIASES = {'short': 'short circuit'}

    def __init__(self, data_paths, annotation_paths, processor, label_map=None):
        self.data_paths = data_paths
        self.annotation_paths = annotation_paths
        self.processor = processor
        self.label_map = label2id if label_map is None else label_map
        self._label_lookup = self._build_label_lookup(self.label_map)
        self.images = []
        self.annotations = []

        for defect_type in data_paths.keys():
            img_path = data_paths[defect_type]
            ann_path = annotation_paths[defect_type]

            # Sorted so sample indices (and therefore any seeded split) do not
            # depend on the arbitrary order returned by os.listdir.
            for img_file in sorted(os.listdir(img_path)):
                stem, ext = os.path.splitext(img_file)
                if ext.lower() not in self.IMAGE_EXTENSIONS:
                    continue

                # Swap only the real extension; a chained str.replace would
                # also rewrite '.jpg'/'.png' occurring inside the file name.
                xml_file = os.path.join(ann_path, stem + '.xml')
                if not os.path.exists(xml_file):
                    continue

                img_id = len(self.images)  # equals the index the sample will get
                self.images.append(os.path.join(img_path, img_file))
                self.annotations.append(self.parse_voc_xml(xml_file, img_id))

    @staticmethod
    def _normalize_label(name):
        """Lower-case ``name`` and treat '_', '-' and runs of whitespace as one space."""
        return ' '.join(name.replace('_', ' ').replace('-', ' ').lower().split())

    @classmethod
    def _build_label_lookup(cls, label_map):
        """Return a normalised-name -> id mapping built from ``label_map``."""
        lookup = {}
        for label, category_id in label_map.items():
            lookup.setdefault(cls._normalize_label(label), category_id)
        return lookup

    def _resolve_category_id(self, name):
        """Map an annotation name to its class id, or ``None`` if unknown.

        An exact match in ``label_map`` wins; otherwise the name is normalised
        (case, underscores, hyphens, whitespace) and, failing that, looked up
        through ``LABEL_ALIASES``.
        """
        if name in self.label_map:
            return self.label_map[name]
        key = self._normalize_label(name)
        if key not in self._label_lookup:
            key = self.LABEL_ALIASES.get(key, key)
        return self._label_lookup.get(key)

    def parse_voc_xml(self, xml_file, img_id):
        """Parse one Pascal-VOC XML file into a list of COCO-style annotations.

        Args:
            xml_file: Path to the XML file.
            img_id: Value stored under ``'image_id'`` in every annotation.

        Returns:
            A list of dicts with keys ``image_id``, ``category_id``, ``bbox``
            (``[xmin, ymin, width, height]``), ``area`` and ``iscrowd``.
            Objects whose name cannot be mapped to a class id, or that lack a
            ``<name>``/``<bndbox>`` element, are skipped.
        """
        root = ET.parse(xml_file).getroot()

        annotations = []
        for obj in root.findall('object'):
            name = obj.findtext('name')
            bbox = obj.find('bndbox')
            if name is None or bbox is None:
                continue

            category_id = self._resolve_category_id(name)
            if category_id is None:
                continue

            xmin = float(bbox.find('xmin').text)
            ymin = float(bbox.find('ymin').text)
            xmax = float(bbox.find('xmax').text)
            ymax = float(bbox.find('ymax').text)
            width = xmax - xmin
            height = ymax - ymin

            annotations.append({
                'image_id': img_id,
                'category_id': category_id,
                'bbox': [xmin, ymin, width, height],
                'area': width * height,
                'iscrowd': 0
            })
        return annotations

    @staticmethod
    def _extract_target(encoding):
        """Pull the single image's target dict out of the processor output.

        The processor nests per-image targets under ``encoding['labels']``;
        there is no top-level ``'boxes'`` key (reading it raises ``KeyError``).
        The class ids are additionally exposed under ``'labels'`` so code that
        reads either ``'class_labels'`` or ``'labels'`` works.
        """
        target = encoding['labels']
        if isinstance(target, (list, tuple)):
            target = target[0]  # one image in -> one target dict out
        target = {key: value for key, value in target.items()}
        if 'labels' not in target and 'class_labels' in target:
            target['labels'] = target['class_labels']
        return target

    def __len__(self):
        """Number of images that have a matching annotation file."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load, encode and return sample ``idx``.

        Returns:
            A dict with ``pixel_values`` and ``pixel_mask`` (leading batch
            dimension removed) and ``labels``, the target dict for this image.
        """
        # Close the file handle as soon as the pixels are decoded.
        with Image.open(self.images[idx]) as raw_image:
            image = raw_image.convert('RGB')

        # Process image and annotations with DETR processor
        encoding = self.processor(
            images=image,
            annotations={'image_id': idx, 'annotations': self.annotations[idx]},
            return_tensors="pt"
        )

        return {
            'pixel_values': encoding['pixel_values'].squeeze(0),  # drop batch dim
            'pixel_mask': encoding['pixel_mask'].squeeze(0),      # drop batch dim
            'labels': self._extract_target(encoding)
        }

# Image processor for the pretrained DETR checkpoint. The target resolution is
# given through the `size` dict (shortest/longest edge) rather than the
# deprecated `max_size` argument.
DETR_IMAGE_SIZE = {'shortest_edge': 800, 'longest_edge': 1333}
processor = DetrImageProcessor.from_pretrained('facebook/detr-resnet-50', size=DETR_IMAGE_SIZE)

In [None]:
# Create dataset and dataloader
def custom_collate_fn(batch):
    """Collate per-sample dicts into one batch, zero-padding to a common size.

    Args:
        batch: List of dicts, each with ``'pixel_values'`` (last two dims are
            height and width), ``'pixel_mask'`` and ``'labels'``.

    Returns:
        A dict with ``'pixel_values'`` and ``'pixel_mask'`` stacked along a new
        leading dimension after padding on the bottom/right with zeros up to
        the tallest/widest ``pixel_values`` in the batch, and ``'labels'`` as
        the untouched list of per-sample label objects.
    """
    images, masks, targets = [], [], []
    for sample in batch:
        images.append(sample['pixel_values'])
        masks.append(sample['pixel_mask'])
        targets.append(sample['labels'])

    # Common canvas size: the largest height and width found in the batch.
    target_height = max(image.shape[-2] for image in images)
    target_width = max(image.shape[-1] for image in images)

    def _pad_to_canvas(tensor):
        # F.pad takes (left, right, top, bottom) for the last two dimensions.
        pad_right = target_width - tensor.shape[-1]
        pad_bottom = target_height - tensor.shape[-2]
        return torch.nn.functional.pad(tensor, (0, pad_right, 0, pad_bottom))

    return {
        'pixel_values': torch.stack([_pad_to_canvas(image) for image in images]),
        'pixel_mask': torch.stack([_pad_to_canvas(mask) for mask in masks]),
        'labels': targets
    }
# Build the full dataset, then split it 80/20 into training and validation.
dataset = PCBDataset(data_paths, annotation_paths, processor)

# Seed for the train/validation split so a fresh kernel reproduces the same split.
SPLIT_SEED = 42

train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
if train_size == 0:
    # Fail here with a clear message instead of an obscure sampler error when
    # the shuffling DataLoader is built over an empty training split.
    raise RuntimeError(
        f"Only {len(dataset)} image/annotation pair(s) were found; cannot build a "
        "training split. Check data_paths / annotation_paths."
    )

train_dataset, val_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

train_dataloader = DataLoader(
    train_dataset,
    batch_size=2,
    shuffle=True,
    num_workers=2,
    collate_fn=custom_collate_fn
)

val_dataloader = DataLoader(
    val_dataset,
    batch_size=2,
    shuffle=False,  # keep validation order fixed
    num_workers=2,
    collate_fn=custom_collate_fn
)

In [None]:
# Load the pretrained DETR checkpoint with a classification head sized for
# NUM_CLASSES. `ignore_mismatched_sizes=True` allows the checkpoint's head
# weights, whose shape differs, to be skipped and freshly initialised.
DETR_CHECKPOINT = 'facebook/detr-resnet-50'
model = DetrForObjectDetection.from_pretrained(
    DETR_CHECKPOINT,
    num_labels=NUM_CLASSES,
    ignore_mismatched_sizes=True,
)
# Keep the model on the CPU; as the cell's last expression this also displays
# the module summary.
model.to('cpu')

Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DetrForObjectDetection were not initialized from the model checkpoin

DetrForObjectDetection(
  (model): DetrModel(
    (backbone): DetrConvModel(
      (conv_encoder): DetrConvEncoder(
        (model): FeatureListNet(
          (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
          (bn1): DetrFrozenBatchNorm2d()
          (act1): ReLU(inplace=True)
          (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
          (layer1): Sequential(
            (0): Bottleneck(
              (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (bn1): DetrFrozenBatchNorm2d()
              (act1): ReLU(inplace=True)
              (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
              (bn2): DetrFrozenBatchNorm2d()
              (drop_block): Identity()
              (act2): ReLU(inplace=True)
              (aa): Identity()
              (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      

In [None]:
# Training loop.
#
# NOTE(review): `num_epochs` and `optimizer` were not defined in any visible
# cell, so the loop only ran thanks to leftover kernel state. They are defined
# here so the notebook survives Restart & Run All; the values are assumed
# starting points for fine-tuning - tune as needed.
num_epochs = 10
optimizer = torch.optim.AdamW(
    [param for param in model.parameters() if param.requires_grad],
    lr=1e-4,
    weight_decay=1e-4,
)

# Inputs must live on the same device as the model's weights.
device = next(model.parameters()).device

for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    for batch in train_dataloader:
        pixel_values = batch['pixel_values'].to(device)  # [batch_size, 3, H, W]
        pixel_mask = batch['pixel_mask'].to(device)      # [batch_size, H, W]

        # The DETR loss reads each target's class ids from 'class_labels' and
        # its boxes from 'boxes'. Accept class ids stored under either
        # 'class_labels' or 'labels' and rebuild the targets in that form.
        labels = []
        for label in batch['labels']:
            if not hasattr(label, 'keys') or 'boxes' not in label:
                raise ValueError(f"Unexpected label format in training: {type(label)}")
            if 'class_labels' in label:
                class_ids = label['class_labels']
            elif 'labels' in label:
                class_ids = label['labels']
            else:
                raise ValueError(f"Unexpected label format in training: {type(label)}")
            labels.append({
                'class_labels': class_ids.to(device),
                'boxes': label['boxes'].to(device),
            })

        # Forward pass
        outputs = model(
            pixel_values=pixel_values,
            pixel_mask=pixel_mask,
            labels=labels
        )
        loss = outputs.loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(train_dataloader)}")

The `max_size` parameter is deprecated and will be removed in v4.26. Please specify in `size['longest_edge'] instead`.
The `max_size` parameter is deprecated and will be removed in v4.26. Please specify in `size['longest_edge'] instead`.


KeyError: Caught KeyError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/_utils/worker.py", line 349, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
           ^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/_utils/fetch.py", line 50, in fetch
    data = self.dataset.__getitems__(possibly_batched_index)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataset.py", line 420, in __getitems__
    return [self.dataset[self.indices[idx]] for idx in indices]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataset.py", line 420, in <listcomp>
    return [self.dataset[self.indices[idx]] for idx in indices]
            ~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^
  File "<ipython-input-33-9a88df9da7ea>", line 69, in __getitem__
    'boxes': encoding['boxes']     # Tensor of box coordinates
             ~~~~~~~~^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/feature_extraction_utils.py", line 87, in __getitem__
    return self.data[item]
           ~~~~~~~~~^^^^^^
KeyError: 'boxes'


In [None]:
# Evaluation
def _cxcywh_to_xyxy(boxes):
    """Convert ``(..., 4)`` boxes from (cx, cy, w, h) to (x0, y0, x1, y1)."""
    cx, cy, w, h = boxes.unbind(-1)
    return torch.stack([cx - 0.5 * w, cy - 0.5 * h, cx + 0.5 * w, cy + 0.5 * h], dim=-1)


def _pairwise_iou(boxes_a, boxes_b):
    """Return the ``(len(a), len(b))`` IoU matrix of two sets of xyxy boxes."""
    area_a = (boxes_a[:, 2] - boxes_a[:, 0]).clamp(min=0) * (boxes_a[:, 3] - boxes_a[:, 1]).clamp(min=0)
    area_b = (boxes_b[:, 2] - boxes_b[:, 0]).clamp(min=0) * (boxes_b[:, 3] - boxes_b[:, 1]).clamp(min=0)
    top_left = torch.max(boxes_a[:, None, :2], boxes_b[None, :, :2])
    bottom_right = torch.min(boxes_a[:, None, 2:], boxes_b[None, :, 2:])
    overlap = (bottom_right - top_left).clamp(min=0)
    intersection = overlap[..., 0] * overlap[..., 1]
    union = area_a[:, None] + area_b[None, :] - intersection
    return intersection / union.clamp(min=1e-9)  # guard against 0/0


def _nanmean(values):
    """Mean of the non-NaN entries of ``values``; NaN if there are none."""
    valid = values[~np.isnan(values)]
    return float(valid.mean()) if valid.size else float('nan')


def evaluate(model, dataloader, score_threshold=0.5, iou_threshold=0.5):
    """Evaluate detections against ground truth and print summary metrics.

    For every image, predictions whose best real-class probability is at least
    ``score_threshold`` are kept. Each ground-truth box is then paired with the
    unused kept prediction of highest IoU, provided that IoU is at least
    ``iou_threshold``. A ground-truth box with no partner is recorded as
    predicted "background"; a kept prediction with no partner is recorded as a
    background ground truth (false positive). This yields equal-length label
    and prediction lists for the confusion matrix.

    NOTE(review): assumes target ``'boxes'`` use the same normalised
    (cx, cy, w, h) format as ``outputs.pred_boxes`` - confirm against the
    dataset/processor.

    Args:
        model: Detection model exposing ``config.num_labels`` and returning an
            object with ``logits`` (last class = "no object") and ``pred_boxes``.
        dataloader: Iterable of batches with ``'pixel_values'``, optionally
            ``'pixel_mask'``, and ``'labels'`` (list of per-image dicts holding
            ``'boxes'`` and class ids under ``'class_labels'`` or ``'labels'``).
        score_threshold: Minimum class probability for a prediction to count.
        iou_threshold: Minimum IoU for a prediction to match a ground truth.

    Returns:
        Mean per-class precision over classes with at least one prediction
        (NaN if there are none). This is a rough stand-in for mAP, not a
        COCO-style mAP.
    """
    model.eval()
    # Run on whatever device the model lives on rather than assuming CUDA.
    device = next(model.parameters()).device
    num_classes = model.config.num_labels
    background_id = num_classes  # extra confusion-matrix row/column

    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch in dataloader:
            pixel_values = batch['pixel_values'].to(device)
            pixel_mask = batch.get('pixel_mask')
            if pixel_mask is not None:
                pixel_mask = pixel_mask.to(device)
            outputs = model(pixel_values=pixel_values, pixel_mask=pixel_mask)

            # Drop the trailing "no object" class before picking a class.
            class_probs = outputs.logits.softmax(-1)[..., :num_classes].cpu()
            scores, classes = class_probs.max(-1)
            pred_boxes = _cxcywh_to_xyxy(outputs.pred_boxes.cpu())

            for i, target in enumerate(batch['labels']):
                if 'class_labels' in target:
                    gt_classes = target['class_labels']
                else:
                    gt_classes = target['labels']
                gt_classes = gt_classes.cpu().tolist()
                gt_boxes = _cxcywh_to_xyxy(target['boxes'].cpu().float().reshape(-1, 4))

                keep = scores[i] >= score_threshold
                det_classes = classes[i][keep].tolist()
                det_boxes = pred_boxes[i][keep]
                iou = _pairwise_iou(gt_boxes, det_boxes)

                used = set()
                for g, gt_class in enumerate(gt_classes):
                    best_det, best_iou = None, iou_threshold
                    for d in range(len(det_classes)):
                        if d in used:
                            continue
                        overlap = iou[g, d].item()
                        if overlap >= best_iou:
                            best_det, best_iou = d, overlap
                    all_labels.append(int(gt_class))
                    if best_det is None:
                        all_preds.append(background_id)  # missed detection
                    else:
                        used.add(best_det)
                        all_preds.append(int(det_classes[best_det]))

                for d, det_class in enumerate(det_classes):
                    if d not in used:
                        all_labels.append(background_id)  # false positive
                        all_preds.append(int(det_class))

    # Calculate metrics over a fixed label set so the matrix shape is stable.
    label_ids = list(range(num_classes + 1))
    if all_labels:
        cm = confusion_matrix(all_labels, all_preds, labels=label_ids)
    else:
        cm = np.zeros((num_classes + 1, num_classes + 1), dtype=int)

    true_positives = np.diag(cm)[:num_classes].astype(float)
    # Classes that never occur give 0/0; keep them as NaN without warnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        precision = true_positives / cm.sum(axis=0)[:num_classes]
        recall = true_positives / cm.sum(axis=1)[:num_classes]
        f1 = 2 * (precision * recall) / (precision + recall)
    mAP = _nanmean(precision)

    print(f"Mean per-class precision (rough mAP proxy): {mAP:.4f}")
    print(f"Precision: {_nanmean(precision):.4f}")
    print(f"Recall: {_nanmean(recall):.4f}")
    print(f"F1 Score: {_nanmean(f1):.4f}")
    print("\nConfusion Matrix (last row/column = background):")
    print(cm)

    return mAP

# Score the model on the held-out validation split and keep the returned metric.
mAP = evaluate(model=model, dataloader=val_dataloader)

In [None]:
# Persist only the weights (state dict) to the mounted Drive folder.
MODEL_WEIGHTS_PATH = '/content/drive/My Drive/PCB/detr_pcb_model.pth'
torch.save(model.state_dict(), MODEL_WEIGHTS_PATH)