In [1]:
!pip install -q effdet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m112.5/112.5 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m0:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m0:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m0:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m0:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.5/207.5 MB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m0:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
!pip install -q torch torchvision albumentations pycocotools opencv-python pytorch-lightning timm

In [50]:
import os
import cv2
import xml.etree.ElementTree as ET
from pathlib import Path
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
import albumentations as A
from albumentations.pytorch import ToTensorV2
import pytorch_lightning as pl
from effdet.anchors import Anchors, AnchorLabeler
from effdet import get_efficientdet_config, EfficientDet, DetBenchTrain
from effdet.efficientdet import HeadNet
from effdet import create_model
import timm
import random
import torchmetrics
from torchmetrics.detection.mean_ap import MeanAveragePrecision

In [4]:
# Set device and random seed
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)

# Define dataset paths
DATA_DIR = "/kaggle/input/ip102-dataset"
IMG_DIR = os.path.join(DATA_DIR, "JPEGImages")
ANN_DIR = os.path.join(DATA_DIR, "Annotations")
TRAIN_LIST = os.path.join("/kaggle/working", "train.txt")
VAL_LIST = os.path.join("/kaggle/working", "val.txt")
TEST_LIST = os.path.join(DATA_DIR, "test.txt")

In [5]:
# Split trainval into train and val (if not already done)
def split_trainval(trainval_path, train_out_path, val_out_path, train_ratio=0.9):
    if not os.path.exists(train_out_path) or not os.path.exists(val_out_path):
        with open(trainval_path, 'r') as f:
            img_ids = [line.strip() for line in f.readlines()]
        random.shuffle(img_ids)
        split_idx = int(len(img_ids) * train_ratio)
        train_ids = img_ids[:split_idx]
        val_ids = img_ids[split_idx:]
        with open(train_out_path, 'w') as f:
            f.write('\n'.join(train_ids))
        with open(val_out_path, 'w') as f:
            f.write('\n'.join(val_ids))
        print(f"Train set: {len(train_ids)} images, Val set: {len(val_ids)} images")

# Execute split if files don't exist
split_trainval(os.path.join(DATA_DIR, "trainval.txt"), TRAIN_LIST, VAL_LIST)

Train set: 13660 images, Val set: 1518 images


In [35]:
# Custom Dataset for IP102
import xml.etree.ElementTree as ET
from xml.etree.ElementTree import ParseError
class IP102Dataset(Dataset):
    def __init__(self, img_dir, ann_dir, img_list, transform=None):
        self.img_dir = Path(img_dir)
        self.ann_dir = Path(ann_dir)
        with open(img_list, 'r') as f:
            self.img_ids = [line.strip() for line in f.readlines()]
        self.transform = transform
        self.class_map = {i: str(i) for i in range(102)}  # 102 classes

    def __len__(self):
        return len(self.img_ids)


    def __getitem__(self, idx):
        try:
            img_id = self.img_ids[idx]
            img_path = self.img_dir / f"{img_id}.jpg"
            ann_path = self.ann_dir / f"{img_id}.xml"
    
            # Load image
            img = cv2.imread(str(img_path))
            if img is None:
                raise FileNotFoundError(f"Could not read image: {img_path}")
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            original_size = img.shape[:2]  # (H, W)
    
            # Parse XML annotation
            tree = ET.parse(str(ann_path))
            root = tree.getroot()
            boxes = []
            labels = []
    
            for obj in root.findall('object'):
                label = int(obj.find('name').text)  # Class ID (0–101)
                bbox = obj.find('bndbox')
                xmin = float(bbox.find('xmin').text)
                ymin = float(bbox.find('ymin').text)
                xmax = float(bbox.find('xmax').text)
                ymax = float(bbox.find('ymax').text)
    
                # Filter invalid boxes
                if xmax > xmin and ymax > ymin:
                    boxes.append([xmin, ymin, xmax, ymax])
                    labels.append(label)
    
            boxes = np.array(boxes, dtype=np.float32)
            labels = np.array(labels, dtype=np.int64)
    
            # If no valid boxes, insert dummy box
            if len(boxes) == 0:
                boxes = np.array([[0, 0, 1, 1]], dtype=np.float32)
                labels = np.array([0], dtype=np.int64)
    
            # Apply Albumentations transform
            if self.transform:
                transformed = self.transform(image=img, bboxes=boxes, class_labels=labels)
                img = transformed["image"]
                boxes = np.array(transformed["bboxes"], dtype=np.float32)
                labels = np.array(transformed["class_labels"], dtype=np.int64)
    
                transformed_height, transformed_width = img.shape[1:3]
                scale = min(transformed_height / original_size[0], transformed_width / original_size[1])
            else:
                scale = 1.0
    
            # Fallback again if transform removed all boxes
            if len(boxes) == 0:
                boxes = np.array([[0, 0, 1, 1]], dtype=np.float32)
                labels = np.array([0], dtype=np.int64)
    
            target = {
                "bbox": torch.tensor(boxes, dtype=torch.float32),
                "cls": torch.tensor(labels, dtype=torch.int64),
                "image_id": torch.tensor([idx], dtype=torch.int64),
                "img_scale": torch.tensor([scale], dtype=torch.float32),
                "img_size": torch.tensor([original_size], dtype=torch.float32)
            }
    
            return img, target
    
        except (ParseError, FileNotFoundError, cv2.error) as e:
            print(f"Skipping sample at index {idx} due to error: {e}")
            return None


In [36]:
# Define Data Augmentation
transform = A.Compose([
    A.Resize(512, 512),  # Fixed size to ensure uniform dimensions
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['class_labels']))

# Create Datasets and DataLoaders
train_dataset = IP102Dataset(IMG_DIR, ANN_DIR, TRAIN_LIST, transform=transform)
val_dataset = IP102Dataset(IMG_DIR, ANN_DIR, VAL_LIST, transform=transform)
test_dataset = IP102Dataset(IMG_DIR, ANN_DIR, TEST_LIST, transform=transform)

In [41]:
def collate_fn(batch):
    # Remove any failed samples
    batch = [item for item in batch if item is not None]
    
    if len(batch) == 0:
        return None  # let Dataloader retry

    images, targets = zip(*batch)
    images = torch.stack(images)

    max_boxes = max([len(t["bbox"]) for t in targets])
    
    padded_bboxes = torch.stack([
        torch.cat([t["bbox"], torch.zeros(max_boxes - t["bbox"].size(0), 4, dtype=torch.float32)], dim=0)
        for t in targets
    ])
    padded_labels = torch.stack([
        torch.cat([t["cls"], torch.full((max_boxes - t["cls"].size(0),), -1, dtype=torch.int64)], dim=0)
        for t in targets
    ])

    adjusted_targets = {
        "bbox": padded_bboxes,
        "cls": padded_labels,
        "image_id": torch.stack([t["image_id"] for t in targets]),
        "img_scale": torch.cat([t["img_scale"] for t in targets], dim=0),
        "img_size": torch.cat([t["img_size"] for t in targets], dim=0)
    }

    return images, adjusted_targets


In [42]:
# Create Datasets and DataLoaders
train_dataset = IP102Dataset(IMG_DIR, ANN_DIR, TRAIN_LIST, transform=transform)
val_dataset = IP102Dataset(IMG_DIR, ANN_DIR, VAL_LIST, transform=transform)
test_dataset = IP102Dataset(IMG_DIR, ANN_DIR, TEST_LIST, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=collate_fn, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False, collate_fn=collate_fn, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False, collate_fn=collate_fn, num_workers=2)

In [53]:
import torch
import pytorch_lightning as pl
from torchmetrics.detection.mean_ap import MeanAveragePrecision

class EfficientDetLightning(pl.LightningModule):
    def __init__(self, num_classes=102):
        super().__init__()
        config = get_efficientdet_config('tf_efficientdet_d0')
        config.num_classes = num_classes
        config.image_size = (512, 512)

        # Build base model
        base_model = create_model(
            model_name='tf_efficientdet_d0',
            num_classes=num_classes,
            pretrained=False
        )
        base_model.class_net = HeadNet(config, num_outputs=num_classes)
        self.model = DetBenchTrain(base_model, config)

        # Metric tracker
        self.map_metric = MeanAveragePrecision(iou_type="bbox", class_metrics=True)
        self.save_hyperparameters()

    def forward(self, images, targets=None):
        return self.model(images, targets)

    def training_step(self, batch, batch_idx):
        images, targets = batch
        images = images.to(self.device)
    
        # Move all fields in the batched target dict to device
        targets = {k: v.to(self.device) for k, v in targets.items()}
    
        # Pass directly to the model (DetBenchTrain supports this format)
        loss_dict = self.model(images, targets)
        loss = loss_dict['loss']
    
        # Log training loss
        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True)
        return loss
    def validation_step(self, batch, batch_idx):
        images, targets = batch
        images = images.to(self.device)
        targets = {k: v.to(self.device) for k, v in targets.items()}
    
        # Compute and log loss
        loss_dict = self.model(images, targets)
        self.log("val_loss", loss_dict['loss'], on_epoch=True, prog_bar=True)
    
        self.model.eval()
        with torch.no_grad():
            detections = self.model.model(images)  # inference mode: returns list of dicts
    
        self.model.train()
    
        # Build a lookup from image_id -> {boxes, labels}
        image_id_to_target = {}
        for i in range(images.size(0)):
            valid_mask = targets["cls"][i] != -1
            if valid_mask.sum() == 0:
                continue  # no valid GT
            img_id = targets["image_id"][i].item()
            image_id_to_target[img_id] = {
                "boxes": targets["bbox"][i][valid_mask],
                "labels": targets["cls"][i][valid_mask]
            }
    
        # Build aligned target list for each prediction
        aligned_targets = []
        aligned_preds = []
    
        for pred in detections:
            img_id = pred["image_id"].item() if "image_id" in pred else None
            if img_id is not None and img_id in image_id_to_target:
                aligned_preds.append({
                    "boxes": pred["boxes"].to(self.device),
                    "scores": pred["scores"].to(self.device),
                    "labels": pred["labels"].to(self.device)
                })
                aligned_targets.append(image_id_to_target[img_id])
    
        # Final check before update
        if len(aligned_preds) != len(aligned_targets):
            print("Some predictions skipped due to unmatched image_id.")
            return  # skip this batch safely

        if aligned_preds and aligned_targets:
            print(f"\n[DEBUG] Sample Prediction (epoch {self.current_epoch}, batch {batch_idx}):")
            print(aligned_preds[0])
            print("[DEBUG] Sample Target:")
            print(aligned_targets[0])
        
        if len(aligned_preds) == len(aligned_targets) and len(aligned_preds) > 0:
            self.map_metric.update(aligned_preds, aligned_targets)
    
        #self.map_metric.update(aligned_preds, aligned_targets)



    def on_validation_epoch_end(self):
        results = self.map_metric.compute()
        map_50 = results["map_50"]
        map_95 = results["map"]
        self.log("mAP@50", map_50, prog_bar=True)
        self.log("mAP@95", map_95, prog_bar=True)
        print(f"\nEpoch {self.current_epoch+1} - mAP@50: {map_50:.4f}, mAP@95: {map_95:.4f}")
        self.map_metric.reset()

    def configure_optimizers(self):
        optimizer = torch.optim.SGD(self.parameters(), lr=0.001, momentum=0.9)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=75)
        return [optimizer], [scheduler]


NameError: name 'detections' is not defined

In [47]:
from pytorch_lightning.callbacks import TQDMProgressBar

class PersistentProgressBar(TQDMProgressBar):
    def init_train_tqdm(self):
        bar = super().init_train_tqdm()
        bar.leave = True  # Do not erase bar after epoch ends
        return bar

    def init_validation_tqdm(self):
        bar = super().init_validation_tqdm()
        bar.leave = True
        return bar


In [54]:
model = EfficientDetLightning(num_classes=102)

trainer = pl.Trainer(
    max_epochs=20,
    accelerator="auto",
    devices="auto",
    precision="32-true",
    log_every_n_steps=100,
    val_check_interval=1.0,
    callbacks=[PersistentProgressBar(refresh_rate=20)]
)

trainer.fit(model, train_loader, val_loader)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]


Epoch 1 - mAP@50: -1.0000, mAP@95: -1.0000




Training: |          | 0/? [00:00<?, ?it/s]

Skipping sample at index 4615 due to error: junk after document element: line 27, column 0


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch 1 - mAP@50: -1.0000, mAP@95: -1.0000
Skipping sample at index 4615 due to error: junk after document element: line 27, column 0


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch 2 - mAP@50: -1.0000, mAP@95: -1.0000
Skipping sample at index 4615 due to error: junk after document element: line 27, column 0


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch 3 - mAP@50: -1.0000, mAP@95: -1.0000
Skipping sample at index 4615 due to error: junk after document element: line 27, column 0


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch 4 - mAP@50: -1.0000, mAP@95: -1.0000
Skipping sample at index 4615 due to error: junk after document element: line 27, column 0


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch 5 - mAP@50: -1.0000, mAP@95: -1.0000
Skipping sample at index 4615 due to error: junk after document element: line 27, column 0


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch 6 - mAP@50: -1.0000, mAP@95: -1.0000
Skipping sample at index 4615 due to error: junk after document element: line 27, column 0


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch 7 - mAP@50: -1.0000, mAP@95: -1.0000
Skipping sample at index 4615 due to error: junk after document element: line 27, column 0


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch 8 - mAP@50: -1.0000, mAP@95: -1.0000
Skipping sample at index 4615 due to error: junk after document element: line 27, column 0


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch 9 - mAP@50: -1.0000, mAP@95: -1.0000
Skipping sample at index 4615 due to error: junk after document element: line 27, column 0


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch 10 - mAP@50: -1.0000, mAP@95: -1.0000
Skipping sample at index 4615 due to error: junk after document element: line 27, column 0


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch 11 - mAP@50: -1.0000, mAP@95: -1.0000
Skipping sample at index 4615 due to error: junk after document element: line 27, column 0


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch 12 - mAP@50: -1.0000, mAP@95: -1.0000
Skipping sample at index 4615 due to error: junk after document element: line 27, column 0


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch 13 - mAP@50: -1.0000, mAP@95: -1.0000
Skipping sample at index 4615 due to error: junk after document element: line 27, column 0


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch 14 - mAP@50: -1.0000, mAP@95: -1.0000
Skipping sample at index 4615 due to error: junk after document element: line 27, column 0


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch 15 - mAP@50: -1.0000, mAP@95: -1.0000
Skipping sample at index 4615 due to error: junk after document element: line 27, column 0


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch 16 - mAP@50: -1.0000, mAP@95: -1.0000
Skipping sample at index 4615 due to error: junk after document element: line 27, column 0


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch 17 - mAP@50: -1.0000, mAP@95: -1.0000
Skipping sample at index 4615 due to error: junk after document element: line 27, column 0


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch 18 - mAP@50: -1.0000, mAP@95: -1.0000
Skipping sample at index 4615 due to error: junk after document element: line 27, column 0


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch 19 - mAP@50: -1.0000, mAP@95: -1.0000
Skipping sample at index 4615 due to error: junk after document element: line 27, column 0


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch 20 - mAP@50: -1.0000, mAP@95: -1.0000


In [64]:
import torch
from torchmetrics.detection.mean_ap import MeanAveragePrecision
import numpy as np
from effdet.bench import DetBenchPredict



def evaluate_model(model, test_loader, device):
    model.eval()
    test_losses = []
    map_metric = MeanAveragePrecision(iou_type="bbox")

    # Use EfficientDet backbone and wrap with DetBenchPredict for inference
    backbone_model = model.model.model  # raw EfficientDet
    inference_model = DetBenchPredict(backbone_model).to(device)
    inference_model.eval()

    with torch.no_grad():
        for batch_idx, (images, targets) in enumerate(test_loader):
            images = images.to(device)
            targets = {k: v.to(device) for k, v in targets.items()}

            # Compute loss using training model
            outputs = model(images, targets)
            loss = outputs['loss']
            test_losses.append(loss.item())

            # Get predictions using inference wrapper
            detections = inference_model(images)

            target_list = []
            pred_list = []

            for i in range(len(images)):
                # Filter out padding (-1)
                valid_mask = targets["cls"][i] != -1
                if valid_mask.sum() == 0:
                    continue

                target_list.append({
                    "boxes": targets["bbox"][i][valid_mask],
                    "labels": targets["cls"][i][valid_mask]
                })

                pred = detections[i]

                # CASE 1: EfficientDet returns dict (newer versions)
                if isinstance(pred, dict):
                    if all(k in pred for k in ["boxes", "scores", "labels"]) and pred["boxes"].numel() > 0:
                        pred_list.append({
                            "boxes": pred["boxes"],
                            "scores": pred["scores"],
                            "labels": pred["labels"]
                        })

                # CASE 2: EfficientDet returns Nx6 tensor [x1, y1, x2, y2, score, label]
                elif isinstance(pred, torch.Tensor) and pred.ndim == 2 and pred.shape[1] == 6:
                    boxes = pred[:, :4]
                    scores = pred[:, 4]
                    labels = pred[:, 5].long()
                    if boxes.numel() > 0:
                        pred_list.append({
                            "boxes": boxes,
                            "scores": scores,
                            "labels": labels
                        })

                else:
                    print(f"[WARN] Skipping unexpected prediction format for sample {i}: {type(pred)}")

            if len(pred_list) == len(target_list) and len(pred_list) > 0:
                map_metric.update(pred_list, target_list)

    results = map_metric.compute()
    avg_loss = np.mean(test_losses)

    print(f"\n📊 Evaluation Summary:")
    print(f"Average Test Loss: {avg_loss:.4f}")
    print(f"mAP@50: {results['map_50']:.4f}")
    print(f"mAP@95: {results['map']:.4f}")

    return avg_loss, results


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

avg_loss, test_map_results = evaluate_model(model, test_loader, device)



📊 Evaluation Summary:
Average Test Loss: nan
mAP@50: 0.0388
mAP@95: 0.0388


In [57]:
# Evaluate on Test Set
model.eval()
test_losses = []
with torch.no_grad():
    for images, targets in test_loader:
        images = images.to(device)
        # Move all tensors in targets to device
        adjusted_targets = {k: v.to(device) for k, v in targets.items()}
        outputs = model(images, adjusted_targets)
        loss = outputs['loss']
        test_losses.append(loss.item())
print(f"Average Test Loss: {np.mean(test_losses):.4f}")

RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same

In [56]:
# Export Model (Optional)
torch.save(model.state_dict(), "efficientdet_ip102.pth")
print("Model weights saved to efficientdet_ip102.pth")

Model weights saved to efficientdet_ip102.pth
