# SSD300 Complete Standalone Pipeline for Kaggle

This notebook contains everything needed to train and evaluate SSD300 on the Pascal VOC 2012 dataset.
It's designed to run standalone in Kaggle without external dependencies.

## Features:
- Complete VOC to COCO conversion
- SSD300 training with enhanced logging
- Comprehensive evaluation with COCO metrics
- Visual comparisons of ground truth vs predictions
- Model checkpointing and visualization
- Testing on the comparison images listed in comparison_images.json

## 1. Setup and Imports

In [None]:
import os
import sys
import json
import xml.etree.ElementTree as ET
import logging
import argparse
from datetime import datetime
from pathlib import Path
import time
import random
import shutil

# Data handling
import pandas as pd
import numpy as np
from PIL import Image, ImageDraw, ImageFont

# ML/DL libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision import transforms
from torchvision.models.detection import ssd300_vgg16
from torchvision.models.detection.ssd import SSDClassificationHead

# Visualization
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import seaborn as sns
from IPython.display import display, clear_output

# COCO evaluation
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

# Configure matplotlib and seaborn
# Use matplotlib's default style and seaborn's "husl" colour palette for all
# figures produced later in the notebook.
plt.style.use('default')
sns.set_palette("husl")
# IPython/Jupyter magic (notebook syntax, not plain Python): show figures
# inline in the cell output.
%matplotlib inline

# Environment report: confirm the imports worked and show which library
# versions and which accelerator this session is running on.
print("✅ All imports successful!")
print(f"PyTorch version: {torch.__version__}")
print(f"Torchvision version: {torchvision.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    # Only query the device name when a CUDA device is actually present.
    print(f"GPU: {torch.cuda.get_device_name(0)}")

## 2. Configuration and Global Variables

In [None]:
# ===== CONFIGURATION - MODIFY THESE PATHS FOR YOUR SETUP =====

# Every run-wide setting lives in this single dictionary. On Kaggle, point the
# two path entries at your own input dataset and working directory.
CONFIG = dict(
    # --- Dataset locations ---
    voc_root='/kaggle/input/voc2012/VOCdevkit/VOC2012',  # read-only Kaggle input
    output_dir='/kaggle/working/ssd300_outputs',  # writable Kaggle directory

    # --- Training ---
    batch_size=4,
    num_epochs=5,
    learning_rate=1e-4,
    device='cuda' if torch.cuda.is_available() else 'cpu',
    num_workers=2,

    # --- Evaluation ---
    conf_threshold=0.3,
    nms_threshold=0.45,

    # --- Experiment bookkeeping ---
    # The timestamp makes each run's name (and therefore its log file) unique.
    experiment_name=f'ssd300_voc_{datetime.now().strftime("%Y%m%d_%H%M%S")}',
    save_checkpoints=True,
    checkpoint_every=1,  # checkpoint interval, in epochs
    save_visualizations=True,
    run_comparison_images=True,
)

# The 20 Pascal VOC object classes; list position is the class index.
VOC_CLASSES = (
    "aeroplane bicycle bird boat bottle "
    "bus car cat chair cow "
    "diningtable dog horse motorbike person "
    "pottedplant sheep sofa train tvmonitor"
).split()

# Lookup tables in both directions between class name and list position.
CLASS_TO_IDX = {name: position for position, name in enumerate(VOC_CLASSES)}
IDX_TO_CLASS = dict(enumerate(VOC_CLASSES))

# Create the output directory and one sub-directory per artefact type.
os.makedirs(CONFIG['output_dir'], exist_ok=True)
for _subdir in ('models', 'logs', 'predictions', 'visualizations', 'data'):
    os.makedirs(f"{CONFIG['output_dir']}/{_subdir}", exist_ok=True)

# Echo the effective configuration so it is visible in the notebook output.
print("Configuration:")
for setting_name, setting_value in CONFIG.items():
    print(f"  {setting_name}: {setting_value}")

print(f"\n📁 Output directory created: {CONFIG['output_dir']}")
print(f"🎯 Experiment name: {CONFIG['experiment_name']}")

## 3. Enhanced Logging Setup

In [None]:
# Setup comprehensive logging
# Messages go both to a per-experiment log file and to the console.
log_file = f"{CONFIG['output_dir']}/logs/{CONFIG['experiment_name']}.log"

# Make sure the log directory exists even if this cell is run on its own.
os.makedirs(os.path.dirname(log_file), exist_ok=True)

# Create custom logger
logger = logging.getLogger('SSD300_Pipeline')
logger.setLevel(logging.INFO)

# Clear existing handlers so re-running this cell does not duplicate every
# message. Each removed handler is also closed; otherwise the previous run's
# log file would stay open for the lifetime of the kernel.
for handler in logger.handlers[:]:
    logger.removeHandler(handler)
    handler.close()

# File handler
file_handler = logging.FileHandler(log_file)
file_handler.setLevel(logging.INFO)

# Console handler
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)

# Formatter (shared by both handlers)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
file_handler.setFormatter(formatter)
console_handler.setFormatter(formatter)

# Add handlers
logger.addHandler(file_handler)
logger.addHandler(console_handler)

# Training metrics tracking
# Module-level record that the training, checkpointing and plotting functions
# read from and append to.
training_metrics = {
    'start_time': None,
    'end_time': None,
    'epoch_times': [],      # seconds per epoch, appended by train_one_epoch
    'train_losses': [],     # average loss per epoch, appended by train_one_epoch
    'learning_rates': [],
    'best_map': 0.0,
    'final_metrics': {},
    'total_parameters': 0,  # filled in by create_ssd300_model
    'model_size_mb': 0
}

logger.info(f"Starting SSD300 experiment: {CONFIG['experiment_name']}")
logger.info(f"Configuration: {CONFIG}")
logger.info(f"Log file: {log_file}")

print("✅ Enhanced logging setup complete")
print(f"📝 Log file: {log_file}")

## 4. VOC to COCO Conversion Functions

In [None]:
def _voc_image_id_to_int(image_id, used_ids):
    """
    Map a VOC image identifier to a unique, reproducible integer COCO id.

    Identifiers that are numeric once underscores are removed (for example
    "2008_000002") become that number. Anything else gets the next free
    sequential number. ``used_ids`` is updated in place so that every id
    handed out during one conversion is distinct.
    """
    digits = image_id.replace('_', '')
    if digits.isdecimal():
        candidate = int(digits)
    else:
        candidate = len(used_ids) + 1

    # Guarantee uniqueness even if two identifiers map to the same number.
    while candidate in used_ids:
        candidate += 1

    used_ids.add(candidate)
    return candidate


def convert_voc_to_coco(voc_root, output_file, image_set='trainval'):
    """
    Convert Pascal VOC dataset to COCO format

    Args:
        voc_root: Directory containing ``ImageSets/Main``, ``JPEGImages`` and
            ``Annotations``.
        output_file: Path of the COCO JSON file to write.
        image_set: Name of the split file (without ``.txt``) in
            ``ImageSets/Main`` that lists the image identifiers to convert.

    Returns:
        ``False`` if the split file does not exist; otherwise a dict with the
        keys ``total_images``, ``total_annotations`` and ``skipped_images``.

    Notes:
        Image ids are derived from the VOC identifier itself rather than from
        ``hash()``: string hashes are salted per Python process, so they are
        not reproducible between sessions and can collide when reduced modulo
        10**8.
    """
    logger.info(f"Converting VOC to COCO format for {image_set} set...")

    voc_path = Path(voc_root)

    # Initialize COCO format structure
    coco_format = {
        "info": {
            "description": "Pascal VOC 2012 in COCO format",
            "version": "1.0",
            "year": 2012,
            "contributor": "SSD300 Pipeline",
            "date_created": datetime.now().isoformat()
        },
        "licenses": [{
            "id": 1,
            "name": "Pascal VOC License",
            "url": "http://host.robots.ox.ac.uk/pascal/VOC/"
        }],
        "categories": [],
        "images": [],
        "annotations": []
    }

    # Add categories (COCO category ids start from 1)
    for idx, class_name in enumerate(VOC_CLASSES):
        coco_format["categories"].append({
            "id": idx + 1,
            "name": class_name,
            "supercategory": "object"
        })

    # Read image IDs
    image_set_file = voc_path / 'ImageSets' / 'Main' / f'{image_set}.txt'
    if not image_set_file.exists():
        logger.error(f"Image set file not found: {image_set_file}")
        return False

    with open(image_set_file, 'r') as f:
        # Drop blank lines and repeated entries while keeping file order, so
        # that every image ends up in the COCO file exactly once.
        image_ids = list(dict.fromkeys(
            line.strip() for line in f if line.strip()
        ))

    annotation_id = 1
    used_image_ids = set()
    conversion_stats = {'total_images': 0, 'total_annotations': 0, 'skipped_images': 0}

    logger.info(f"Processing {len(image_ids)} images...")

    for idx, image_id in enumerate(image_ids):
        if idx % 500 == 0:
            logger.info(f"Processed {idx}/{len(image_ids)} images")

        # Image file
        img_file = voc_path / 'JPEGImages' / f'{image_id}.jpg'
        if not img_file.exists():
            logger.warning(f"Image file not found: {img_file}")
            conversion_stats['skipped_images'] += 1
            continue

        # Get image dimensions
        try:
            with Image.open(img_file) as img:
                width, height = img.size
        except Exception as e:
            logger.warning(f"Cannot read image {img_file}: {e}")
            conversion_stats['skipped_images'] += 1
            continue

        # Add image info
        image_info = {
            "id": _voc_image_id_to_int(image_id, used_image_ids),
            "file_name": f"{image_id}.jpg",
            "width": width,
            "height": height,
            "license": 1
        }
        coco_format["images"].append(image_info)
        conversion_stats['total_images'] += 1

        # Process annotations (an image without an XML file is kept, with no boxes)
        xml_file = voc_path / 'Annotations' / f'{image_id}.xml'
        if not xml_file.exists():
            continue

        try:
            tree = ET.parse(xml_file)
            root = tree.getroot()

            for obj in root.findall('object'):
                class_name = obj.find('name').text
                if class_name not in CLASS_TO_IDX:
                    continue

                # Get bounding box
                bbox_elem = obj.find('bndbox')
                xmin = float(bbox_elem.find('xmin').text)
                ymin = float(bbox_elem.find('ymin').text)
                xmax = float(bbox_elem.find('xmax').text)
                ymax = float(bbox_elem.find('ymax').text)

                # Convert to COCO format (x, y, width, height)
                bbox_width = xmax - xmin
                bbox_height = ymax - ymin

                # A box without positive extent carries no usable supervision
                # and is rejected by torchvision's detection models.
                if bbox_width <= 0 or bbox_height <= 0:
                    logger.warning(
                        f"Skipping degenerate box in {image_id}: "
                        f"{[xmin, ymin, xmax, ymax]}"
                    )
                    continue

                # Add annotation
                annotation = {
                    "id": annotation_id,
                    "image_id": image_info["id"],
                    "category_id": CLASS_TO_IDX[class_name] + 1,  # COCO categories start from 1
                    "bbox": [xmin, ymin, bbox_width, bbox_height],
                    "area": bbox_width * bbox_height,
                    "iscrowd": 0
                }
                coco_format["annotations"].append(annotation)
                annotation_id += 1
                conversion_stats['total_annotations'] += 1

        except Exception as e:
            # Best effort: a malformed annotation file must not abort the whole
            # conversion; annotations added before the error are kept.
            logger.warning(f"Error processing annotations for {image_id}: {e}")

    # Save COCO format JSON
    with open(output_file, 'w') as f:
        json.dump(coco_format, f, indent=2)

    logger.info("VOC to COCO conversion completed!")
    logger.info(f"Statistics: {conversion_stats}")
    logger.info(f"COCO file saved to: {output_file}")

    return conversion_stats

print("✅ VOC to COCO conversion functions defined")

## 5. Dataset Class Definition

In [None]:
class COCODetectionDataset(Dataset):
    """
    Object-detection dataset backed by a COCO-format annotation file.

    Each item is an ``(image, target)`` pair. ``target`` is a dict with
    ``boxes`` (float32, one ``[xmin, ymin, xmax, ymax]`` row per annotation),
    ``labels`` (int64, ``category_id - 1``) and ``image_id`` (int64, one
    element).

    NOTE(review): torchvision detection models appear to reserve label 0 for
    background, so the 0-based labels produced here may make the first class
    indistinguishable from background. Confirm, and if so shift the labels
    together with the evaluation and visualization code that maps them back.
    """

    def __init__(self, coco_json_path, image_dir, transforms=None):
        # Index the annotation file once; self.ids fixes the item order.
        self.coco = COCO(coco_json_path)
        self.image_dir = Path(image_dir)
        self.transforms = transforms
        self.ids = list(self.coco.imgs.keys())

        for message in (
            f"Dataset initialized with {len(self.ids)} images",
            f"Image directory: {self.image_dir}",
            f"COCO file: {coco_json_path}",
        ):
            logger.info(message)

    def __len__(self):
        return len(self.ids)

    def __getitem__(self, idx):
        img_id = self.ids[idx]
        file_name = self.coco.imgs[img_id]['file_name']

        # Always hand out 3-channel RGB images.
        image = Image.open(self.image_dir / file_name).convert('RGB')

        annotations = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id))

        if annotations:
            # COCO stores (x, y, width, height); convert to corner format.
            corner_boxes = []
            class_indices = []
            for annotation in annotations:
                left, top, box_w, box_h = annotation['bbox']
                corner_boxes.append([left, top, left + box_w, top + box_h])
                # COCO category ids are 1-based; shift to 0-based indices.
                class_indices.append(annotation['category_id'] - 1)
            boxes = torch.tensor(corner_boxes, dtype=torch.float32)
            labels = torch.tensor(class_indices, dtype=torch.int64)
        else:
            # Keep well-formed (empty) tensors for images without annotations.
            boxes = torch.zeros((0, 4), dtype=torch.float32)
            labels = torch.zeros((0,), dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": torch.tensor([img_id], dtype=torch.int64)
        }

        # The transform is applied to the image only; boxes stay in the
        # coordinates stored in the annotation file.
        if self.transforms:
            image = self.transforms(image)

        return image, target

# Custom collate function for object detection
def collate_fn(batch):
    """
    Custom collate function for object detection datasets

    Combines ``(image, target)`` samples into one image batch tensor and a
    list of per-image target dicts.

    Images in a batch may differ in height and width, which a plain
    ``torch.stack`` rejects. Each image is therefore copied into the top-left
    corner of a zero-filled canvas of the batch's maximum height and width.
    Padding is added only on the bottom and right, so box coordinates
    (measured from the top-left corner) remain valid. When all images already
    share one size, the result equals ``torch.stack(images, 0)``.

    Args:
        batch: Sequence of ``(image, target)`` pairs; each image is a
            ``(C, H, W)`` tensor and all images share the same ``C``.

    Returns:
        Tuple ``(images, targets)``: a ``(N, C, max_H, max_W)`` tensor and a
        list with the ``N`` targets in sample order.
    """
    images, targets = zip(*batch)

    max_height = max(img.shape[-2] for img in images)
    max_width = max(img.shape[-1] for img in images)

    # new_zeros keeps the dtype and device of the incoming images.
    batched = images[0].new_zeros(
        (len(images), images[0].shape[0], max_height, max_width)
    )
    for position, img in enumerate(images):
        batched[position, :, :img.shape[-2], :img.shape[-1]] = img

    return batched, list(targets)

print("✅ Dataset classes defined")

## 6. Model Setup and Training Functions

In [None]:
def create_ssd300_model(num_classes=20, pretrained=True):
    """
    Build an SSD300 (VGG16 backbone) detector with a fresh classification head.

    Args:
        num_classes: Number of object classes, excluding background.
        pretrained: Forwarded to ``ssd300_vgg16`` as its ``pretrained`` flag.

    Returns:
        The model with its classification head replaced by a new
        ``SSDClassificationHead`` that predicts ``num_classes + 1`` classes.

    Side effects:
        Stores the total parameter count in
        ``training_metrics['total_parameters']``.
    """
    logger.info(f"Creating SSD300 model with {num_classes} classes (pretrained={pretrained})")

    detector = ssd300_vgg16(pretrained=pretrained, progress=True)

    # Per-feature-map input channels and anchors per location for the new head.
    # NOTE(review): hard-coded to match the stock SSD300-VGG16 layout; confirm
    # against the installed torchvision version.
    feature_map_channels = [512, 1024, 512, 256, 256, 256]
    anchors_per_location = [4, 6, 6, 6, 4, 4]

    # One extra output class for background.
    detector.head.classification_head = SSDClassificationHead(
        feature_map_channels, anchors_per_location, num_classes + 1
    )

    # Tally parameters in a single pass over the model.
    total_params = 0
    trainable_params = 0
    for parameter in detector.parameters():
        count = parameter.numel()
        total_params += count
        if parameter.requires_grad:
            trainable_params += count

    logger.info(f"Model created successfully")
    logger.info(f"Total parameters: {total_params:,}")
    logger.info(f"Trainable parameters: {trainable_params:,}")

    training_metrics['total_parameters'] = total_params

    return detector

def save_checkpoint(model, optimizer, epoch, loss, filepath, is_best=False):
    """
    Save model checkpoint

    Args:
        model: Model whose ``state_dict()`` is stored.
        optimizer: Optimizer whose ``state_dict()`` is stored.
        epoch: Epoch number recorded in the checkpoint.
        loss: Loss value recorded in the checkpoint.
        filepath: Destination file (string or path-like).
        is_best: When true, a second copy is written next to ``filepath`` with
            ``_best`` inserted before the file extension.

    The checkpoint also contains the module-level ``training_metrics`` and
    ``CONFIG`` dictionaries.
    """
    # Accept pathlib.Path as well as plain strings.
    filepath = os.fspath(filepath)

    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': loss,
        'training_metrics': training_metrics,
        'config': CONFIG
    }

    torch.save(checkpoint, filepath)
    logger.info(f"Checkpoint saved to {filepath}")

    if is_best:
        # Split off only the final extension. A plain str.replace('.pth', ...)
        # would also rewrite '.pth' inside directory names and would return the
        # same path (no separate copy) for files without a '.pth' suffix.
        stem, extension = os.path.splitext(filepath)
        best_path = f"{stem}_best{extension}"
        torch.save(checkpoint, best_path)
        logger.info(f"Best model saved to {best_path}")

def train_one_epoch(model, dataloader, optimizer, device, epoch):
    """
    Train model for one epoch

    Args:
        model: Detection model; in training mode it is called as
            ``model(images, targets)`` and returns a dict of loss tensors.
        dataloader: Yields ``(images, targets)`` batches. ``images`` is either
            one batched tensor or a sequence of image tensors; ``targets`` is
            a sequence of dicts.
        optimizer: Optimizer that updates the model parameters.
        device: Device that images and target tensors are moved to.
        epoch: Zero-based epoch index (logged as ``epoch + 1``).

    Returns:
        Average of the summed loss over all batches, or ``nan`` when the
        dataloader yields no batches.

    Side effects:
        Appends the epoch duration and average loss to ``training_metrics``.
    """
    model.train()
    total_loss = 0.0
    num_batches = len(dataloader)

    logger.info(f"Starting epoch {epoch + 1} training...")
    epoch_start_time = time.time()

    for batch_idx, (images, targets) in enumerate(dataloader):
        # Move data to device (a collate function may return either one
        # stacked tensor or a sequence of per-image tensors)
        if isinstance(images, torch.Tensor):
            images = images.to(device)
        else:
            images = [image.to(device) for image in images]
        targets = [{k: v.to(device) if isinstance(v, torch.Tensor) else v
                    for k, v in target.items()} for target in targets]

        # Forward pass
        optimizer.zero_grad()
        loss_dict = model(images, targets)

        # Calculate total loss
        losses = sum(loss for loss in loss_dict.values())

        # Backward pass
        losses.backward()
        optimizer.step()

        batch_loss = losses.item()
        total_loss += batch_loss

        # Log progress every 10 batches and on the final batch
        if batch_idx % 10 == 0 or batch_idx == num_batches - 1:
            avg_loss = total_loss / (batch_idx + 1)
            logger.info(f"Epoch {epoch + 1}/{CONFIG['num_epochs']}, "
                        f"Batch {batch_idx + 1}/{num_batches}, "
                        f"Loss: {batch_loss:.4f}, "
                        f"Avg Loss: {avg_loss:.4f}")

    epoch_time = time.time() - epoch_start_time

    if num_batches > 0:
        avg_epoch_loss = total_loss / num_batches
    else:
        # An empty dataloader used to raise ZeroDivisionError here; report the
        # situation and record "no value" instead.
        logger.warning(f"Epoch {epoch + 1}: dataloader produced no batches")
        avg_epoch_loss = float('nan')

    logger.info(f"Epoch {epoch + 1} completed in {epoch_time:.1f}s, "
                f"Average Loss: {avg_epoch_loss:.4f}")

    training_metrics['epoch_times'].append(epoch_time)
    training_metrics['train_losses'].append(avg_epoch_loss)

    return avg_epoch_loss

print("✅ Model and training functions defined")

## 7. Evaluation Functions

In [None]:
def evaluate_model(model, dataloader, device, coco_gt):
    """
    Evaluate model using COCO metrics

    Runs the model over ``dataloader``, keeps detections whose score is at
    least ``CONFIG['conf_threshold']``, writes them to
    ``<output_dir>/predictions/ssd300_predictions.json`` and scores them with
    ``COCOeval`` (bbox mode).

    Args:
        model: Detection model; in eval mode ``model(images)`` returns one
            dict per image with ``boxes``, ``scores`` and ``labels``.
        dataloader: Yields ``(images, targets)``; each target has an
            ``image_id`` tensor with one element.
        device: Device the images are moved to.
        coco_gt: ``COCO`` object holding the ground-truth annotations.

    Returns:
        Dict mapping metric name to value, or an empty dict when no detection
        passed the threshold or the COCO evaluation raised.

    Notes:
        Scoring is restricted to the image ids actually seen in
        ``dataloader``. ``coco_gt`` may describe more images than the loader
        covers (for example when evaluating a validation subset); without the
        restriction every unseen image's ground truth counts as missed.
    """
    logger.info("Starting model evaluation...")
    model.eval()

    all_predictions = []
    evaluated_image_ids = []

    with torch.no_grad():
        for batch_idx, (images, targets) in enumerate(dataloader):
            if batch_idx % 20 == 0:
                logger.info(f"Evaluation batch {batch_idx + 1}/{len(dataloader)}")

            if isinstance(images, torch.Tensor):
                images = images.to(device)
            else:
                images = [image.to(device) for image in images]

            # Get predictions
            predictions = model(images)

            # Process each image in the batch
            for pred, target in zip(predictions, targets):
                img_id = target['image_id'].item()
                evaluated_image_ids.append(img_id)

                boxes = pred['boxes'].cpu().numpy()
                scores = pred['scores'].cpu().numpy()
                labels = pred['labels'].cpu().numpy()

                # Filter by confidence threshold
                keep = scores >= CONFIG['conf_threshold']
                boxes = boxes[keep]
                scores = scores[keep]
                labels = labels[keep]

                # Convert to COCO format: (x, y, width, height) boxes
                for box, score, label in zip(boxes, scores, labels):
                    x1, y1, x2, y2 = box
                    all_predictions.append({
                        "image_id": img_id,
                        "category_id": int(label) + 1,  # Convert back to 1-based
                        "bbox": [float(x1), float(y1), float(x2 - x1), float(y2 - y1)],
                        "score": float(score)
                    })

    if not all_predictions:
        logger.warning("No predictions generated!")
        return {}

    # Save predictions
    pred_file = f"{CONFIG['output_dir']}/predictions/ssd300_predictions.json"
    with open(pred_file, 'w') as f:
        json.dump(all_predictions, f, indent=2)

    logger.info(f"Generated {len(all_predictions)} predictions")
    logger.info(f"Predictions saved to {pred_file}")

    # Evaluate with COCO metrics
    try:
        coco_pred = coco_gt.loadRes(pred_file)
        coco_eval = COCOeval(coco_gt, coco_pred, 'bbox')
        # Score only the images that were actually run through the model.
        coco_eval.params.imgIds = sorted(set(evaluated_image_ids))
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        # Extract metrics: names follow the order of COCOeval.stats
        metric_names = [
            'mAP_0.5_0.95', 'mAP_0.5', 'mAP_0.75',
            'mAP_small', 'mAP_medium', 'mAP_large',
            'AR_1', 'AR_10', 'AR_100',
            'AR_small', 'AR_medium', 'AR_large',
        ]
        metrics = {
            name: coco_eval.stats[position]
            for position, name in enumerate(metric_names)
        }

        logger.info("COCO Evaluation Results:")
        for metric_name, value in metrics.items():
            logger.info(f"  {metric_name}: {value:.4f}")

        return metrics

    except Exception as e:
        # Evaluation problems are reported but do not abort the pipeline.
        logger.error(f"COCO evaluation failed: {e}")
        return {}

print("✅ Evaluation functions defined")

## 8. Visualization Functions

In [None]:
def visualize_training_progress(training_metrics, save_path=None):
    """
    Create comprehensive training progress visualization

    Draws a 2x2 figure: training loss, time per epoch, learning-rate schedule
    (log scale) and a text panel with summary statistics. A curve is drawn
    only when its list in ``training_metrics`` is non-empty.

    Args:
        training_metrics: Dict with the keys ``train_losses``,
            ``epoch_times``, ``learning_rates``, ``total_parameters`` and
            ``best_map``.
        save_path: If given, the figure is also saved to this path.
    """
    losses = training_metrics['train_losses']
    epoch_times = training_metrics['epoch_times']
    learning_rates = training_metrics['learning_rates']

    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    fig.suptitle(f'SSD300 Training Progress - {CONFIG["experiment_name"]}', fontsize=16)

    # Training loss
    if losses:
        axes[0, 0].plot(losses, 'b-', linewidth=2)
        axes[0, 0].set_title('Training Loss')
        axes[0, 0].set_xlabel('Epoch')
        axes[0, 0].set_ylabel('Loss')
        axes[0, 0].grid(True, alpha=0.3)

    # Epoch times
    if epoch_times:
        axes[0, 1].plot(epoch_times, 'g-', linewidth=2)
        axes[0, 1].set_title('Training Time per Epoch')
        axes[0, 1].set_xlabel('Epoch')
        axes[0, 1].set_ylabel('Time (seconds)')
        axes[0, 1].grid(True, alpha=0.3)

    # Learning rates
    if learning_rates:
        axes[1, 0].plot(learning_rates, 'r-', linewidth=2)
        axes[1, 0].set_title('Learning Rate Schedule')
        axes[1, 0].set_xlabel('Epoch')
        axes[1, 0].set_ylabel('Learning Rate')
        axes[1, 0].grid(True, alpha=0.3)
        axes[1, 0].set_yscale('log')

    # Summary statistics
    # Values that may be missing are formatted up front. Inside an f-string
    # everything after the first ':' is the format spec, so a conditional
    # cannot be written after ':.4f' there.
    final_loss_text = f"{losses[-1]:.4f}" if losses else 'N/A'
    avg_epoch_time_text = f"{np.mean(epoch_times):.1f}s" if epoch_times else 'N/A'

    axes[1, 1].axis('off')
    summary_text = f"""
Training Summary:
• Total Parameters: {training_metrics['total_parameters']:,}
• Epochs Completed: {len(losses)}
• Final Loss: {final_loss_text}
• Best mAP@0.5: {training_metrics['best_map']:.4f}
• Avg Epoch Time: {avg_epoch_time_text}
• Total Training Time: {sum(epoch_times):.1f}s
"""
    axes[1, 1].text(0.1, 0.5, summary_text, fontsize=12, verticalalignment='center',
                    bbox=dict(boxstyle='round', facecolor='lightgray', alpha=0.8))

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        logger.info(f"Training progress plot saved to {save_path}")

    plt.show()

def visualize_predictions(model, dataset, device, num_images=5, save_path=None):
    """
    Visualize model predictions on sample images

    Picks up to ``num_images`` random samples from ``dataset`` and draws a
    two-row figure: ground-truth boxes (green) on top, predicted boxes with a
    score of at least ``CONFIG['conf_threshold']`` (red) below.

    Args:
        model: Detection model; called in eval mode on one image at a time.
        dataset: Indexable dataset returning ``(image, target)`` pairs, where
            ``target`` has ``boxes`` and ``labels`` tensors.
        device: Device the model input is moved to.
        num_images: Maximum number of images to show.
        save_path: If given, the figure is also saved to this path.
    """
    # Per-channel mean/std matching the Normalize transform used when the
    # dataset is built in this notebook.
    norm_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    norm_std = np.array([0.229, 0.224, 0.225], dtype=np.float32)

    def to_display_array(image):
        """Return an HxWxC float array in [0, 1] suitable for imshow."""
        if not isinstance(image, torch.Tensor):
            return np.array(image) / 255.0
        img_array = image.permute(1, 2, 0).cpu().numpy()
        if img_array.min() < 0:  # Negative values: treat as a normalized image
            if img_array.shape[-1] == 3:
                # Undo the normalization channel by channel; using the first
                # channel's statistics for all three distorts the colours.
                img_array = img_array * norm_std + norm_mean
            else:
                img_array = (img_array * 0.229) + 0.485  # Approximate denormalization
        return np.clip(img_array, 0, 1)

    def class_name_for(label):
        """Map a class index to its name, tolerating out-of-range indices."""
        label = int(label)
        if 0 <= label < len(VOC_CLASSES):
            return VOC_CLASSES[label]
        # The model can emit an index outside the class list; show it rather
        # than failing the whole figure with an IndexError.
        return f'class_{label}'

    model.eval()

    # Select random images
    sample_count = max(0, min(num_images, len(dataset)))
    indices = random.sample(range(len(dataset)), sample_count)
    if not indices:
        logger.warning("No images available to visualize")
        return

    # squeeze=False keeps axes two-dimensional even for a single image.
    fig, axes = plt.subplots(2, len(indices), figsize=(4 * len(indices), 8),
                             squeeze=False)

    with torch.no_grad():
        for i, idx in enumerate(indices):
            image, target = dataset[idx]

            # Get prediction
            model_input = image.unsqueeze(0).to(device)
            prediction = model(model_input)[0]

            # Convert image back to an array for visualization
            img_array = to_display_array(image)

            # Ground truth visualization
            axes[0, i].imshow(img_array)
            axes[0, i].set_title('Ground Truth')
            axes[0, i].axis('off')

            # Draw ground truth boxes
            if len(target['boxes']) > 0:
                for box, label in zip(target['boxes'], target['labels']):
                    x1, y1, x2, y2 = box.numpy()
                    rect = patches.Rectangle((x1, y1), x2-x1, y2-y1,
                                             linewidth=2, edgecolor='green', facecolor='none')
                    axes[0, i].add_patch(rect)
                    axes[0, i].text(x1, y1-5, class_name_for(label.item()),
                                    color='green', fontsize=8, weight='bold')

            # Prediction visualization
            axes[1, i].imshow(img_array)
            axes[1, i].set_title('Predictions')
            axes[1, i].axis('off')

            # Draw prediction boxes
            boxes = prediction['boxes'].cpu().numpy()
            scores = prediction['scores'].cpu().numpy()
            labels = prediction['labels'].cpu().numpy()

            # Filter by confidence
            keep = scores >= CONFIG['conf_threshold']
            boxes = boxes[keep]
            scores = scores[keep]
            labels = labels[keep]

            for box, score, label in zip(boxes, scores, labels):
                x1, y1, x2, y2 = box
                rect = patches.Rectangle((x1, y1), x2-x1, y2-y1,
                                         linewidth=2, edgecolor='red', facecolor='none')
                axes[1, i].add_patch(rect)
                axes[1, i].text(x1, y1-5, f'{class_name_for(label)} ({score:.2f})',
                                color='red', fontsize=8, weight='bold')

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        logger.info(f"Prediction visualization saved to {save_path}")

    plt.show()

print("✅ Visualization functions defined")

## 9. Comparison Images Testing Functions

In [None]:
def test_comparison_images(model, dataset, device, comparison_file_path=None):
    """
    Test model on comparison images and create side-by-side visualizations

    For every comparison image that is present in ``dataset``, draws the
    ground truth (left, green) next to the model's predictions with a score of
    at least ``CONFIG['conf_threshold']`` (right, red). The figure is saved to
    ``<output_dir>/visualizations/comparison_predictions.png`` and a JSON
    summary to ``<output_dir>/predictions/comparison_results.json``.

    Args:
        model: Detection model; called in eval mode on one image at a time.
        dataset: Dataset exposing ``coco`` (a COCO index) and returning
            ``(image, target)`` pairs. If it also exposes ``ids`` (image ids
            in item order), filenames are looked up without loading images.
        device: Device the model input is moved to.
        comparison_file_path: Optional JSON file whose ``images`` list
            describes the comparison images. When missing or not found, a
            built-in list of five VOC images is used.

    Returns:
        List with one result dict per comparison image found (empty when none
        of them is in the dataset).
    """
    # Per-channel mean/std matching the Normalize transform used when the
    # dataset is built in this notebook.
    norm_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    norm_std = np.array([0.229, 0.224, 0.225], dtype=np.float32)

    def to_display_array(image):
        """Return an HxWxC float array in [0, 1] suitable for imshow."""
        if not isinstance(image, torch.Tensor):
            return np.array(image) / 255.0
        img_array = image.permute(1, 2, 0).cpu().numpy()
        if img_array.min() < 0:  # Negative values: treat as a normalized image
            if img_array.shape[-1] == 3:
                # Undo the normalization channel by channel; using the first
                # channel's statistics for all three distorts the colours.
                img_array = img_array * norm_std + norm_mean
            else:
                img_array = (img_array * 0.229) + 0.485
        return np.clip(img_array, 0, 1)

    def class_name_for(label):
        """Map a class index to its name, tolerating out-of-range indices."""
        label = int(label)
        if 0 <= label < len(VOC_CLASSES):
            return VOC_CLASSES[label]
        return f'class_{label}'

    # Define comparison images if file not provided
    if comparison_file_path and os.path.exists(comparison_file_path):
        with open(comparison_file_path, 'r') as f:
            comparison_data = json.load(f)
        comparison_images = comparison_data.get('images', [])
    else:
        # Define some default comparison images
        comparison_images = [
            {"image_id": "2008_000002", "filename": "2008_000002.jpg",
             "description": "Person with horse", "expected_objects": ["person", "horse"], "difficulty": "medium"},
            {"image_id": "2008_000008", "filename": "2008_000008.jpg",
             "description": "Multiple cars", "expected_objects": ["car"], "difficulty": "easy"},
            {"image_id": "2008_000015", "filename": "2008_000015.jpg",
             "description": "Birds in scene", "expected_objects": ["bird"], "difficulty": "hard"},
            {"image_id": "2008_000019", "filename": "2008_000019.jpg",
             "description": "Person scene", "expected_objects": ["person"], "difficulty": "medium"},
            {"image_id": "2008_000021", "filename": "2008_000021.jpg",
             "description": "Multiple objects", "expected_objects": ["person", "bicycle"], "difficulty": "hard"}
        ]

    logger.info(f"Testing on {len(comparison_images)} comparison images...")

    model.eval()
    results = []

    # Find comparison images in dataset
    dataset_filenames = {}
    image_ids = getattr(dataset, 'ids', None)
    if image_ids is not None:
        # Fast path: read filenames straight from the COCO index instead of
        # decoding every image in the dataset just to learn its id.
        for idx, img_id in enumerate(image_ids):
            dataset_filenames[dataset.coco.imgs[img_id]['file_name']] = idx
    else:
        for idx in range(len(dataset)):
            _, target = dataset[idx]
            img_id = target['image_id'].item()
            img_info = dataset.coco.imgs[img_id]
            dataset_filenames[img_info['file_name']] = idx

    comparison_found = [
        (img_data, dataset_filenames[img_data['filename']])
        for img_data in comparison_images
        if img_data['filename'] in dataset_filenames
    ]

    if not comparison_found:
        logger.warning("No comparison images found in dataset!")
        return []

    logger.info(f"Found {len(comparison_found)} comparison images in dataset")

    # Create visualization (squeeze=False keeps axes two-dimensional even for
    # a single row)
    fig, axes = plt.subplots(len(comparison_found), 2,
                             figsize=(12, 4 * len(comparison_found)),
                             squeeze=False)

    with torch.no_grad():
        for row, (img_data, idx) in enumerate(comparison_found):
            image, target = dataset[idx]

            # Get prediction
            model_input = image.unsqueeze(0).to(device)
            prediction = model(model_input)[0]

            # Convert image for visualization
            img_array = to_display_array(image)

            # Ground truth (left side)
            axes[row, 0].imshow(img_array)
            axes[row, 0].set_title(f'Ground Truth - {img_data["description"]}\nExpected: {", ".join(img_data["expected_objects"])}')
            axes[row, 0].axis('off')

            # Draw ground truth boxes
            gt_objects = []
            if len(target['boxes']) > 0:
                for box, label in zip(target['boxes'], target['labels']):
                    x1, y1, x2, y2 = box.numpy()
                    class_name = class_name_for(label.item())
                    gt_objects.append(class_name)

                    rect = patches.Rectangle((x1, y1), x2-x1, y2-y1,
                                             linewidth=2, edgecolor='green', facecolor='none')
                    axes[row, 0].add_patch(rect)
                    axes[row, 0].text(x1, y1-5, class_name,
                                      color='green', fontsize=10, weight='bold',
                                      bbox=dict(boxstyle='round,pad=0.2', facecolor='white', alpha=0.8))

            # Predictions (right side)
            axes[row, 1].imshow(img_array)
            axes[row, 1].set_title(f'Predictions - Difficulty: {img_data["difficulty"]}')
            axes[row, 1].axis('off')

            # Draw prediction boxes
            boxes = prediction['boxes'].cpu().numpy()
            scores = prediction['scores'].cpu().numpy()
            labels = prediction['labels'].cpu().numpy()

            # Filter by confidence
            keep = scores >= CONFIG['conf_threshold']
            boxes = boxes[keep]
            scores = scores[keep]
            labels = labels[keep]

            pred_objects = []
            for box, score, label in zip(boxes, scores, labels):
                x1, y1, x2, y2 = box
                class_name = class_name_for(label)
                pred_objects.append(class_name)

                rect = patches.Rectangle((x1, y1), x2-x1, y2-y1,
                                         linewidth=2, edgecolor='red', facecolor='none')
                axes[row, 1].add_patch(rect)
                axes[row, 1].text(x1, y1-5, f'{class_name}\n{score:.2f}',
                                  color='red', fontsize=10, weight='bold',
                                  bbox=dict(boxstyle='round,pad=0.2', facecolor='white', alpha=0.8))

            # Store results
            result = {
                'image_id': img_data['image_id'],
                'filename': img_data['filename'],
                'description': img_data['description'],
                'difficulty': img_data['difficulty'],
                'expected_objects': img_data['expected_objects'],
                'ground_truth_objects': gt_objects,
                'predicted_objects': pred_objects,
                'num_predictions': len(pred_objects),
                'avg_confidence': float(np.mean(scores)) if len(scores) > 0 else 0.0
            }
            results.append(result)

    plt.tight_layout()

    # Save comparison visualization
    comparison_viz_path = f"{CONFIG['output_dir']}/visualizations/comparison_predictions.png"
    plt.savefig(comparison_viz_path, dpi=300, bbox_inches='tight')
    logger.info(f"Comparison visualization saved to {comparison_viz_path}")
    plt.show()

    # Save results
    results_path = f"{CONFIG['output_dir']}/predictions/comparison_results.json"
    with open(results_path, 'w') as f:
        json.dump(results, f, indent=2)

    logger.info(f"Comparison results saved to {results_path}")

    # Print summary (class names are sorted so the output is the same on
    # every run; set iteration order is not)
    print("\n🎯 Comparison Images Results Summary:")
    for result in results:
        expected = set(result['expected_objects'])
        predicted = set(result['predicted_objects'])
        overlap = expected.intersection(predicted)

        print(f"\n📷 {result['filename']} ({result['difficulty']})")
        print(f"   Expected: {', '.join(sorted(expected))}")
        print(f"   Predicted: {', '.join(sorted(predicted))}")
        print(f"   Overlap: {', '.join(sorted(overlap)) if overlap else 'None'}")
        print(f"   Avg Confidence: {result['avg_confidence']:.3f}")

    return results

print("✅ Comparison testing functions defined")

## 10. Data Preparation - Convert VOC to COCO

In [None]:
# Convert the VOC dataset to COCO format and record where the result lives.
# On success CONFIG gains 'coco_file' and 'image_dir'; on failure it is left
# untouched and the problem is both printed and logged.
logger.info("Starting data preparation...")

coco_file = f"{CONFIG['output_dir']}/data/voc2012_coco.json"

print("🔄 Converting VOC dataset to COCO format...")
print(f"   VOC root: {CONFIG['voc_root']}")
print(f"   Output file: {coco_file}")

if os.path.exists(CONFIG['voc_root']):
    # Dataset is present: run the conversion on the trainval split.
    conversion_stats = convert_voc_to_coco(CONFIG['voc_root'], coco_file, 'trainval')

    if not conversion_stats:
        # A falsy result means the converter could not produce statistics.
        print("❌ Conversion failed!")
        logger.error("VOC to COCO conversion failed")
    else:
        print("✅ Conversion completed successfully!")
        for stat_label, stat_key in (
            ("Total images", 'total_images'),
            ("Total annotations", 'total_annotations'),
            ("Skipped images", 'skipped_images'),
        ):
            print(f"   {stat_label}: {conversion_stats[stat_key]}")

        CONFIG['coco_file'] = coco_file
        CONFIG['image_dir'] = f"{CONFIG['voc_root']}/JPEGImages"
else:
    # Dataset root is missing: tell the user which setting to change.
    logger.error(f"VOC dataset not found at {CONFIG['voc_root']}")
    print(f"❌ VOC dataset not found at {CONFIG['voc_root']}")
    print("Please update the 'voc_root' path in the configuration section.")

## 11. Dataset and DataLoader Setup

In [None]:
# Setup data transforms: convert each image to a tensor, then apply
# ImageNet mean/std normalisation.
# NOTE(review): torchvision's ssd300_vgg16 normalises its inputs inside the
# model. If create_ssd300_model returns that model unchanged, this Normalize
# step may normalise the images twice -- TODO confirm against the model and
# the visualisation helpers before changing it.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Create dataset
logger.info("Creating dataset and dataloaders...")

try:
    dataset = COCODetectionDataset(
        coco_json_path=CONFIG['coco_file'],
        image_dir=CONFIG['image_dir'],
        transforms=transform
    )

    # Split dataset (80% train, 20% val); the fixed seed keeps the split
    # reproducible across runs.
    dataset_size = len(dataset)
    train_size = int(0.8 * dataset_size)
    val_size = dataset_size - train_size

    train_dataset, val_dataset = torch.utils.data.random_split(
        dataset, [train_size, val_size],
        generator=torch.Generator().manual_seed(42)
    )

    # Pinned host memory only pays off when batches are copied to a CUDA
    # device. Test the string form of the device so that 'cuda', 'cuda:0'
    # and torch.device objects are all recognised (the previous equality
    # check against the literal 'cuda' missed the latter two).
    pin_memory = str(CONFIG['device']).startswith('cuda')

    # Create dataloaders: training batches are shuffled, validation batches
    # keep dataset order.
    train_loader = DataLoader(
        train_dataset,
        batch_size=CONFIG['batch_size'],
        shuffle=True,
        num_workers=CONFIG['num_workers'],
        collate_fn=collate_fn,
        pin_memory=pin_memory
    )

    val_loader = DataLoader(
        val_dataset,
        batch_size=CONFIG['batch_size'],
        shuffle=False,
        num_workers=CONFIG['num_workers'],
        collate_fn=collate_fn,
        pin_memory=pin_memory
    )

    print("✅ Dataset created successfully!")
    print(f"   Total samples: {dataset_size}")
    print(f"   Training samples: {train_size}")
    print(f"   Validation samples: {val_size}")
    print(f"   Batch size: {CONFIG['batch_size']}")
    print(f"   Training batches: {len(train_loader)}")
    print(f"   Validation batches: {len(val_loader)}")

    logger.info(f"Dataset split - Train: {train_size}, Val: {val_size}")
    logger.info(f"DataLoaders created - Train batches: {len(train_loader)}, Val batches: {len(val_loader)}")

except Exception as e:
    # Best-effort cell: report the failure and let the notebook continue.
    logger.error(f"Failed to create dataset: {e}")
    print(f"❌ Failed to create dataset: {e}")

## 12. Model Setup

In [None]:
# Model setup: build the detector, place it on the configured device, attach
# an Adam optimizer and record the in-memory size of the parameters.
logger.info("Creating SSD300 model...")

try:
    model = create_ssd300_model(num_classes=len(VOC_CLASSES), pretrained=True)
    model = model.to(CONFIG['device'])

    # Setup optimizer
    optimizer = optim.Adam(model.parameters(), lr=CONFIG['learning_rate'])

    # Model size in MiB: bytes occupied by all parameters divided by 1024 * 1024.
    bytes_per_mib = 1024 * 1024
    model_size = sum(
        param.numel() * param.element_size() for param in model.parameters()
    ) / bytes_per_mib
    training_metrics['model_size_mb'] = model_size

    print("✅ Model created successfully!")
    print("   Model: SSD300 with VGG16 backbone")
    print(f"   Device: {CONFIG['device']}")
    # 'total_parameters' is read here but not written in this cell.
    print(f"   Parameters: {training_metrics['total_parameters']:,}")
    print(f"   Model size: {model_size:.1f} MB")
    print(f"   Optimizer: Adam (lr={CONFIG['learning_rate']})")

    logger.info(f"Model setup completed - Device: {CONFIG['device']}, Size: {model_size:.1f}MB")

except Exception as e:
    # Report the failure without interrupting the notebook.
    logger.error(f"Failed to create model: {e}")
    print(f"❌ Failed to create model: {e}")

## 13. Training Loop

In [None]:
# Training loop with enhanced monitoring: trains for CONFIG['num_epochs']
# epochs, optionally checkpoints every CONFIG['checkpoint_every'] epochs,
# and periodically redraws loss / epoch-time curves.
logger.info("Starting training...")
training_metrics['start_time'] = datetime.now()

print("🚀 Starting SSD300 training...")
print(f"   Epochs: {CONFIG['num_epochs']}")
print(f"   Batch size: {CONFIG['batch_size']}")
print(f"   Learning rate: {CONFIG['learning_rate']}")
print(f"   Device: {CONFIG['device']}")
print(f"   Estimated time: {CONFIG['num_epochs'] * len(train_loader) * CONFIG['batch_size'] / 100:.1f} - {CONFIG['num_epochs'] * len(train_loader) * CONFIG['batch_size'] / 50:.1f} minutes")

# Lowest loss over ALL epochs; this is what the final summary reports.
best_loss = float('inf')
# Lowest loss among the epochs that were actually checkpointed; this alone
# decides the is_best flag handed to save_checkpoint, so checkpoint
# behaviour does not depend on epochs that were never saved.
best_checkpoint_loss = float('inf')

try:
    for epoch in range(CONFIG['num_epochs']):
        logger.info(f"Starting epoch {epoch + 1}/{CONFIG['num_epochs']}")
        print(f"\n📊 Epoch {epoch + 1}/{CONFIG['num_epochs']}")

        # Train for one epoch
        epoch_loss = train_one_epoch(model, train_loader, optimizer, CONFIG['device'], epoch)

        # Track the best loss on every epoch. Previously this only happened
        # inside the checkpoint branch, so the summary showed 'inf' whenever
        # checkpointing was disabled.
        if epoch_loss < best_loss:
            best_loss = epoch_loss

        # Update learning rate tracking
        current_lr = optimizer.param_groups[0]['lr']
        training_metrics['learning_rates'].append(current_lr)

        # Save checkpoint
        if CONFIG['save_checkpoints'] and (epoch + 1) % CONFIG['checkpoint_every'] == 0:
            checkpoint_path = f"{CONFIG['output_dir']}/models/ssd300_epoch_{epoch + 1}.pth"
            is_best = epoch_loss < best_checkpoint_loss
            if is_best:
                best_checkpoint_loss = epoch_loss

            save_checkpoint(model, optimizer, epoch + 1, epoch_loss, checkpoint_path, is_best)

        # Print epoch summary
        print(f"   ✅ Epoch {epoch + 1} completed")
        print(f"   📉 Loss: {epoch_loss:.4f}")
        # NOTE(review): presumably train_one_epoch appends to 'epoch_times'
        # and 'train_losses' -- verify, since this cell never writes them.
        print(f"   ⏱️  Time: {training_metrics['epoch_times'][-1]:.1f}s")
        print(f"   📚 Learning Rate: {current_lr:.6f}")

        # Early visualization of training progress (every 2nd epoch and the last one)
        if (epoch + 1) % 2 == 0 or epoch == CONFIG['num_epochs'] - 1:
            clear_output(wait=True)
            print(f"Training Progress Update - Epoch {epoch + 1}/{CONFIG['num_epochs']}")
            # A curve needs at least two points to be worth drawing.
            if len(training_metrics['train_losses']) > 1:
                plt.figure(figsize=(10, 4))
                plt.subplot(1, 2, 1)
                plt.plot(training_metrics['train_losses'], 'b-', linewidth=2)
                plt.title('Training Loss')
                plt.xlabel('Epoch')
                plt.ylabel('Loss')
                plt.grid(True, alpha=0.3)

                plt.subplot(1, 2, 2)
                plt.plot(training_metrics['epoch_times'], 'g-', linewidth=2)
                plt.title('Epoch Time')
                plt.xlabel('Epoch')
                plt.ylabel('Time (s)')
                plt.grid(True, alpha=0.3)

                plt.tight_layout()
                plt.show()

    training_metrics['end_time'] = datetime.now()
    total_training_time = training_metrics['end_time'] - training_metrics['start_time']

    print("\n🎉 Training completed successfully!")
    print(f"   Total time: {total_training_time}")
    print(f"   Final loss: {training_metrics['train_losses'][-1]:.4f}")
    print(f"   Best loss: {best_loss:.4f}")

    logger.info(f"Training completed successfully in {total_training_time}")
    logger.info(f"Final loss: {training_metrics['train_losses'][-1]:.4f}")

except Exception as e:
    # Record when training stopped, report the error, then propagate it.
    training_metrics['end_time'] = datetime.now()
    logger.error(f"Training failed: {e}")
    print(f"❌ Training failed: {e}")
    # Bare raise re-raises the active exception with its original traceback.
    raise

## 14. Model Evaluation

In [None]:
# Model evaluation: score the trained model on the validation loader against
# the COCO-format ground truth and store the metrics in training_metrics.
logger.info("Starting model evaluation...")

print("🔍 Evaluating trained model...")

try:
    # Ground-truth annotations come from the file written during data preparation.
    coco_gt = COCO(CONFIG['coco_file'])

    # Evaluate on validation set
    eval_metrics = evaluate_model(model, val_loader, CONFIG['device'], coco_gt)

    if not eval_metrics:
        # A falsy result means the evaluator produced nothing to report.
        print("⚠️  Evaluation completed but no metrics generated")
        logger.warning("Evaluation completed but no metrics generated")
    else:
        training_metrics['final_metrics'] = eval_metrics
        training_metrics['best_map'] = eval_metrics.get('mAP_0.5', 0.0)

        print("\n📊 Evaluation Results:")
        # (display label, key in eval_metrics); a missing key is shown as 0.
        for _metric_label, _metric_key in (
            ('mAP@0.5-0.95', 'mAP_0.5_0.95'),
            ('mAP@0.5', 'mAP_0.5'),
            ('mAP@0.75', 'mAP_0.75'),
            ('AR@100', 'AR_100'),
        ):
            print(f"   {_metric_label}: {eval_metrics.get(_metric_key, 0):.4f}")

        logger.info(f"Evaluation completed - mAP@0.5: {eval_metrics.get('mAP_0.5', 0):.4f}")

except Exception as e:
    # Report the failure without interrupting the notebook.
    logger.error(f"Evaluation failed: {e}")
    print(f"❌ Evaluation failed: {e}")

## 15. Training Progress Visualization

In [None]:
# Training progress figure: rendered from training_metrics and written under
# the visualizations directory, but only when visualizations are enabled.
if CONFIG['save_visualizations']:
    viz_path = f"{CONFIG['output_dir']}/visualizations/training_progress.png"

    logger.info("Creating training progress visualization...")
    visualize_training_progress(training_metrics, viz_path)

    print(f"✅ Training visualization saved to {viz_path}")

## 16. Sample Predictions Visualization

In [None]:
# Sample predictions figure: run the model on four dataset images and save
# the rendered result, but only when visualizations are enabled.
if CONFIG['save_visualizations']:
    pred_viz_path = f"{CONFIG['output_dir']}/visualizations/sample_predictions.png"

    logger.info("Creating sample predictions visualization...")
    visualize_predictions(
        model,
        dataset,
        CONFIG['device'],
        num_images=4,
        save_path=pred_viz_path,
    )

    print(f"✅ Sample predictions visualization saved to {pred_viz_path}")

## 17. Comparison Images Testing

In [None]:
# Comparison images testing: locate comparison_images.json (if any) and run
# the model on the listed images. Skipped entirely when disabled in CONFIG.
if not CONFIG['run_comparison_images']:
    print("⏭️  Skipping comparison images testing (disabled in config)")
else:
    logger.info("Testing on comparison images...")

    print("🖼️  Testing on comparison images...")

    # Candidate locations, checked in order; the first existing path wins.
    comparison_file_paths = [
        '/kaggle/input/comparison-images/comparison_images.json',
        '/kaggle/working/comparison_images.json',
        './comparison_images.json',
        '../comparison_images.json'
    ]

    comparison_file = next(
        (candidate for candidate in comparison_file_paths if os.path.exists(candidate)),
        None,
    )

    if not comparison_file:
        # None is passed through to test_comparison_images in this case.
        print("⚠️  No comparison_images.json found, using default images")
    else:
        print(f"📁 Found comparison file: {comparison_file}")

    try:
        comparison_results = test_comparison_images(model, dataset, CONFIG['device'], comparison_file)

        print("\n✅ Comparison testing completed!")
        print(f"   Tested images: {len(comparison_results)}")

        logger.info(f"Comparison testing completed on {len(comparison_results)} images")

    except Exception as e:
        # Report the failure without interrupting the notebook.
        logger.error(f"Comparison testing failed: {e}")
        print(f"❌ Comparison testing failed: {e}")

## 18. Final Report Generation

In [None]:
# Generate comprehensive final report: collect configuration, metrics and
# output paths into one dict, write it as JSON, save the final model bundle
# and print a summary.
logger.info("Generating final report...")

# Path of the model file written at the end of this cell. It is defined
# before the report so the report references the file that is actually
# produced (previously 'final_model' named an epoch checkpoint that is only
# written when checkpointing is enabled and the last epoch is a checkpoint epoch).
final_model_path = f"{CONFIG['output_dir']}/models/ssd300_final.pth"

final_report = {
    'experiment_info': {
        'name': CONFIG['experiment_name'],
        'timestamp': datetime.now().isoformat(),
        'model': 'SSD300',
        'backbone': 'VGG16',
        'dataset': 'Pascal VOC 2012',
        'device': CONFIG['device']
    },
    'configuration': CONFIG,
    'training_metrics': training_metrics,
    'dataset_info': {
        # The dataset variables are absent if the dataset cell failed; report 0 then.
        'total_samples': len(dataset) if 'dataset' in locals() else 0,
        'train_samples': len(train_dataset) if 'train_dataset' in locals() else 0,
        'val_samples': len(val_dataset) if 'val_dataset' in locals() else 0,
        'num_classes': len(VOC_CLASSES),
        'classes': VOC_CLASSES
    },
    'model_info': {
        'total_parameters': training_metrics['total_parameters'],
        'model_size_mb': training_metrics['model_size_mb'],
        'optimizer': 'Adam',
        'learning_rate': CONFIG['learning_rate']
    },
    'output_files': {
        'log_file': f"{CONFIG['output_dir']}/logs/{CONFIG['experiment_name']}.log",
        'final_model': final_model_path,
        # Last-epoch checkpoint path, kept for reference; the file exists only
        # if a checkpoint was saved on the final epoch.
        'final_checkpoint': f"{CONFIG['output_dir']}/models/ssd300_epoch_{CONFIG['num_epochs']}.pth",
        # NOTE(review): assumes the best checkpoint is the last epoch's and
        # that save_checkpoint uses a '_best' suffix -- verify against save_checkpoint.
        'best_model': f"{CONFIG['output_dir']}/models/ssd300_epoch_{CONFIG['num_epochs']}_best.pth",
        'predictions': f"{CONFIG['output_dir']}/predictions/ssd300_predictions.json",
        'comparison_results': f"{CONFIG['output_dir']}/predictions/comparison_results.json",
        'training_viz': f"{CONFIG['output_dir']}/visualizations/training_progress.png",
        'predictions_viz': f"{CONFIG['output_dir']}/visualizations/sample_predictions.png",
        'comparison_viz': f"{CONFIG['output_dir']}/visualizations/comparison_predictions.png"
    }
}

# Save final report. default=str stringifies values json cannot encode
# natively (e.g. the datetime objects stored in training_metrics).
report_file = f"{CONFIG['output_dir']}/{CONFIG['experiment_name']}_final_report.json"
with open(report_file, 'w') as f:
    json.dump(final_report, f, indent=2, default=str)

# Save final model together with optimizer state, config, metrics and report.
torch.save({
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'config': CONFIG,
    'training_metrics': training_metrics,
    'final_report': final_report
}, final_model_path)

print("\n🎉 SSD300 Pipeline Completed Successfully!")
print("\n📋 Final Summary:")
print(f"   Experiment: {CONFIG['experiment_name']}")
print(f"   Device: {CONFIG['device']}")
print(f"   Training time: {training_metrics['end_time'] - training_metrics['start_time']}")
print(f"   Epochs completed: {len(training_metrics['train_losses'])}/{CONFIG['num_epochs']}")
# Guard against an empty loss history (e.g. zero epochs) instead of raising IndexError.
if training_metrics['train_losses']:
    print(f"   Final loss: {training_metrics['train_losses'][-1]:.4f}")
if training_metrics['best_map'] > 0:
    print(f"   Best mAP@0.5: {training_metrics['best_map']:.4f}")
print(f"   Model size: {training_metrics['model_size_mb']:.1f} MB")
print(f"   Parameters: {training_metrics['total_parameters']:,}")

print("\n📁 Output Files:")
print(f"   📊 Final report: {report_file}")
print(f"   🤖 Final model: {final_model_path}")
print(f"   📝 Training log: {log_file}")
print(f"   📈 Visualizations: {CONFIG['output_dir']}/visualizations/")
print(f"   🎯 Predictions: {CONFIG['output_dir']}/predictions/")

logger.info("SSD300 pipeline completed successfully!")
logger.info(f"Final report saved to {report_file}")
logger.info(f"Final model saved to {final_model_path}")
logger.info("=== EXPERIMENT COMPLETED ===")

print("\n✨ All done! Check the output directory for all generated files.")