In [1]:
import os
import torch
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torch.utils.data import Dataset, DataLoader
from pycocotools.coco import COCO
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
from torchvision.transforms import functional as F
from tqdm import tqdm

# Dataset, output and checkpoint locations used by main() and get_model().
#
# Kaggle layout, kept for reference. To run on Kaggle, uncomment this block
# and comment out the local block below.
# TRAIN_PATH = '/kaggle/input/coco-2017-dataset/coco2017/train2017'
# VAL_PATH = '/kaggle/input/coco-2017-dataset/coco2017/val2017'
# ANNOTATIONS_PATH = '/kaggle/input/coco-2017-dataset/coco2017/annotations'
# WORKING_DIR = '/kaggle/working'
# WEIGHTS_PATH = '/kaggle/input/model-cache/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth'


# Local layout currently in use: COCO 2017 images and annotations, the output
# directory for plots and the trained model, and the local Faster R-CNN
# checkpoint.
# NOTE(review): 'weigths' in WEIGHTS_PATH presumably mirrors the directory name
# on disk; rename the folder and this constant together if it is ever fixed.
TRAIN_PATH = 'D:/Download/JDownloader/MSCOCO/images/train2017'
VAL_PATH = 'D:/Download/JDownloader/MSCOCO/images/val2017'
ANNOTATIONS_PATH = 'D:/Download/JDownloader/MSCOCO/annotations'
WORKING_DIR = 'D:/Projetos/Mestrado/2024_Topicos_Esp_Sist_Informacao/ARTIGO_FINAL/object_detection_model_compare/working'
WEIGHTS_PATH = 'D:/Projetos/Mestrado/2024_Topicos_Esp_Sist_Informacao/ARTIGO_FINAL/object_detection_model_compare/weigths/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth'


class FilteredCocoDataset(Dataset):
    """COCO detection dataset restricted to the person, cat and dog categories.

    Each item is an ``(image, target)`` pair. ``target`` is a dict with:

    * ``boxes``: float32 tensor of shape ``(N, 4)`` in ``[x1, y1, x2, y2]``
      order (always 2-D, even when ``N == 0``),
    * ``labels``: int64 tensor of shape ``(N,)`` with values from
      ``cat_mapping`` (1-based; 0 is left for the background class),
    * ``image_id``: int tensor of shape ``(1,)`` holding the COCO image id.
    """

    def __init__(self, root, annFile, transform=None, max_samples=1000):
        """
        Initialize the COCO dataset
        Args:
            root (str): Root directory where images are downloaded to
            annFile (str): Path to json annotation file
            transform (callable, optional): Optional transform to be applied
                to the image only (the target is not transformed)
            max_samples (int): Maximum number of samples to use; ``None``
                keeps every matching image
        """
        self.root = root
        self.coco = COCO(annFile)
        self.transform = transform

        # Filter for person, cat, and dog categories
        cat_ids = self.coco.getCatIds(catNms=['person', 'cat', 'dog'])

        # Query one category at a time and merge the results, so an image is
        # kept when it contains ANY of the categories.
        img_ids = set()
        for cat_id in cat_ids:
            img_ids.update(self.coco.getImgIds(catIds=[cat_id]))

        # Sort before truncating so the selected subset does not depend on
        # set iteration order and is reproducible across runs/interpreters.
        self.img_ids = sorted(img_ids)[:max_samples]

        # Map COCO category ids to contiguous labels starting at 1
        self.cat_mapping = {cat_id: idx + 1 for idx, cat_id in enumerate(cat_ids)}

        print(f"Found {len(self.img_ids)} images")
        print(f"Category mapping: {self.cat_mapping}")

    def _build_target(self, img_id, anns):
        """Convert raw COCO annotations of one image into a detection target.

        Annotations of other categories are dropped, as are degenerate boxes
        (non-positive width or height), which torchvision detection models
        reject during training.
        """
        boxes = []
        labels = []

        for ann in anns:
            label = self.cat_mapping.get(ann['category_id'])
            if label is None:
                continue

            # COCO stores boxes as [x, y, width, height]
            x, y, width, height = ann['bbox']
            if width <= 0 or height <= 0:
                continue

            boxes.append([x, y, x + width, y + height])
            labels.append(label)

        return {
            # reshape keeps the (N, 4) layout even when no box survives
            'boxes': torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4),
            'labels': torch.as_tensor(labels, dtype=torch.int64),
            'image_id': torch.tensor([img_id]),
        }

    def __getitem__(self, idx):
        """Load the image at position ``idx`` together with its target dict."""
        img_id = self.img_ids[idx]
        img_info = self.coco.loadImgs(img_id)[0]

        # Load image; the context manager closes the file handle right away
        # instead of leaving it to the garbage collector.
        with Image.open(os.path.join(self.root, img_info['file_name'])) as raw:
            img = raw.convert('RGB')

        # Get annotations
        anns = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id))
        target = self._build_target(img_id, anns)

        if self.transform is not None:
            img = self.transform(img)

        return img, target

    def __len__(self):
        """Return the number of selected images."""
        return len(self.img_ids)

def get_model(num_classes, weights_path):
    """Build a Faster R-CNN (ResNet-50 FPN) with a fresh box-prediction head.

    Args:
        num_classes (int): Number of output classes, background included.
        weights_path (str): Local path to a full-model ``state_dict``
            checkpoint. When the file is missing, a warning is printed and
            the checkpoint is simply not loaded.

    Returns:
        The model, with its box predictor replaced by a newly initialised
        ``FastRCNNPredictor`` sized for ``num_classes``.
    """
    # weights=None skips the COCO detection checkpoint download. Note that
    # torchvision may still fetch its default ResNet-50 backbone weights when
    # they are not cached yet.
    model = fasterrcnn_resnet50_fpn(weights=None)

    # Load pretrained weights from local file
    if os.path.exists(weights_path):
        print(f"Loading pretrained weights from {weights_path}")
        # map_location='cpu' makes loading independent of the device the
        # checkpoint was saved from (the caller moves the model afterwards);
        # weights_only=True restricts unpickling to tensors/containers so a
        # tampered file cannot execute arbitrary code.
        state_dict = torch.load(weights_path, map_location='cpu', weights_only=True)
        model.load_state_dict(state_dict)
    else:
        print(f"Warning: Weights file not found at {weights_path}. Starting with random weights.")

    # Modify the box predictor for our number of classes. This must happen
    # after load_state_dict, because the checkpoint's head has a different
    # output size than the replacement.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model

def _pairwise_iou(boxes_a, boxes_b):
    """Return the ``(len(boxes_a), len(boxes_b))`` IoU matrix of xyxy boxes."""
    area_a = (boxes_a[:, 2] - boxes_a[:, 0]) * (boxes_a[:, 3] - boxes_a[:, 1])
    area_b = (boxes_b[:, 2] - boxes_b[:, 0]) * (boxes_b[:, 3] - boxes_b[:, 1])

    top_left = torch.max(boxes_a[:, None, :2], boxes_b[None, :, :2])
    bottom_right = torch.min(boxes_a[:, None, 2:], boxes_b[None, :, 2:])
    intersection = (bottom_right - top_left).clamp(min=0).prod(dim=2)

    union = area_a[:, None] + area_b[None, :] - intersection
    # clamp avoids 0/0 when both boxes are degenerate
    return intersection / union.clamp(min=1e-9)


def calculate_ap(predictions, targets, iou_threshold=0.5):
    """
    Calculate a simplified, score-free stand-in for Average Precision.

    ``predictions[i]`` and ``targets[i]`` hold the boxes of the same image
    (``[x1, y1, x2, y2]``, shape ``(N, 4)``). Within each image, predictions
    are matched one-to-one to ground-truth boxes: a prediction claims the
    still-unmatched ground-truth box it overlaps most, provided their IoU is
    at least ``iou_threshold``. The result is the fraction of ground-truth
    boxes that were matched over all images, i.e. the recall at that IoU.
    Confidence scores are not taken into account, so this is not the
    ranked AP used by the COCO/VOC benchmarks.

    Args:
        predictions: Sequence of per-image predicted box tensors.
        targets: Sequence of per-image ground-truth box tensors.
        iou_threshold (float): Minimum IoU for a prediction to count as a hit.

    Returns:
        float: Value in ``[0, 1]``; ``0.0`` when either input is empty or
        there is no ground-truth box at all.

    Raises:
        ValueError: If the two sequences describe a different number of images.
    """
    if len(predictions) == 0 or len(targets) == 0:
        return 0.0
    if len(predictions) != len(targets):
        raise ValueError(
            f"predictions and targets must cover the same images "
            f"(got {len(predictions)} and {len(targets)})"
        )

    matched = 0
    total = 0

    for pred_boxes, gt_boxes in zip(predictions, targets):
        # Bring both sides to CPU float32 so GPU predictions can be compared
        # with CPU targets, and force the (N, 4) layout for empty inputs.
        pred_boxes = torch.as_tensor(pred_boxes, dtype=torch.float32).detach().cpu().reshape(-1, 4)
        gt_boxes = torch.as_tensor(gt_boxes, dtype=torch.float32).detach().cpu().reshape(-1, 4)

        total += len(gt_boxes)
        if len(pred_boxes) == 0 or len(gt_boxes) == 0:
            continue

        iou = _pairwise_iou(pred_boxes, gt_boxes)
        available = torch.ones(len(gt_boxes), dtype=torch.bool)

        for overlaps in iou:
            # Already-claimed ground-truth boxes are masked out so duplicate
            # detections of one object count only once.
            candidates = overlaps.masked_fill(~available, -1.0)
            best = int(candidates.argmax())
            if candidates[best] >= iou_threshold:
                available[best] = False
                matched += 1

    return matched / total if total > 0 else 0.0

def _move_batch_to_device(images, targets, device):
    """Return copies of the batch's images and target dicts on ``device``."""
    images = [image.to(device) for image in images]
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
    return images, targets


def _validation_loss(model, val_loader, device):
    """Return the mean per-batch loss over ``val_loader`` without any update."""
    # torchvision detection models only return the loss dict in training
    # mode; in eval mode they return detections instead. So the model is kept
    # in train mode and gradients are disabled to make this a pure evaluation.
    # NOTE(review): proposal sampling is random in train mode, so the value
    # can fluctuate slightly between calls.
    was_training = model.training
    model.train()
    total = 0.0
    try:
        with torch.no_grad():
            for images, targets in val_loader:
                images, targets = _move_batch_to_device(images, targets, device)
                loss_dict = model(images, targets)
                total += sum(loss for loss in loss_dict.values()).item()
    finally:
        model.train(was_training)

    # max(..., 1) keeps an empty loader from raising ZeroDivisionError
    return total / max(len(val_loader), 1)


def train_model(model, train_loader, val_loader, device, num_epochs=10):
    """Fine-tune a detection model with SGD and track per-epoch losses.

    Args:
        model: Detection model that returns a dict of losses when called
            with ``(images, targets)`` in training mode.
        train_loader: Iterable of ``(images, targets)`` training batches.
        val_loader: Iterable of ``(images, targets)`` validation batches.
        device: Device the model and every batch are moved to.
        num_epochs (int): Number of passes over ``train_loader``.

    Returns:
        tuple[list[float], list[float]]: Mean training loss and mean
        validation loss, one entry per epoch.
    """
    model.to(device)
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

    train_losses = []
    val_losses = []

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0.0

        with tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}') as pbar:
            for images, targets in pbar:
                images, targets = _move_batch_to_device(images, targets, device)

                # The model returns one loss per head; optimise their sum
                loss_dict = model(images, targets)
                losses = sum(loss for loss in loss_dict.values())

                optimizer.zero_grad()
                losses.backward()
                optimizer.step()

                batch_loss = losses.item()
                total_loss += batch_loss
                pbar.set_postfix({'loss': batch_loss})

        avg_loss = total_loss / max(len(train_loader), 1)
        train_losses.append(avg_loss)

        # Validation
        avg_val_loss = _validation_loss(model, val_loader, device)
        val_losses.append(avg_val_loss)

        print(f'Epoch {epoch+1} - Training Loss: {avg_loss:.4f}, Validation Loss: {avg_val_loss:.4f}')

    return train_losses, val_losses

def calculate_map(model, val_loader, device, num_classes=4):
    """Run inference on ``val_loader`` and average ``calculate_ap`` over classes.

    Args:
        model: Detection model that, in eval mode, returns one dict per image
            containing at least ``boxes`` and ``labels``.
        val_loader: Iterable of ``(images, targets)`` batches.
        device: Device the images are moved to for inference.
        num_classes (int): Number of classes including background (label 0);
            defaults to 4 (background + 3 classes).

    Returns:
        float: Mean of the per-class scores over labels ``1..num_classes-1``,
        or ``0.0`` when there is no foreground class.
    """
    model.eval()
    all_predictions = []
    all_targets = []

    with torch.no_grad():
        for images, targets in val_loader:
            images = [image.to(device) for image in images]
            predictions = model(images)

            # Move results to the CPU right away: keeping every prediction on
            # the GPU grows device memory with the dataset size, and the
            # targets they are compared with live on the CPU.
            all_predictions.extend(
                {k: v.cpu() for k, v in pred.items()} for pred in predictions
            )
            all_targets.extend(
                {k: v.cpu() for k, v in target.items()} for target in targets
            )

    if num_classes <= 1:
        return 0.0

    # Calculate mAP (simplified version); label 0 is background and skipped
    map_score = 0
    for cls in range(1, num_classes):
        predictions = [pred['boxes'][pred['labels'] == cls] for pred in all_predictions]
        targets = [target['boxes'][target['labels'] == cls] for target in all_targets]

        # Calculate AP for each class
        map_score += calculate_ap(predictions, targets)

    return map_score / (num_classes - 1)

def plot_metrics(train_losses, val_losses, map_score, save_dir):
    """Save the loss curves and the mAP bar chart as PNG files.

    Writes ``loss_plot.png`` and ``map_plot.png`` into ``save_dir``, creating
    the directory first if it does not exist.

    Args:
        train_losses: Per-epoch training losses.
        val_losses: Per-epoch validation losses.
        map_score (float): Value shown in the single-bar mAP chart.
        save_dir (str): Output directory for both images.
    """
    # savefig does not create missing directories
    os.makedirs(save_dir, exist_ok=True)

    # Plot training and validation loss
    plt.figure(figsize=(10, 5))
    plt.plot(train_losses, label='Training Loss')
    plt.plot(val_losses, label='Validation Loss')
    plt.title('Model Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.savefig(os.path.join(save_dir, 'loss_plot.png'))
    plt.close()

    # Plot mAP
    plt.figure(figsize=(10, 5))
    plt.bar(['mAP'], [map_score])
    plt.title('Mean Average Precision')
    plt.ylabel('Score')
    plt.savefig(os.path.join(save_dir, 'map_plot.png'))
    plt.close()

def _collate_detection_batch(batch):
    """Regroup ``[(image, target), ...]`` into ``((image, ...), (target, ...))``.

    Images of different sizes cannot be stacked into one tensor, so the batch
    is kept as tuples. Being a module-level function (rather than a lambda)
    also keeps it picklable for DataLoader worker processes.
    """
    return tuple(zip(*batch))


def main():
    """Train Faster R-CNN on the filtered COCO subset, evaluate it and save results.

    Reads the dataset and checkpoint locations from the module-level path
    constants and writes the plots and the trained ``state_dict`` to
    ``WORKING_DIR``.
    """
    # Set device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    # Create the output directory before the long training run, so that a
    # missing folder cannot make the final save fail and lose the result.
    os.makedirs(WORKING_DIR, exist_ok=True)

    # Create datasets
    transform = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
    ])

    print("Creating datasets...")
    train_dataset = FilteredCocoDataset(
        TRAIN_PATH,
        os.path.join(ANNOTATIONS_PATH, 'instances_train2017.json'),
        transform=transform
    )

    val_dataset = FilteredCocoDataset(
        VAL_PATH,
        os.path.join(ANNOTATIONS_PATH, 'instances_val2017.json'),
        transform=transform
    )

    print(f"Training dataset size: {len(train_dataset)}")
    print(f"Validation dataset size: {len(val_dataset)}")

    # Create data loaders
    train_loader = DataLoader(
        train_dataset,
        batch_size=2,
        shuffle=True,
        collate_fn=_collate_detection_batch
    )

    val_loader = DataLoader(
        val_dataset,
        batch_size=2,
        shuffle=False,
        collate_fn=_collate_detection_batch
    )

    # Initialize model
    print("Initializing model...")
    model = get_model(num_classes=4, weights_path=WEIGHTS_PATH)  # background + 3 classes
    model = model.to(device)

    # Train model
    print("Starting training...")
    train_losses, val_losses = train_model(model, train_loader, val_loader, device)

    # Calculate mAP
    print("Calculating mAP...")
    map_score = calculate_map(model, val_loader, device)

    # Plot and save metrics
    print("Saving metrics and plots...")
    plot_metrics(train_losses, val_losses, map_score, WORKING_DIR)

    # Save model
    torch.save(model.state_dict(), os.path.join(WORKING_DIR, 'faster_rcnn_model.pth'))
    print("Training completed!")

# Run the pipeline only when this file is executed as the main module, so
# importing it for its helpers does not trigger a training run.
if __name__ == '__main__':
    main()

Using device: cuda
Creating datasets...
loading annotations into memory...
Done (t=8.00s)
creating index...
index created!
Found 1000 images
Category mapping: {1: 1, 17: 2, 18: 3}
loading annotations into memory...
Done (t=0.35s)
creating index...
index created!
Found 1000 images
Category mapping: {1: 1, 17: 2, 18: 3}
Training dataset size: 1000
Validation dataset size: 1000
Initializing model...


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\lauro/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:18<00:00, 5.63MB/s]
  state_dict = torch.load(weights_path)


Loading pretrained weights from D:/Projetos/Mestrado/2024_Topicos_Esp_Sist_Informacao/ARTIGO_FINAL/object_detection_model_compare/weigths/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
Starting training...


Epoch 1/10:  63%|██████▎   | 317/500 [03:39<02:06,  1.44it/s, loss=0.401] 


KeyboardInterrupt: 