In [1]:
import os
import numpy as np
import torch
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torch.utils.data import Dataset, DataLoader
from pycocotools.coco import COCO
import cv2
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
import json
from tqdm import tqdm
from collections import defaultdict

# Paths
# Kaggle layout, kept for reference: uncomment these four lines (and comment
# out the local ones below) when running on Kaggle.
# TRAIN_PATH = '/kaggle/input/coco-2017-dataset/coco2017/train2017'
# VAL_PATH = '/kaggle/input/coco-2017-dataset/coco2017/val2017'
# ANNOTATIONS_PATH = '/kaggle/input/coco-2017-dataset/coco2017/annotations'
# WORKING_DIR = '/kaggle/working'

# Local layout. TRAIN_PATH / VAL_PATH hold the image files, ANNOTATIONS_PATH
# holds the instances_*.json annotation files, and WORKING_DIR receives the
# saved model weights and the metric plots.
TRAIN_PATH = 'D:/Download/JDownloader/MSCOCO/images/train2017'
VAL_PATH = 'D:/Download/JDownloader/MSCOCO/images/val2017'
ANNOTATIONS_PATH = 'D:/Download/JDownloader/MSCOCO/annotations'
WORKING_DIR = 'D:/Projetos/Mestrado/2024_Topicos_Esp_Sist_Informacao/ARTIGO_FINAL/object_detection_model_compare/working'


# Device configuration
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class COCODataset(Dataset):
    """Detection dataset restricted to a handful of COCO categories.

    Images are sampled per category from a COCO annotation file. Each item is
    an ``(image, target)`` pair where ``image`` is a float tensor produced by
    ``torchvision.transforms.ToTensor`` and ``target`` is a dict with:

    * ``boxes``  - float32 tensor of shape ``(N, 4)`` in ``[x1, y1, x2, y2]``
    * ``labels`` - int64 tensor of shape ``(N,)`` with labels starting at 1
      (0 is left free for the background class)

    Args:
        root_dir: Directory containing the image files.
        annotation_file: Path to the COCO ``instances_*.json`` file.
        transform: Stored on the instance but currently not applied.
        categories: COCO category names to keep (a single name is accepted).
        samples_per_category: Maximum number of images drawn per category.
    """

    def __init__(self, root_dir, annotation_file, transform=None,
                 categories=('person', 'cat', 'dog'), samples_per_category=1000):
        self.root_dir = root_dir
        # NOTE(review): the transform is kept for API compatibility but is not
        # applied in __getitem__; geometric transforms would also need to
        # update the boxes.
        self.transform = transform
        self.coco = COCO(annotation_file)

        # A bare string would otherwise be treated as a sequence of characters.
        if isinstance(categories, str):
            categories = [categories]

        # Map the original COCO ids onto contiguous labels 1..K.
        cat_ids = self.coco.getCatIds(catNms=list(categories))
        self.category_mapping = {old_id: new_id + 1 for new_id, old_id in enumerate(cat_ids)}

        self.image_ids = self._select_balanced_images(cat_ids, samples_per_category)
        print(f"Total selected images: {len(self.image_ids)}")

    def _select_balanced_images(self, cat_ids, samples_per_category):
        """Pick up to ``samples_per_category`` random images for each category.

        An image containing several of the requested categories can be drawn
        more than once; duplicates are removed at the end, so the returned
        list may be shorter than ``len(cat_ids) * samples_per_category``.
        The per-category counts printed here are the counts before that
        de-duplication.
        """
        limit = max(samples_per_category, 0)
        selected_images = []
        category_counts = {}

        for cat_id in cat_ids:
            img_ids = list(self.coco.getImgIds(catIds=[cat_id]))
            np.random.shuffle(img_ids)  # Randomize the order

            # Slicing stops as soon as the quota is met instead of walking
            # through every remaining image id.
            chosen = img_ids[:limit]
            selected_images.extend(chosen)
            category_counts[cat_id] = len(chosen)

        # Print statistics
        for cat_id in cat_ids:
            cat_name = self.coco.loadCats([cat_id])[0]['name']
            print(f"Selected {category_counts[cat_id]} images for category '{cat_name}'")

        # Remove duplicates while preserving order
        return list(dict.fromkeys(selected_images))

    def _build_target(self, annotations):
        """Convert COCO annotations into the ``boxes``/``labels`` target dict.

        Annotations of categories outside ``self.category_mapping`` are
        dropped, as are boxes with non-positive width or height, which
        torchvision detection models reject during training.
        """
        boxes = []
        labels = []

        for ann in annotations:
            label = self.category_mapping.get(ann['category_id'])
            if label is None:
                continue
            x, y, w, h = ann['bbox']
            if w <= 0 or h <= 0:
                continue
            # COCO stores [x, y, width, height]; the model expects corners.
            boxes.append([x, y, x + w, y + h])
            labels.append(label)

        # reshape keeps the (N, 4) layout even when no box survives (N == 0).
        return {
            'boxes': torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4),
            'labels': torch.as_tensor(labels, dtype=torch.int64),
        }

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):
        img_id = self.image_ids[idx]
        img_info = self.coco.loadImgs(img_id)[0]

        # Load image; the context manager closes the file handle right away.
        image_path = os.path.join(self.root_dir, img_info['file_name'])
        with Image.open(image_path) as raw_image:
            image = torchvision.transforms.ToTensor()(raw_image.convert('RGB'))

        # Get annotations
        ann_ids = self.coco.getAnnIds(imgIds=img_id)
        annotations = self.coco.loadAnns(ann_ids)

        return image, self._build_target(annotations)

def get_model(num_classes):
    """Build a Faster R-CNN (ResNet-50 FPN v2) with a fresh box predictor.

    The detector is created with the default pre-trained weights and its
    box-prediction head is swapped for a new ``FastRCNNPredictor`` sized for
    ``num_classes`` outputs.
    """
    detector = fasterrcnn_resnet50_fpn_v2(
        weights=FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT
    )

    # The new head must accept the same feature width as the one it replaces.
    head_input_size = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = FastRCNNPredictor(head_input_size, num_classes)

    return detector

def _compute_validation_loss(model, val_loader):
    """Return the mean summed detection loss over ``val_loader``.

    torchvision detection models only return their loss dict in training
    mode (in eval mode they return predictions), so the model is switched to
    training mode for the forward pass. Batch-norm layers are put back into
    eval mode so validation batches do not update their running statistics,
    and gradients are disabled. The model is left in eval mode afterwards.
    """
    model.train()
    for module in model.modules():
        if isinstance(module, torch.nn.modules.batchnorm._BatchNorm):
            module.eval()

    total_loss = 0.0
    with torch.no_grad():
        for images, targets in val_loader:
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)
            total_loss += sum(loss_dict.values()).item()

    model.eval()
    # max(..., 1) avoids a ZeroDivisionError on an empty loader.
    return total_loss / max(len(val_loader), 1)


def train_model(model, train_loader, val_loader, num_epochs=10):
    """Train ``model`` with SGD and track per-epoch train/validation loss.

    Args:
        model: A torchvision-style detection model that returns a dict of
            losses when called as ``model(images, targets)`` in training mode.
        train_loader: Loader yielding ``(images, targets)`` training batches.
        val_loader: Loader yielding ``(images, targets)`` validation batches.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        ``(train_losses, val_losses)``: two lists with one mean loss per epoch.
    """
    model.to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)

    train_losses = []
    val_losses = []

    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0.0

        for images, targets in tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}'):
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            # The model returns one loss per head; optimize their sum.
            loss_dict = model(images, targets)
            losses = sum(loss_dict.values())

            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

            epoch_loss += losses.item()

        train_losses.append(epoch_loss / max(len(train_loader), 1))

        # Validation
        val_losses.append(_compute_validation_loss(model, val_loader))
        print(f'Epoch {epoch+1} - Train Loss: {train_losses[-1]:.4f}, Val Loss: {val_losses[-1]:.4f}')

    return train_losses, val_losses

def calculate_map(model, data_loader, num_classes=4):
    """Average the per-class score from ``calculate_ap`` over all classes.

    Args:
        model: Detection model; called in eval mode as ``model(images)`` and
            expected to return one dict with ``boxes`` and ``labels`` per image.
        data_loader: Loader yielding ``(images, targets)`` batches.
        num_classes: Total number of classes including background (label 0);
            labels ``1 .. num_classes - 1`` are evaluated.

    Returns:
        The mean of the per-class scores, or ``0.0`` when there is no
        foreground class to evaluate.
    """
    model.eval()
    all_predictions = []
    all_targets = []

    with torch.no_grad():
        for images, targets in data_loader:
            images = [image.to(device) for image in images]
            predictions = model(images)

            for pred, target in zip(predictions, targets):
                # Predictions live on ``device`` while targets come straight
                # from the loader; bring both to the CPU so they can be
                # compared (and so GPU memory is released between batches).
                all_predictions.append({k: v.to('cpu') for k, v in pred.items()})
                all_targets.append({k: v.to('cpu') for k, v in target.items()})

    # Calculate mAP
    aps = []
    for class_id in range(1, num_classes):  # label 0 is the background
        predictions_class = [pred['boxes'][pred['labels'] == class_id] for pred in all_predictions]
        targets_class = [target['boxes'][target['labels'] == class_id] for target in all_targets]

        aps.append(calculate_ap(predictions_class, targets_class))

    if not aps:
        return 0.0
    return np.mean(aps)

def calculate_ap(predictions, targets, iou_threshold=0.5):
    """Return the detection precision for one class at ``iou_threshold``.

    Despite the name this is a simplified metric: it is TP / (TP + FP)
    accumulated over all images, not the area under a precision-recall curve.

    Args:
        predictions: One ``(P, 4)`` tensor of predicted boxes per image.
        targets: One ``(T, 4)`` tensor of ground-truth boxes per image, in
            the same image order as ``predictions``.
        iou_threshold: Minimum IoU for a prediction to count as a match.

    Returns:
        Precision in ``[0, 1]``; ``0.0`` when there is nothing to score.
    """
    # Simplified AP calculation
    if len(predictions) == 0 or len(targets) == 0:
        return 0.0

    tp = 0
    fp = 0

    for pred_boxes, target_boxes in zip(predictions, targets):
        num_preds = len(pred_boxes)
        if num_preds == 0:
            continue

        # No ground truth of this class in the image: every prediction is a
        # false positive.
        if len(target_boxes) == 0:
            fp += num_preds
            continue

        ious = box_iou(pred_boxes, target_boxes)

        # Each ground-truth box may be claimed by one prediction only; later
        # predictions hitting the same box count as false positives.
        # NOTE(review): predictions are processed in the given order, which
        # is presumably descending score - confirm against the model output.
        claimed = torch.zeros(ious.shape[1], dtype=torch.bool, device=ious.device)
        for row in ious:
            best_iou, best_idx = row.masked_fill(claimed, -1.0).max(dim=0)
            if best_iou.item() >= iou_threshold:
                claimed[best_idx] = True
                tp += 1
            else:
                fp += 1

    total = tp + fp
    return tp / total if total > 0 else 0.0

def box_iou(boxes1, boxes2):
    """Compute the pairwise IoU between two sets of corner-format boxes.

    Both inputs are indexed as ``[:, 0..3]`` = ``x1, y1, x2, y2``. The result
    has one row per box in ``boxes1`` and one column per box in ``boxes2``.
    """
    def _areas(boxes):
        return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

    # Add a broadcast axis so every box in boxes1 is paired with every box
    # in boxes2.
    expanded = boxes1[:, None, :]
    top_left = torch.max(expanded[..., :2], boxes2[:, :2])
    bottom_right = torch.min(expanded[..., 2:], boxes2[:, 2:])

    # Non-overlapping pairs give a negative extent; clamp it to zero.
    overlap = torch.clamp(bottom_right - top_left, min=0)
    intersection = overlap[..., 0] * overlap[..., 1]

    return intersection / (_areas(boxes1)[:, None] + _areas(boxes2) - intersection)

def plot_metrics(train_losses, val_losses, mAP, save_dir):
    """Save the loss curves and the mAP plot as PNG files in ``save_dir``.

    Args:
        train_losses: Per-epoch training losses.
        val_losses: Per-epoch validation losses.
        mAP: Sequence of mAP values to plot (may hold a single value).
        save_dir: Output directory; created if it does not exist. Receives
            ``losses.png`` and ``map.png``.
    """
    os.makedirs(save_dir, exist_ok=True)

    # Plot losses
    plt.figure(figsize=(10, 5))
    plt.plot(train_losses, label='Train Loss')
    plt.plot(val_losses, label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Training and Validation Losses')
    plt.legend()
    plt.savefig(os.path.join(save_dir, 'losses.png'))
    plt.close()

    # Plot mAP
    plt.figure(figsize=(10, 5))
    # A marker keeps a single-value series visible; a bare line through one
    # point draws nothing.
    plt.plot(mAP, marker='o')
    plt.xlabel('Epoch')
    plt.ylabel('mAP')
    plt.title('Mean Average Precision')
    plt.savefig(os.path.join(save_dir, 'map.png'))
    plt.close()

def main():
    """Train Faster R-CNN on a COCO subset, then save weights and metrics.

    Builds the train/validation datasets, fine-tunes the model, writes the
    weights to ``WORKING_DIR``, evaluates on the validation set and saves the
    loss and mAP plots.
    """
    # Set random seed for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    # Create the output directory up front so a missing folder cannot make
    # the save step fail after the whole training run.
    os.makedirs(WORKING_DIR, exist_ok=True)

    # Create datasets with up to 1000 images per category (images shared
    # between categories are only kept once).
    train_dataset = COCODataset(TRAIN_PATH,
                                os.path.join(ANNOTATIONS_PATH, 'instances_train2017.json'),
                                samples_per_category=1000)
    val_dataset = COCODataset(VAL_PATH,
                              os.path.join(ANNOTATIONS_PATH, 'instances_val2017.json'),
                              samples_per_category=200)  # 20% of training size

    # Create data loaders. Images differ in size and box count, so batches
    # are kept as tuples instead of being stacked into one tensor.
    train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))
    val_loader = DataLoader(val_dataset, batch_size=2, shuffle=False, collate_fn=lambda x: tuple(zip(*x)))

    # Initialize model
    model = get_model(num_classes=4)  # 3 classes + background

    # Train model
    train_losses, val_losses = train_model(model, train_loader, val_loader)

    # Save model before evaluating so the trained weights survive a failure
    # in the evaluation step.
    torch.save(model.state_dict(), os.path.join(WORKING_DIR, 'faster_rcnn_model.pth'))

    # Calculate mAP
    mAP = calculate_map(model, val_loader)

    # Plot and save metrics
    plot_metrics(train_losses, val_losses, [mAP], WORKING_DIR)

    print(f'Training completed. Final mAP: {mAP:.4f}')

if __name__ == "__main__":
    main()

loading annotations into memory...
Done (t=8.04s)
creating index...
index created!
Selected 1000 images for category 'person'
Selected 1000 images for category 'cat'
Selected 1000 images for category 'dog'
Total selected images: 2972
loading annotations into memory...
Done (t=0.36s)
creating index...
index created!
Selected 200 images for category 'person'
Selected 184 images for category 'cat'
Selected 177 images for category 'dog'
Total selected images: 544


Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_v2_coco-dd69338a.pth" to C:\Users\lauro/.cache\torch\hub\checkpoints\fasterrcnn_resnet50_fpn_v2_coco-dd69338a.pth
100%|██████████| 167M/167M [00:25<00:00, 6.89MB/s] 
Epoch 1/10:   8%|▊         | 115/1486 [07:37<1:30:54,  3.98s/it]


KeyboardInterrupt: 