In [3]:

import os
import json
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from pycocotools.coco import COCO as coco
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import roc_curve, auc
from collections import defaultdict

# Define paths
# TRAIN_PATH = '/kaggle/input/coco-2017-dataset/coco2017/train2017'
# VAL_PATH = '/kaggle/input/coco-2017-dataset/coco2017/val2017'
# ANNOTATIONS_PATH = '/kaggle/input/coco-2017-dataset/coco2017/annotations'
# WORKING_DIR = '/kaggle/working'

TRAIN_PATH = 'D:/Download/JDownloader/MSCOCO/images/train2017'
VAL_PATH = 'D:/Download/JDownloader/MSCOCO/images/val2017'
ANNOTATIONS_PATH = 'D:/Download/JDownloader/MSCOCO/annotations'
WORKING_DIR = 'D:/Projetos/Mestrado/2024_Topicos_Esp_Sist_Informacao/ARTIGO_FINAL/object_detection_model_compare/working'



# Define classes of interest
CLASSES = ['person', 'cat', 'dog']
NUM_CLASSES = len(CLASSES) + 1  # +1 for background

# Set device
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Device: {device}")

def create_coco_dataset(image_dir, annotations_file, classes, max_images_per_class=1000):
    coco = COCO(annotations_file)
    images = []
    annotations = []
    img_id = 0
    for class_name in classes:
        cat_ids = coco.getCatIds(catNms=[class_name])
        img_ids = coco.getImgIds(catIds=cat_ids)
        img_data = coco.loadImgs(img_ids)
        count = 0
        for img in img_data:
            ann_ids = coco.getAnnIds(imgIds=img['id'], catIds=cat_ids, iscrowd=None)
            anns = coco.loadAnns(ann_ids)
            if anns:  # Only include images with annotations for the target classes
                images.append(os.path.join(image_dir, img['file_name']))
                annotations.append(anns)
                count +=1
            if count >= max_images_per_class:
                break
    return images, annotations

def collate_fn(batch):
    return tuple(zip(*batch))

def main():
    # Data preprocessing
    train_images, train_annotations = create_coco_dataset(TRAIN_PATH, os.path.join(ANNOTATIONS_PATH, 'instances_train2017.json'), CLASSES)
    val_images, val_annotations = create_coco_dataset(VAL_PATH, os.path.join(ANNOTATIONS_PATH, 'instances_val2017.json'), CLASSES)


    # Model setup
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, NUM_CLASSES)
    model.to(device)

    # Training
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
    num_epochs = 10  # Reduced for demonstration
    losses = []

    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0
        for i in range(len(train_images)):
            img = torchvision.io.read_image(train_images[i]).to(device).float()/255.0
            target = {}
            boxes = []
            labels = []
            for ann in train_annotations[i]:
                boxes.append(ann['bbox'])
                labels.append(CLASSES.index(coco.loadCats(ann['category_id'])[0]['name'])+1) # COCO category IDs are not sequential
            target['boxes'] = torch.tensor(boxes).reshape(-1,4).to(device)
            target['labels'] = torch.tensor(labels, dtype=torch.int64).to(device)
            target["image_id"] = torch.tensor([i])
            target["area"] = torch.tensor([ann['area'] for ann in train_annotations[i]])
            target["iscrowd"] = torch.tensor([ann['iscrowd'] for ann in train_annotations[i]], dtype=torch.int64)
            loss_dict = model([img], [target])
            loss = sum(loss for loss in loss_dict.values())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        losses.append(epoch_loss/len(train_images))
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss/len(train_images)}")


    # Saving outputs
    torch.save(model.state_dict(), os.path.join(WORKING_DIR, 'faster_rcnn_model.pth'))

    # Visualizations (Simplified for demonstration. Proper evaluation requires more complex metrics)
    plt.plot(losses)
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Training Loss')
    plt.savefig(os.path.join(WORKING_DIR, 'loss.png'))

if __name__ == '__main__':
    main()


Device: cuda
loading annotations into memory...
Done (t=7.73s)
creating index...
index created!
loading annotations into memory...
Done (t=0.33s)
creating index...
index created!




IndexError: list index out of range