In [7]:
import os
import random
import json
import pandas as pd
import numpy as np
import cv2
from pycocotools.coco import COCO
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import torch
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
import matplotlib.pyplot as plt

# Paths
TRAIN_PATH = 'D:/Download/JDownloader/MSCOCO/images/train2017'
VAL_PATH = 'D:/Download/JDownloader/MSCOCO/images/val2017'
ANNOTATIONS_PATH = 'D:/Download/JDownloader/MSCOCO/annotations'
WORKING_DIR = 'D:/Projetos/Mestrado/2024_Topicos_Esp_Sist_Informacao/ARTIGO_FINAL/object_detection_model_compare/working'

os.makedirs(WORKING_DIR, exist_ok=True)

# Categories to filter
FILTERED_CATEGORIES = ['person', 'cat', 'dog']

# Load COCO annotations
annotations_file = os.path.join(ANNOTATIONS_PATH, 'instances_train2017.json')
coco = COCO(annotations_file)

# Get category IDs for the selected categories
category_ids = coco.getCatIds(catNms=FILTERED_CATEGORIES)

# Generate filtered dataset
filtered_data = []
for category_id in category_ids:
    ann_ids = coco.getAnnIds(catIds=[category_id])
    anns = coco.loadAnns(ann_ids)
    selected_anns = random.sample(anns, min(1000, len(anns)))
    for ann in selected_anns:
        image_info = coco.loadImgs(ann['image_id'])[0]
        filtered_data.append({
            "image_id": ann['image_id'],
            "image": image_info['file_name'],
            "category_id": ann['category_id'],
            "bbox": ann['bbox']
        })

# Save filtered data to CSV
filtered_csv_path = os.path.join(WORKING_DIR, 'filtered_coco.csv')
filtered_df = pd.DataFrame(filtered_data)
filtered_df.to_csv(filtered_csv_path, index=False)

print(f"Filtered dataset saved to {os.path.abspath(filtered_csv_path)}")

# Split the filtered dataset
data = pd.read_csv(filtered_csv_path)

# Display record count per category_id
category_counts = data['category_id'].value_counts()
print("Record count per category_id:")
print(category_counts)
print("")

# Split into training and testing datasets
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

# Display record count per category_id for train and test
category_train_counts = train_data['category_id'].value_counts()
print("Train - Record count per category_id:")
print(category_train_counts)
print("")

category_test_counts = test_data['category_id'].value_counts()
print("Test - Record count per category_id:")
print(category_test_counts)
print("")

# Save the split datasets
train_csv_path = os.path.join(WORKING_DIR, 'train_data.csv')
test_csv_path = os.path.join(WORKING_DIR, 'test_data.csv')

train_data.to_csv(train_csv_path, index=False)
test_data.to_csv(test_csv_path, index=False)

print(f"Training dataset saved to {os.path.abspath(train_csv_path)}")
print(f"Testing dataset saved to {os.path.abspath(test_csv_path)}")

# Define custom Dataset class
class CocoDataset(Dataset):
    def __init__(self, dataframe, image_dir):
        self.dataframe = dataframe
        self.image_dir = image_dir

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]
        image_path = os.path.join(self.image_dir, row['image'])
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = torch.tensor(image.transpose(2, 0, 1), dtype=torch.float32) / 255.0

        target = {
            "boxes": torch.tensor([row['bbox']], dtype=torch.float32),
            "labels": torch.tensor([row['category_id']], dtype=torch.int64)
        }

        return image, target

train_dataset = CocoDataset(train_data, TRAIN_PATH)
test_dataset = CocoDataset(test_data, TRAIN_PATH)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False, collate_fn=lambda x: tuple(zip(*x)))

# Model Setup
def get_faster_rcnn_model(num_classes):
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model

model = get_faster_rcnn_model(num_classes=len(FILTERED_CATEGORIES) + 1)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

# Optimizer and Training Loop
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
num_epochs = 10
train_losses = []

for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    for images, targets in tqdm(train_loader, desc=f"Training Epoch {epoch+1}/{num_epochs}"):
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        optimizer.zero_grad()
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        loss_value = losses.item()

        train_loss += loss_value
        losses.backward()
        optimizer.step()

    train_loss /= len(train_loader)
    train_losses.append(train_loss)
    print(f"Epoch {epoch+1} Train Loss: {train_loss:.4f}")

# Save the trained model
model_save_path = os.path.join(WORKING_DIR, 'faster_rcnn_coco.pth')
torch.save(model.state_dict(), model_save_path)
print(f"Model saved to {model_save_path}")

# Plot Loss
plt.figure(figsize=(10, 6))
plt.plot(range(1, num_epochs + 1), train_losses, label='Training Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training Loss')
plt.legend()
plt.savefig(os.path.join(WORKING_DIR, 'training_loss.png'))
plt.show()


loading annotations into memory...
Done (t=7.89s)
creating index...
index created!
Filtered dataset saved to D:\Projetos\Mestrado\2024_Topicos_Esp_Sist_Informacao\ARTIGO_FINAL\object_detection_model_compare\working\filtered_coco.csv
Record count per category_id:
category_id
1     1000
17    1000
18    1000
Name: count, dtype: int64

Train - Record count per category_id:
category_id
18    814
17    803
1     783
Name: count, dtype: int64

Test - Record count per category_id:
category_id
1     217
17    197
18    186
Name: count, dtype: int64

Training dataset saved to D:\Projetos\Mestrado\2024_Topicos_Esp_Sist_Informacao\ARTIGO_FINAL\object_detection_model_compare\working\train_data.csv
Testing dataset saved to D:\Projetos\Mestrado\2024_Topicos_Esp_Sist_Informacao\ARTIGO_FINAL\object_detection_model_compare\working\test_data.csv


Training Epoch 1/10:   0%|          | 0/300 [00:00<?, ?it/s]


ValueError: too many dimensions 'str'