In [99]:
import os
import cv2
import torch
import numpy as np
import torch.optim as optim
import torch.nn as nn
import torchvision
from torch.utils.data import Dataset
from torchvision.models.detection import FasterRCNN
from torchvision.transforms import functional as F
from torchvision.models.detection.transform import GeneralizedRCNNTransform
from torchvision.models.detection.roi_heads import RoIHeads
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.models.detection.image_list import ImageList
from torchvision.models.detection.transform import resize_boxes
from torchvision.models.detection.rpn import AnchorGenerator, RPNHead
from torchvision.ops import MultiScaleRoIAlign

In [100]:
# Set the device (CPU or GPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [101]:
class CustomObjectDetectionDataset(Dataset):
    def __init__(self, data_dir, max_labels=6):
        self.data_dir = data_dir
        self.image_paths = []
        self.annotations = []
        self.max_labels = max_labels

        for root, _, files in os.walk(os.path.join(data_dir, "images")):
            for file in files:
                if file.lower().endswith((".jpg", ".jpeg", ".png")):
                    image_path = os.path.join(root, file)
                    label_path = os.path.join(data_dir, "labels", file.replace(".jpg", ".txt"))
                    self.image_paths.append(image_path)
                    self.annotations.append(label_path)

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        annotation_path = self.annotations[idx]

        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        with open(annotation_path, 'r') as label_file:
            lines = label_file.read().splitlines()
            labels = []
            for line in lines:
                parts = line.split()
                if len(parts) == 6:
                    label = [int(parts[0])] + [float(p) for p in parts[1:]]
                    labels.append(label)

            while len(labels) < self.max_labels:
                labels.append([0, 0.0, 0.0, 0.0, 0.0, 0.0])

            labels = labels[:self.max_labels]

        # Reformat labels to match the expected shape [N, 4]
        num_labels = len(labels)
        labels = torch.tensor(labels, dtype=torch.float32).view(num_labels, -1)

        return F.to_tensor(image), labels

In [102]:
def create_model(num_classes):
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)
    return model

In [103]:
# Define the data directories
train_data_dir = 'C:/Users/User/Desktop/UoM 3rd Year Project/Code/Football-Player-Detection-Demo-1/train'
valid_data_dir = 'C:/Users/User/Desktop/UoM 3rd Year Project/Code/Football-Player-Detection-Demo-1/valid'

# Define data loaders for training and validation
train_dataset = CustomObjectDetectionDataset(train_data_dir)
valid_dataset = CustomObjectDetectionDataset(valid_data_dir)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=2, shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=2, shuffle=False)

# Set up the optimizer and loss function
model = create_model(num_classes=5)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

In [104]:
# Training loop
num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    for batch_idx, (images, labels) in enumerate(train_loader):
        optimizer.zero_grad()

        # Reformat labels to match the expected shape [N, 4]
        num_labels = labels.size(0)
        boxes = labels[:, 1:5]  # Extract the bounding box coordinates
        labels = labels[:, 0].long()  # Extract the class labels

        targets = [{'boxes': boxes, 'labels': labels}]

        outputs = model(images, targets)
        loss = outputs['loss_classifier']  # Choose the appropriate loss as per your use case.

        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    average_loss = total_loss / len(train_loader)
    print(f'Training Epoch [{epoch + 1}/{num_epochs}]: Loss: {average_loss:.4f}')

# Validation loop
model.eval()
total_loss = 0

with torch.no_grad():
    for images, labels in valid_loader:
        outputs = model(images)
        loss = criterion(outputs, labels)
        total_loss += loss.item()

average_loss = total_loss / len(valid_loader)
print(f'Validation Epoch [{epoch + 1}/{num_epochs}]: Loss: {average_loss:.4f}')

AssertionError: Expected target boxes to be a tensor of shape [N, 4], got torch.Size([2, 4, 6]).