In [None]:
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image
import os
import xml.etree.ElementTree as ET

# Custom Dataset
class CustomDataset(Dataset):
    """Detection dataset pairing ``.jpg`` images with same-named XML annotations.

    Each sample is ``(image, target)`` where ``target`` is a dict holding
    ``boxes`` (N x 4 float32, ``[xmin, ymin, xmax, ymax]``), ``labels``
    (N int64), ``image_id``, ``area`` and ``iscrowd``.

    Args:
        image_dir: Directory that is scanned for ``*.jpg`` files.
        target_dir: Directory holding one ``<stem>.xml`` file per image.
        transform: Optional callable applied to the RGB PIL image.
    """

    # Class name -> integer id used in ``target['labels']``.
    # NOTE(review): torchvision detection models reserve label 0 for the
    # background class. If this dataset feeds such a model, these ids likely
    # need to start at 1 (with num_classes = 4) -- confirm against the model
    # construction code before changing, since it alters the label contract.
    LABEL_MAP = {
        'mokolwane': 0,
        'mopororo': 1,
        'motswere': 2,
    }

    def __init__(self, image_dir, target_dir, transform=None):
        super().__init__()
        self.image_dir = image_dir
        self.target_dir = target_dir
        self.transform = transform
        # os.listdir() returns entries in arbitrary order; sorting makes the
        # idx -> image mapping (and therefore image_id) reproducible.
        self.image_names = sorted(
            f for f in os.listdir(image_dir) if f.endswith('.jpg')
        )

    def get_label_id(self, label):
        """Return the integer id for ``label`` (0 when the name is unknown)."""
        return self.LABEL_MAP.get(label, 0)  # 0 for unknown labels

    def __len__(self):
        """Return the number of ``.jpg`` images found in ``image_dir``."""
        return len(self.image_names)

    def __getitem__(self, idx):
        """Load the image at ``idx`` and its parsed annotation.

        Returns:
            ``(image, target)``; ``image`` is the output of ``transform`` when
            one was given, otherwise an RGB PIL image.
        """
        file_name = self.image_names[idx]

        # Close the underlying file handle promptly instead of waiting for GC;
        # convert() returns an independent, fully loaded copy.
        with Image.open(os.path.join(self.image_dir, file_name)) as img:
            image = img.convert("RGB")

        # Swap only the extension; str.replace would also rewrite a '.jpg'
        # occurring elsewhere in the file name.
        stem = os.path.splitext(file_name)[0]
        target_name = os.path.join(self.target_dir, stem + '.xml')
        target = self.load_annotation(target_name, image_id=idx)

        if self.transform:
            image = self.transform(image)

        return image, target

    def load_annotation(self, target_file, image_id=0):
        """Parse one XML annotation file into a detection target dict.

        Objects whose box has non-positive width or height, or whose class
        name is not in ``LABEL_MAP``, are reported via ``print`` and skipped.

        Args:
            target_file: Path to the XML file with ``object`` elements, each
                containing ``name`` and ``bndbox`` (xmin/ymin/xmax/ymax).
            image_id: Identifier stored in ``target['image_id']``; defaults
                to 0 for callers that do not track sample indices.

        Returns:
            Dict with ``boxes``, ``labels``, ``image_id``, ``area`` and
            ``iscrowd`` tensors. With no valid objects, ``boxes`` has shape
            (0, 4) and the per-object tensors are empty.
        """
        root = ET.parse(target_file).getroot()

        boxes = []
        labels = []

        for obj in root.findall('object'):
            bndbox = obj.find('bndbox')
            xmin = float(bndbox.find('xmin').text)
            ymin = float(bndbox.find('ymin').text)
            xmax = float(bndbox.find('xmax').text)
            ymax = float(bndbox.find('ymax').text)

            if not (xmax > xmin and ymax > ymin):
                print(f"Invalid bounding box skipped: {[xmin, ymin, xmax, ymax]} in {target_file}")
                continue

            label = obj.find('name').text
            if label not in self.LABEL_MAP:
                # Falling back to id 0 would silently train this object as
                # 'mokolwane'; drop it instead, like an invalid box.
                print(f"Unknown label skipped: {label!r} in {target_file}")
                continue

            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(self.get_label_id(label))

        # torch.tensor([]) would be 1-D; keep the (0, 4) shape so the column
        # slicing below works for images without objects.
        boxes = torch.tensor(boxes, dtype=torch.float32) if boxes else torch.zeros((0, 4), dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.int64) if labels else torch.zeros((0,), dtype=torch.int64)

        target = {
            'boxes': boxes,
            'labels': labels,
            'image_id': torch.tensor([image_id]),
            'area': (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]),
            'iscrowd': torch.zeros(len(labels), dtype=torch.int64)
        }

        return target

# Image transforms
# Only convert to a float tensor in [0, 1]. torchvision's detection models
# (Faster R-CNN included) expect 0-1 inputs and apply ImageNet mean/std
# normalization internally, so a transforms.Normalize step here would
# normalize the images twice.
# NOTE(review): this assumes the model is torchvision's Faster R-CNN with its
# default image_mean/image_std -- confirm against the model construction code.
transform = transforms.Compose([
    transforms.ToTensor(),  # No resizing needed for Faster R-CNN
])


def collate_detection_batch(batch):
    """Regroup ``[(image, target), ...]`` into ``(images, targets)`` tuples.

    Images can differ in size and targets in box count, so they are kept as
    tuples rather than stacked. A named function (unlike a lambda) can be
    pickled, which DataLoader needs when ``num_workers > 0`` with the spawn
    start method.
    """
    return tuple(zip(*batch))


# Create datasets
train_dataset = CustomDataset(
    image_dir="/content/drive/MyDrive/all/train/images_transformed",
    target_dir="/content/drive/MyDrive/all/train/annotations_transformed",
    transform=transform
)

val_dataset = CustomDataset(
    image_dir="/content/drive/MyDrive/all/val/images_transformed",
    target_dir="/content/drive/MyDrive/all/val/annotations_transformed",
    transform=transform
)

# DataLoaders
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=collate_detection_batch)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, collate_fn=collate_detection_batch)


In [None]:
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image
import os
import xml.etree.ElementTree as ET

# Custom Dataset
class CustomDataset(Dataset):
    """Detection dataset pairing ``.jpg`` images with same-named XML annotations.

    Each sample is ``(image, target)`` where ``target`` is a dict holding
    ``boxes`` (N x 4 float32, ``[xmin, ymin, xmax, ymax]``), ``labels``
    (N int64), ``image_id``, ``area`` and ``iscrowd``.

    Args:
        image_dir: Directory that is scanned for ``*.jpg`` files.
        target_dir: Directory holding one ``<stem>.xml`` file per image.
        transform: Optional callable applied to the RGB PIL image.
    """

    # Class name -> integer id used in ``target['labels']``.
    # NOTE(review): torchvision detection models reserve label 0 for the
    # background class. If this dataset feeds such a model, these ids likely
    # need to start at 1 (with num_classes = 4) -- confirm against the model
    # construction code before changing, since it alters the label contract.
    LABEL_MAP = {
        'mokolwane': 0,
        'mopororo': 1,
        'motswere': 2,
    }

    def __init__(self, image_dir, target_dir, transform=None):
        super().__init__()
        self.image_dir = image_dir
        self.target_dir = target_dir
        self.transform = transform
        # os.listdir() returns entries in arbitrary order; sorting makes the
        # idx -> image mapping (and therefore image_id) reproducible.
        self.image_names = sorted(
            f for f in os.listdir(image_dir) if f.endswith('.jpg')
        )

    def get_label_id(self, label):
        """Return the integer id for ``label`` (0 when the name is unknown)."""
        return self.LABEL_MAP.get(label, 0)  # 0 for unknown labels

    def __len__(self):
        """Return the number of ``.jpg`` images found in ``image_dir``."""
        return len(self.image_names)

    def __getitem__(self, idx):
        """Load the image at ``idx`` and its parsed annotation.

        Returns:
            ``(image, target)``; ``image`` is the output of ``transform`` when
            one was given, otherwise an RGB PIL image.
        """
        file_name = self.image_names[idx]

        # Close the underlying file handle promptly instead of waiting for GC;
        # convert() returns an independent, fully loaded copy.
        with Image.open(os.path.join(self.image_dir, file_name)) as img:
            image = img.convert("RGB")

        # Swap only the extension; str.replace would also rewrite a '.jpg'
        # occurring elsewhere in the file name.
        stem = os.path.splitext(file_name)[0]
        target_name = os.path.join(self.target_dir, stem + '.xml')
        target = self.load_annotation(target_name, image_id=idx)

        if self.transform:
            image = self.transform(image)

        return image, target

    def load_annotation(self, target_file, image_id=0):
        """Parse one XML annotation file into a detection target dict.

        Objects whose box has non-positive width or height, or whose class
        name is not in ``LABEL_MAP``, are reported via ``print`` and skipped.

        Args:
            target_file: Path to the XML file with ``object`` elements, each
                containing ``name`` and ``bndbox`` (xmin/ymin/xmax/ymax).
            image_id: Identifier stored in ``target['image_id']``; defaults
                to 0 for callers that do not track sample indices.

        Returns:
            Dict with ``boxes``, ``labels``, ``image_id``, ``area`` and
            ``iscrowd`` tensors. With no valid objects, ``boxes`` has shape
            (0, 4) and the per-object tensors are empty.
        """
        root = ET.parse(target_file).getroot()

        boxes = []
        labels = []

        for obj in root.findall('object'):
            bndbox = obj.find('bndbox')
            xmin = float(bndbox.find('xmin').text)
            ymin = float(bndbox.find('ymin').text)
            xmax = float(bndbox.find('xmax').text)
            ymax = float(bndbox.find('ymax').text)

            if not (xmax > xmin and ymax > ymin):
                print(f"Invalid bounding box skipped: {[xmin, ymin, xmax, ymax]} in {target_file}")
                continue

            label = obj.find('name').text
            if label not in self.LABEL_MAP:
                # Falling back to id 0 would silently train this object as
                # 'mokolwane'; drop it instead, like an invalid box.
                print(f"Unknown label skipped: {label!r} in {target_file}")
                continue

            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(self.get_label_id(label))

        # torch.tensor([]) would be 1-D; keep the (0, 4) shape so the column
        # slicing below works for images without objects.
        boxes = torch.tensor(boxes, dtype=torch.float32) if boxes else torch.zeros((0, 4), dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.int64) if labels else torch.zeros((0,), dtype=torch.int64)

        target = {
            'boxes': boxes,
            'labels': labels,
            'image_id': torch.tensor([image_id]),
            'area': (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]),
            'iscrowd': torch.zeros(len(labels), dtype=torch.int64)
        }

        return target

# Image transforms
# Only convert to a float tensor in [0, 1]. torchvision's detection models
# (Faster R-CNN included) expect 0-1 inputs and apply ImageNet mean/std
# normalization internally, so a transforms.Normalize step here would
# normalize the images twice.
# NOTE(review): this assumes the model is torchvision's Faster R-CNN with its
# default image_mean/image_std -- confirm against the model construction code.
transform = transforms.Compose([
    transforms.ToTensor(),  # No resizing needed for Faster R-CNN
])


def collate_detection_batch(batch):
    """Regroup ``[(image, target), ...]`` into ``(images, targets)`` tuples.

    Images can differ in size and targets in box count, so they are kept as
    tuples rather than stacked. A named function (unlike a lambda) can be
    pickled, which DataLoader needs when ``num_workers > 0`` with the spawn
    start method.
    """
    return tuple(zip(*batch))


# Create datasets
train_dataset = CustomDataset(
    image_dir="/content/drive/MyDrive/all/train/images_transformed",
    target_dir="/content/drive/MyDrive/all/train/annotations_transformed",
    transform=transform
)

val_dataset = CustomDataset(
    image_dir="/content/drive/MyDrive/all/val/images_transformed",
    target_dir="/content/drive/MyDrive/all/val/annotations_transformed",
    transform=transform
)

# DataLoaders
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=collate_detection_batch)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, collate_fn=collate_detection_batch)
