## Dataset Configuration

The model in this notebook was trained on the RDD2022 dataset.
Because the dataset is large, training was run in the Kaggle environment.

For local execution, set the dataset root path in the training-configuration cell to the location of your local copy of the dataset.

In [1]:
import os
import numpy as np
import torch
import torch.utils.data
from torch.utils.data import DataLoader, Dataset
from PIL import Image

import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import functional as F


In [None]:
class RDD2022Dataset(Dataset):
    """Object-detection dataset backed by per-image text label files.

    Images are read from ``<root>/<split>/images`` and annotations from
    ``<root>/<split>/labels``. Every non-blank label line holds five
    whitespace-separated numbers, ``class x_center y_center width height``,
    with the four coordinates expressed as fractions of the image size.

    Args:
        root: Directory that contains one sub-directory per split.
        split: Name of the split sub-directory (default ``"train"``).
        transforms: Optional callable ``(image, target) -> (image, target)``.

    Attributes:
        imgs: Sorted image file names.
        labels: Label file names aligned with ``imgs``; an entry is ``None``
            when the image has no label file.
    """

    def __init__(self, root, split="train", transforms=None):
        self.root = root
        self.split = split
        self.transforms = transforms

        self.img_dir = os.path.join(root, split, "images")
        self.label_dir = os.path.join(root, split, "labels")

        self.imgs = sorted(os.listdir(self.img_dir))

        # Pair every image with the label file that shares its stem. Pairing
        # two independently sorted listings by index silently misaligns all
        # later samples (or raises IndexError) as soon as one image has no
        # label file.
        label_by_stem = {
            os.path.splitext(name)[0]: name
            for name in sorted(os.listdir(self.label_dir))
        }
        self.labels = [
            label_by_stem.get(os.path.splitext(name)[0]) for name in self.imgs
        ]

    def _load_annotations(self, label_name, img_width, img_height):
        """Parse one label file into absolute ``[xmin, ymin, xmax, ymax]`` boxes.

        Args:
            label_name: File name inside ``self.label_dir``, or ``None`` when
                the image has no annotations.
            img_width: Image width in pixels.
            img_height: Image height in pixels.

        Returns:
            ``(boxes, labels)``: a float32 tensor of shape ``(N, 4)`` and an
            int64 tensor of shape ``(N,)``. ``N`` is 0 for a missing or empty
            label file.

        Raises:
            ValueError: If a non-blank line does not hold exactly five numbers.
        """
        boxes = []
        labels = []

        if label_name is not None:
            with open(os.path.join(self.label_dir, label_name)) as f:
                for line in f:
                    fields = line.split()
                    if not fields:
                        # Tolerate blank lines instead of failing the unpack.
                        continue
                    cls, x_center, y_center, w, h = map(float, fields)
                    # Shift ids by one so that 0 stays free; torchvision
                    # detection models reserve label 0 for the background.
                    cls = int(cls) + 1
                    # Convert centre/size fractions to pixel corners and clip
                    # them to the image so no box extends past its border.
                    xmin = max((x_center - w / 2) * img_width, 0.0)
                    xmax = min((x_center + w / 2) * img_width, float(img_width))
                    ymin = max((y_center - h / 2) * img_height, 0.0)
                    ymax = min((y_center + h / 2) * img_height, float(img_height))
                    # Drop degenerate boxes (zero or negative extent).
                    if xmax > xmin and ymax > ymin:
                        boxes.append([xmin, ymin, xmax, ymax])
                        labels.append(cls)

        if len(boxes) > 0:
            boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
            labels = torch.as_tensor(labels, dtype=torch.int64)
        else:
            # Keep well-formed empty tensors for images without objects.
            boxes = torch.zeros((0, 4), dtype=torch.float32)
            labels = torch.zeros((0,), dtype=torch.int64)

        return boxes, labels

    def __getitem__(self, idx):
        """Return ``(image, target)`` for sample ``idx``.

        ``target`` is a dict with ``"boxes"`` and ``"labels"`` tensors. The
        image is an RGB PIL image unless ``transforms`` converts it.
        """
        img_path = os.path.join(self.img_dir, self.imgs[idx])
        img = Image.open(img_path).convert("RGB")
        img_width, img_height = img.size

        boxes, labels = self._load_annotations(
            self.labels[idx], img_width, img_height
        )
        target = {"boxes": boxes, "labels": labels}

        if self.transforms:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Number of images in the split."""
        return len(self.imgs)

In [3]:
class ComposeTransforms:
    """Chain several ``(image, target)`` transforms into a single callable."""

    def __init__(self, transforms):
        # Stored as given; the sequence is walked afresh on every call.
        self.transforms = transforms

    def __call__(self, image, target):
        """Pass the pair through each transform in order and return the result."""
        sample = (image, target)
        for stage in self.transforms:
            # Each stage must hand back exactly an (image, target) pair.
            new_image, new_target = stage(*sample)
            sample = (new_image, new_target)
        return sample


class ToTensor:
    """Convert the image with ``F.to_tensor``; the target is passed through as is."""

    def __call__(self, image, target):
        tensor_image = F.to_tensor(image)
        return tensor_image, target


class RandomHorizontalFlip:
    """Mirror the image and its boxes left-to-right with probability ``p``.

    The image must expose a three-element ``shape`` whose last entry is the
    width, so this is meant to run after the tensor conversion.
    """

    def __init__(self, p=0.5):
        self.p = p

    def __call__(self, image, target):
        """Return ``(image, target)``, flipped when the random draw is below ``p``."""
        if not (np.random.rand() < self.p):
            return image, target

        flipped = F.hflip(image)
        _, _, width = flipped.shape
        bboxes = target["boxes"]
        # The right-hand side is evaluated into a new tensor before the
        # assignment, so swapping the xmin/xmax columns in place is safe.
        bboxes[:, [0, 2]] = width - bboxes[:, [2, 0]]
        target["boxes"] = bboxes
        return flipped, target


def get_transform(train):
    """Build the transform pipeline.

    Tensor conversion is always applied; a random horizontal flip with
    probability 0.5 is added when ``train`` is truthy.
    """
    augmentations = [RandomHorizontalFlip(0.5)] if train else []
    return ComposeTransforms([ToTensor()] + augmentations)

In [None]:
def get_model(num_classes):
    """Create a Faster R-CNN (ResNet-50 FPN) with a fresh box-prediction head.

    The detector is built with ``weights="DEFAULT"``; only the box predictor
    is replaced so that it outputs ``num_classes`` classes.
    """
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
    # Reuse the input width of the existing classifier for the new head.
    head_in_features = detector.roi_heads.box_predictor.cls_score.in_features
    new_head = FastRCNNPredictor(head_in_features, num_classes)
    detector.roi_heads.box_predictor = new_head
    return detector

In [5]:
def collate_fn(batch):
    """Transpose a list of samples into one tuple per field.

    A batch ``[(img0, tgt0), (img1, tgt1)]`` becomes
    ``((img0, img1), (tgt0, tgt1))``; nothing is stacked.
    """
    per_field = zip(*batch)
    return tuple(per_field)


def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=50):
    """Run one optimisation pass over ``data_loader``.

    Args:
        model: Called as ``model(images, targets)`` in training mode; must
            return a dict of loss values.
        optimizer: Optimizer that updates the model parameters.
        data_loader: Iterable of ``(images, targets)`` batches that also
            supports ``len()``.
        device: Device the images and target tensors are moved to.
        epoch: Epoch index, used only in the progress message.
        print_freq: A progress line is printed every ``print_freq`` iterations.
    """
    model.train()
    num_batches_known = False
    for step, (images, targets) in enumerate(data_loader):
        images = [image.to(device) for image in images]
        targets = [
            {key: value.to(device) for key, value in target.items()}
            for target in targets
        ]

        # The model returns its individual loss terms; optimise their sum.
        loss_terms = model(images, targets)
        total_loss = sum(loss_terms.values())

        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        if step % print_freq != 0:
            continue
        print(f"Epoch [{epoch}] Iter [{step}/{len(data_loader)}] Loss: {total_loss.item():.4f}")

In [None]:
# --- Configuration ---------------------------------------------------------
# Dataset location. The default is the Kaggle input mount used for training;
# for local runs set the DATASET_ROOT environment variable or edit the default.
DATASET_ROOT = os.environ.get("DATASET_ROOT", "/kaggle/input/rdd-2022/RDD_SPLIT")
root = DATASET_ROOT  # kept so that code referring to `root` keeps working
# Size of the detector's classification head. The dataset shifts file class
# ids by one, so this is presumably 5 damage classes + background -- confirm
# against the label files.
num_classes = 6
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# --- Data ------------------------------------------------------------------
# Only the training split is augmented (random horizontal flip).
dataset = RDD2022Dataset(root, "train", transforms=get_transform(train=True))
dataset_val = RDD2022Dataset(root, "val", transforms=get_transform(train=False))

# Detection samples have varying image sizes and box counts, so batches are
# kept as tuples (collate_fn) instead of being stacked.
data_loader = DataLoader(dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)
# The validation loader is built here but not consumed in this cell.
data_loader_val = DataLoader(dataset_val, batch_size=2, shuffle=False, collate_fn=collate_fn)

# --- Model and optimisation ------------------------------------------------
model = get_model(num_classes)
model.to(device)

params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# Multiply the learning rate by 0.1 every 3 epochs.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

# --- Training --------------------------------------------------------------
num_epochs = 5
for epoch in range(num_epochs):
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=100)
    lr_scheduler.step()

# Persist only the weights; rebuild the architecture with get_model() to load.
torch.save(model.state_dict(), "road_damage_fasterrcnn.pth")

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:00<00:00, 210MB/s]


Epoch [0] Iter [0/6718] Loss: 4.5357
Epoch [0] Iter [100/6718] Loss: 0.1669
Epoch [0] Iter [200/6718] Loss: 0.1839
Epoch [0] Iter [300/6718] Loss: 0.3806
Epoch [0] Iter [400/6718] Loss: 0.5278
Epoch [0] Iter [500/6718] Loss: 0.1133
Epoch [0] Iter [600/6718] Loss: 0.1344
Epoch [0] Iter [700/6718] Loss: 0.4814
Epoch [0] Iter [800/6718] Loss: 0.1362
Epoch [0] Iter [900/6718] Loss: 0.2128
Epoch [0] Iter [1000/6718] Loss: 0.1936
Epoch [0] Iter [1100/6718] Loss: 0.5642
Epoch [0] Iter [1200/6718] Loss: 0.3331
Epoch [0] Iter [1300/6718] Loss: 0.8158
Epoch [0] Iter [1400/6718] Loss: 0.3687
Epoch [0] Iter [1500/6718] Loss: 0.3294
Epoch [0] Iter [1600/6718] Loss: 0.4338
Epoch [0] Iter [1700/6718] Loss: 0.3871
Epoch [0] Iter [1800/6718] Loss: 0.2480
Epoch [0] Iter [1900/6718] Loss: 0.1299
Epoch [0] Iter [2000/6718] Loss: 0.2740
Epoch [0] Iter [2100/6718] Loss: 0.1164
Epoch [0] Iter [2200/6718] Loss: 0.3820
Epoch [0] Iter [2300/6718] Loss: 0.7547
Epoch [0] Iter [2400/6718] Loss: 0.5167
Epoch [0] It