# Detection Model (A) - RainyNuScenes

In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
import numpy as np
import copy
import csv

# Check if CUDA is available and set the device accordingly
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Define U-Net Model with Pretrained ResNet Encoder
class UNetWithResNetEncoder(nn.Module):
    """U-Net style segmentation network with a torchvision ResNet-34 encoder.

    The encoder reuses the ResNet stem (conv1/bn1/relu, without the max-pool)
    and its four residual stages. The decoder mirrors it with four
    transposed-convolution upsampling steps, each followed by concatenation
    with the matching encoder feature map and a double 3x3 convolution.
    The final 1x1 convolution is passed through a sigmoid, so the network
    returns values in (0, 1) rather than logits.

    Args:
        out_channels: Number of channels produced by the output layer.
        pretrained: If True (default, the original behaviour) the encoder is
            initialised with ImageNet weights. Pass False to skip the weight
            download, e.g. when a checkpoint is loaded right afterwards.
    """

    def __init__(self, out_channels=1, pretrained=True):
        super().__init__()

        resnet = self._build_resnet34(pretrained)

        # Encoder stages. The nn.Sequential wrappers are kept on purpose so
        # that state_dict keys stay compatible with existing checkpoints.
        self.enc1 = nn.Sequential(resnet.conv1, resnet.bn1, resnet.relu)  # stem (no max-pool)
        self.enc2 = nn.Sequential(resnet.layer1)
        self.enc3 = nn.Sequential(resnet.layer2)
        self.enc4 = nn.Sequential(resnet.layer3)
        self.enc5 = nn.Sequential(resnet.layer4)

        # Decoder stages. Each conv block receives the upsampled features
        # concatenated with the skip connection, hence the doubled input width.
        self.upconv4 = self._up_conv(512, 256)
        self.dec4 = self._conv_block(512, 256)

        self.upconv3 = self._up_conv(256, 128)
        self.dec3 = self._conv_block(256, 128)

        self.upconv2 = self._up_conv(128, 64)
        self.dec2 = self._conv_block(128, 64)

        self.upconv1 = self._up_conv(64, 64)
        self.dec1 = self._conv_block(64 + 64, 64)

        # Output layer
        self.conv_last = nn.Conv2d(64, out_channels, kernel_size=1)

    @staticmethod
    def _build_resnet34(pretrained):
        """Create the ResNet-34 backbone using whichever torchvision API exists."""
        weights_enum = getattr(models, 'ResNet34_Weights', None)
        if weights_enum is None:
            # Older torchvision releases only know the boolean flag.
            return models.resnet34(pretrained=pretrained)
        # IMAGENET1K_V1 is the weight set the legacy `pretrained=True` selected.
        return models.resnet34(weights=weights_enum.IMAGENET1K_V1 if pretrained else None)

    @staticmethod
    def _up_conv(in_channels, out_channels):
        """Return a 2x2, stride-2 transposed convolution used for upsampling."""
        return nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2)

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Return two 3x3 convolutions (padding 1), each followed by ReLU."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        """Run the encoder/decoder and return the sigmoid-activated prediction.

        Args:
            x: Input batch; presumably (N, 3, H, W) as expected by the
               ResNet stem - TODO confirm against callers.

        Returns:
            Tensor with `out_channels` channels and values in (0, 1).
        """
        # Encoder
        enc1 = self.enc1(x)
        enc2 = self.enc2(enc1)
        enc3 = self.enc3(enc2)
        enc4 = self.enc4(enc3)
        enc5 = self.enc5(enc4)

        # Decoder: upsample, fuse with the matching encoder features, refine.
        dec4 = self._decode(self.upconv4, self.dec4, enc5, enc4)
        dec3 = self._decode(self.upconv3, self.dec3, dec4, enc3)
        dec2 = self._decode(self.upconv2, self.dec2, dec3, enc2)
        dec1 = self._decode(self.upconv1, self.dec1, dec2, enc1)

        return torch.sigmoid(self.conv_last(dec1))

    def _decode(self, upconv, block, x, skip):
        """Upsample `x`, concatenate the size-aligned `skip` tensor and apply `block`."""
        upsampled = upconv(x)
        fused = torch.cat((upsampled, self._align_tensor(skip, upsampled)), dim=1)
        return block(fused)

    def _align_tensor(self, enc, dec):
        """Align encoder tensor to match the spatial size of the decoder tensor.

        Returns `enc` unchanged when height and width already match;
        otherwise returns a bilinearly interpolated copy with the decoder's
        height and width.
        """
        target_size = (dec.size(2), dec.size(3))
        if (enc.size(2), enc.size(3)) == target_size:
            return enc
        return nn.functional.interpolate(enc, size=target_size, mode='bilinear', align_corners=False)

# Define the custom Dataset class
class CombinedSegmentationDataset(Dataset):
    """Dataset of (image, binary mask) pairs read from two directories.

    Images and masks are paired by position after sorting both directory
    listings by file name, so the two directories must contain the same
    number of files and corresponding files must sort to the same index.

    Args:
        image_dir: Directory containing the input images.
        mask_dir: Directory containing the masks.
        transform: Optional callable applied to the RGB PIL image only.

    Raises:
        ValueError: If the two directories contain a different number of
            entries, since positional pairing would then be wrong.
    """

    def __init__(self, image_dir, mask_dir, transform=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform

        # Sort so that images and masks are aligned by name.
        self.image_names = sorted(os.listdir(image_dir))
        self.mask_names = sorted(os.listdir(mask_dir))

        if len(self.image_names) != len(self.mask_names):
            raise ValueError(
                f"Image/mask count mismatch: {len(self.image_names)} entries in "
                f"{image_dir!r} vs {len(self.mask_names)} entries in {mask_dir!r}"
            )

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        """Return `(image, mask)` for index `idx`.

        The mask is a float tensor of shape (1, H, W) holding 0.0 / 1.0.
        When the transformed image is a tensor whose height/width differ
        from the mask's, the mask is resized with nearest-neighbour
        interpolation so that both share the same spatial size.
        """
        image_path = os.path.join(self.image_dir, self.image_names[idx])
        mask_path = os.path.join(self.mask_dir, self.mask_names[idx])

        # Context managers close the underlying files once the pixel data
        # has been copied by convert().
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')
        with Image.open(mask_path) as raw_mask:
            mask = raw_mask.convert('L')  # single-channel grayscale

        if self.transform is not None:
            # The transform is applied to the image only.
            image = self.transform(image)

        # Any non-zero mask pixel counts as foreground.
        mask = (transforms.ToTensor()(mask) > 0).float()

        # Keep the mask in step with image transforms that change the size
        # (e.g. Resize); nearest-neighbour keeps the mask strictly binary.
        if isinstance(image, torch.Tensor) and image.shape[-2:] != mask.shape[-2:]:
            mask = torch.nn.functional.interpolate(
                mask.unsqueeze(0), size=tuple(image.shape[-2:]), mode='nearest'
            ).squeeze(0)

        return image, mask

# IoU and Dice coefficient functions
def calculate_iou(pred, target):
    """Return the smoothed intersection-over-union of two tensors.

    Both tensors are reduced over all elements at once (no per-sample
    averaging). A constant of 1e-6 is added to numerator and denominator,
    so two all-zero tensors give a score of 1 instead of dividing by zero.
    """
    eps = 1e-6
    overlap = (pred * target).sum()
    combined = pred.sum() + target.sum()
    return (overlap + eps) / (combined - overlap + eps)

def calculate_dice(pred, target):
    """Return the smoothed Dice coefficient of two tensors.

    Both tensors are reduced over all elements at once. A constant of 1e-6
    is added to numerator and denominator, so two all-zero tensors give a
    score of 1 instead of dividing by zero.
    """
    eps = 1e-6
    overlap = (pred * target).sum()
    numerator = 2 * overlap + eps
    denominator = pred.sum() + target.sum() + eps
    return numerator / denominator

# Validation loop
def validate_unet(model, dataloader, criterion):
    """Evaluate `model` on `dataloader` without gradient tracking.

    The model is switched to eval mode and is left in that mode on return.

    Returns:
        Tuple `(loss, iou, dice)`: the loss is weighted by batch size and
        averaged over `len(dataloader.dataset)`; IoU and Dice are computed
        per batch on predictions thresholded at 0.5 and averaged over the
        number of batches.
    """
    model.eval()

    # Constants used to binarise the predictions.
    one = torch.tensor(1.0).to(device)
    zero = torch.tensor(0.0).to(device)

    loss_sum = 0.0
    iou_sum = 0.0
    dice_sum = 0.0

    with torch.no_grad():
        for batch_images, batch_masks in dataloader:
            batch_images = batch_images.to(device)
            batch_masks = batch_masks.to(device)

            predictions = model(batch_images)
            batch_loss = criterion(predictions, batch_masks)
            loss_sum += batch_loss.item() * batch_images.size(0)

            binary = torch.where(predictions > 0.5, one, zero)
            iou_sum += calculate_iou(binary, batch_masks).item()
            dice_sum += calculate_dice(binary, batch_masks).item()

    num_batches = len(dataloader)
    mean_loss = loss_sum / len(dataloader.dataset)
    mean_iou = iou_sum / num_batches
    mean_dice = dice_sum / num_batches

    return mean_loss, mean_iou, mean_dice

# Training loop with CSV logging
def train_unet(model, train_loader, val_loader, criterion, optimizer, num_epochs, patience, save_path, csv_path):
    """Train `model` with early stopping, checkpointing and CSV metric logging.

    For each epoch the model is trained on `train_loader`, evaluated with
    `validate_unet` on `val_loader`, and one row of metrics is appended to
    `csv_path` (the file is overwritten with a header row at the start).
    Whenever the validation loss improves, the weights are written to
    `save_path`. Training stops once the validation loss has not improved
    for `patience` consecutive epochs or after `num_epochs` epochs.

    On return the model holds the weights with the lowest validation loss.
    The function returns None.
    """
    lowest_val_loss = float('inf')
    stale_epochs = 0
    best_state = copy.deepcopy(model.state_dict())

    # Start a fresh metrics file containing only the header row.
    header = ['Epoch', 'Train_Loss', 'Train_IoU', 'Train_Dice', 'Val_Loss', 'Val_IoU', 'Val_Dice']
    with open(csv_path, mode='w', newline='') as log_file:
        csv.writer(log_file).writerow(header)

    for epoch_number in range(1, num_epochs + 1):
        print(f'Epoch {epoch_number}/{num_epochs}')
        loss_sum = 0.0
        iou_sum = 0.0
        dice_sum = 0.0
        model.train()

        for batch_images, batch_masks in train_loader:
            batch_images = batch_images.to(device)
            batch_masks = batch_masks.to(device)

            optimizer.zero_grad()

            predictions = model(batch_images)
            batch_loss = criterion(predictions, batch_masks)

            batch_loss.backward()
            optimizer.step()

            loss_sum += batch_loss.item() * batch_images.size(0)

            # Binarise at 0.5 for the overlap metrics.
            binary = torch.where(
                predictions > 0.5,
                torch.tensor(1.0).to(device),
                torch.tensor(0.0).to(device),
            )
            iou_sum += calculate_iou(binary, batch_masks).item()
            dice_sum += calculate_dice(binary, batch_masks).item()

        num_batches = len(train_loader)
        train_loss = loss_sum / len(train_loader.dataset)
        train_iou = iou_sum / num_batches
        train_dice = dice_sum / num_batches

        val_loss, val_iou, val_dice = validate_unet(model, val_loader, criterion)

        print(f'Train Loss: {train_loss:.4f}, IoU: {train_iou:.4f}, Dice: {train_dice:.4f}')
        print(f'Val Loss: {val_loss:.4f}, IoU: {val_iou:.4f}, Dice: {val_dice:.4f}')

        # Append this epoch's metrics; reopening per epoch means every
        # completed epoch is on disk even if a later one fails.
        row = [epoch_number, train_loss, train_iou, train_dice, val_loss, val_iou, val_dice]
        with open(csv_path, mode='a', newline='') as log_file:
            csv.writer(log_file).writerow(row)

        improved = val_loss < lowest_val_loss
        if improved:
            lowest_val_loss = val_loss
            best_state = copy.deepcopy(model.state_dict())
            torch.save(model.state_dict(), save_path)
            print(f"Model improved and saved to {save_path}")
            stale_epochs = 0
        else:
            stale_epochs += 1
            print(f"No improvement in loss for {stale_epochs} epoch(s).")

        if stale_epochs == patience:
            print("Early stopping!")
            break

    # Restore the best weights seen during training.
    model.load_state_dict(best_state)

# Training configuration
batch_size = 4
num_epochs = 50
learning_rate = 1e-3
patience = 3  # epochs without validation-loss improvement before early stopping

# Output locations for the best checkpoint and the per-epoch metrics log
save_path = '/home/tim/Documents/06_Projekt_RainyNuScenes/rainynuscenes/detection_application/results/woodscape_model.pth'
csv_path = '/home/tim/Documents/06_Projekt_RainyNuScenes/rainynuscenes/detection_application/results/woodscape_metrics.csv'

# Image preprocessing: resize, convert to a [0, 1] tensor, then shift/scale
# each RGB channel with mean 0.5 and std 0.5 (i.e. to the range [-1, 1]).
transform = transforms.Compose([
    transforms.Resize((640, 640)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Input directories (training and validation splits)
img_train_dir = '/home/tim/Documents/06_Projekt_RainyNuScenes/rainynuscenes/data/soiling_woodscape_data/train/droplet_rgb'
mask_train_dir = '/home/tim/Documents/06_Projekt_RainyNuScenes/rainynuscenes/data/soiling_woodscape_data/train/droplet_masks'
img_val_dir = '/home/tim/Documents/06_Projekt_RainyNuScenes/rainynuscenes/data/soiling_woodscape_data/val/droplet_rgb'
mask_val_dir = '/home/tim/Documents/06_Projekt_RainyNuScenes/rainynuscenes/data/soiling_woodscape_data/val/droplet_masks'

# Datasets share the same image transform; only the training loader shuffles.
dataset_train = CombinedSegmentationDataset(
    image_dir=img_train_dir,
    mask_dir=mask_train_dir,
    transform=transform,
)
dataset_val = CombinedSegmentationDataset(
    image_dir=img_val_dir,
    mask_dir=mask_val_dir,
    transform=transform,
)

train_loader = DataLoader(dataset_train, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset_val, batch_size=batch_size, shuffle=False)

# Model, loss and optimizer. BCELoss expects probabilities, which matches
# the sigmoid applied at the end of the model's forward pass.
model = UNetWithResNetEncoder(out_channels=1).to(device)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Run training
train_unet(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=num_epochs,
    patience=patience,
    save_path=save_path,
    csv_path=csv_path,
)




Epoch 1/50
Train Loss: 0.3916, IoU: 0.0023, Dice: 0.0035
Val Loss: 0.3743, IoU: 0.0001, Dice: 0.0001
Model improved and saved to /home/tim/Documents/06_Projekt_RainyNuScenes/rainynuscenes/detection_application/results/woodscape_model.pth
Epoch 2/50
Train Loss: 0.4006, IoU: 0.0000, Dice: 0.0000
Val Loss: 0.4705, IoU: 0.0000, Dice: 0.0000
No improvement in loss for 1 epoch(s).
Epoch 3/50
Train Loss: 0.3527, IoU: 0.1161, Dice: 0.1678
Val Loss: 0.4307, IoU: 0.3812, Dice: 0.5435
No improvement in loss for 2 epoch(s).
Epoch 4/50
Train Loss: 0.3151, IoU: 0.3710, Dice: 0.5163
Val Loss: 0.3023, IoU: 0.4936, Dice: 0.6435
Model improved and saved to /home/tim/Documents/06_Projekt_RainyNuScenes/rainynuscenes/detection_application/results/woodscape_model.pth
Epoch 5/50
Train Loss: 0.2774, IoU: 0.4886, Dice: 0.6380
Val Loss: 0.2767, IoU: 0.5927, Dice: 0.7268
Model improved and saved to /home/tim/Documents/06_Projekt_RainyNuScenes/rainynuscenes/detection_application/results/woodscape_model.pth
Epoch 