Our goal is to modify the U-Net architecture in order to build an AER_U-Net architecture.
So first, we download the U-Net architecture from GitHub: https://github.com/ThorOdinson246/UNet-Water-SegmentationS

In [6]:
!pip install torch torchvision --quiet

# Copy-pasted from Git

In [None]:
class UNet(nn.Module):
    """Layer definitions of a U-Net: encoder, bottleneck, decoder and 1x1 head.

    Only the constructor is part of this excerpt; no ``forward`` is defined
    here.

    Args:
        in_channels: Channel count fed to the first encoder block.
        out_channels: Channel count produced by the final 1x1 convolution.
        features: Output width of each encoder level, in encoder order. The
            decoder is built from the same sequence in reverse. Must be
            non-empty (``features[-1]`` and ``features[0]`` are read).
    """

    # The default is a tuple, not a list: default objects are created once and
    # shared by every call, so an immutable one cannot be altered by accident.
    def __init__(self, in_channels=3, out_channels=1, features=(64, 128, 256, 512)):
        super().__init__()
        self.downs = nn.ModuleList()
        self.ups = nn.ModuleList()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Encoder Path (Downsampling): each level starts from the width the
        # previous level produced.
        for feature in features:
            self.downs.append(DoubleConv(in_channels, feature))
            in_channels = feature

        # Bottleneck: doubles the width of the deepest encoder level.
        self.bottleneck = DoubleConv(features[-1], features[-1] * 2)

        # Decoder Path (Upsampling): self.ups alternates
        # [up-convolution, DoubleConv, up-convolution, DoubleConv, ...].
        for feature in reversed(features):
            # Up-convolution: halves the channel count (2 * feature -> feature).
            self.ups.append(
                nn.ConvTranspose2d(feature * 2, feature, kernel_size=2, stride=2)
            )
            # DoubleConv after concatenation: input width is 2 * feature
            # because the up-convolved tensor is joined with a skip tensor.
            self.ups.append(DoubleConv(feature * 2, feature))

        # Final Convolution: 1x1 projection to the requested output channels.
        self.final_conv = nn.Conv2d(features[0], out_channels, kernel_size=1)

In [None]:
class DoubleConv(nn.Module):
    """Two consecutive Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU stages.

    The first stage maps ``in_channels`` to ``out_channels``; the second keeps
    ``out_channels``. All six layers live in ``self.double_conv``.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        stages = []
        # Stage 1 reads in_channels, stage 2 reads what stage 1 produced.
        for stage_in in (in_channels, out_channels):
            stages += [
                nn.Conv2d(stage_in, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            ]
        self.double_conv = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through both stages and return the result."""
        return self.double_conv(x)

In [None]:
# Decoder loop excerpt. It is lifted out of a method body: `self`, `x` and
# `skip_connections` must already exist in the enclosing scope.
# Assumes 'x' is output from previous layer, 'skip_connections' holds encoder outputs
for i in range(0, len(self.ups), 2):
    # self.ups is walked two entries at a time; entry i is applied to x first.
    x = self.ups[i](x)
    # i // 2 turns the step-2 index into a consecutive skip-connection index.
    skip_connection = skip_connections[i // 2]


    # Join the skip tensor and x along dim 1
    # (the channel axis if tensors are NCHW — TODO confirm).
    concatenate_skip = torch.cat((skip_connection, x), dim=1)
    # Entry i + 1 consumes the concatenated tensor.
    x = self.ups[i + 1](concatenate_skip)

In [None]:
# In WaterBodyDataset class:
def __getitem__(self, idx):
    """Load one image/mask pair and return it as ``(image, mask)``.

    The image is opened as RGB and the mask as single-channel grayscale.
    When ``self.transform`` is truthy it is applied to the image and then to
    the mask; otherwise both PIL images are returned untouched.

    NOTE(review): ``img_path`` and ``mask_path`` are not defined in this
    excerpt and ``idx`` is unused — presumably the paths are derived from
    ``idx`` in code that was not copied here; confirm against the full class.
    """
    image = Image.open(img_path).convert("RGB")
    mask = Image.open(mask_path).convert("L")  # Grayscale mask

    if not self.transform:
        return image, mask

    transformed_image = self.transform(image)
    transformed_mask = self.transform(mask)
    return transformed_image, transformed_mask

In [None]:
def dice_coefficient(pred, target, smooth=1e-6):
    """Return the Dice coefficient between ``pred`` and ``target``.

    Both tensors are flattened and the score is computed as
    ``(2 * sum(pred * target) + smooth) / (sum(pred) + sum(target) + smooth)``.

    Args:
        pred: Tensor of predictions, used as given (no activation or
            threshold is applied here).
        target: Tensor of reference values with the same number of elements
            as ``pred``.
        smooth: Added to numerator and denominator so that two all-zero
            inputs give 1 instead of a 0/0 division.

    Returns:
        A zero-dimensional tensor holding the score.
    """
    # reshape(-1) rather than view(-1): view raises a RuntimeError on
    # non-contiguous tensors (e.g. after a transpose or permute), while
    # reshape copies only when it has to.
    pred_flat = pred.reshape(-1)
    target_flat = target.reshape(-1)
    intersection = (pred_flat * target_flat).sum()
    return (2. * intersection + smooth) / (pred_flat.sum() + target_flat.sum() + smooth)

# Application to our dataset

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class DoubleConv(nn.Module):
    """Pair of Conv2d(3x3, padding=1) + BatchNorm2d + ReLU units applied in sequence.

    The first unit changes the channel count from ``in_channels`` to
    ``out_channels``; the second unit keeps it at ``out_channels``.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()

        def conv_bn_relu(channels_in):
            # One unit: convolution, batch normalisation, in-place ReLU.
            return (
                nn.Conv2d(channels_in, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            )

        self.double_conv = nn.Sequential(
            *conv_bn_relu(in_channels),
            *conv_bn_relu(out_channels),
        )

    def forward(self, x):
        """Apply both units to ``x`` and return the result."""
        return self.double_conv(x)

class UNet(nn.Module):
    """U-Net: encoder, bottleneck and decoder with skip connections.

    Args:
        in_channels: Channel count fed to the first encoder block.
        out_channels: Channel count produced by the final 1x1 convolution.
        features: Output width of each encoder level, in encoder order. The
            decoder is built from the same sequence in reverse. Must be
            non-empty (``features[-1]`` and ``features[0]`` are read).
    """

    # The default is a tuple, not a list: default objects are created once and
    # shared by every call, so an immutable one cannot be altered by accident.
    def __init__(self, in_channels=3, out_channels=1, features=(64, 128, 256, 512)):
        super().__init__()
        self.downs = nn.ModuleList()
        self.ups = nn.ModuleList()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Encoder: each level starts from the width the previous one produced.
        for feature in features:
            self.downs.append(DoubleConv(in_channels, feature))
            in_channels = feature

        # Bottleneck: doubles the width of the deepest encoder level.
        self.bottleneck = DoubleConv(features[-1], features[-1] * 2)

        # Decoder: self.ups alternates [up-convolution, DoubleConv, ...].
        for feature in reversed(features):
            self.ups.append(
                nn.ConvTranspose2d(feature * 2, feature, kernel_size=2, stride=2)
            )
            # Input width is 2 * feature: up-convolved tensor + skip tensor.
            self.ups.append(DoubleConv(feature * 2, feature))  # After concatenation

        # Final: 1x1 projection to the requested number of output channels.
        self.final_conv = nn.Conv2d(features[0], out_channels, kernel_size=1)

    def forward(self, x):
        """Run the network on ``x`` and return the output of ``final_conv``.

        No activation is applied to the returned tensor.
        """
        skip_connections = []
        for down in self.downs:
            x = down(x)
            # Keep the pre-pooling activation for the matching decoder level.
            skip_connections.append(x)
            x = self.pool(x)

        x = self.bottleneck(x)
        # The decoder consumes the skips deepest-first.
        skip_connections = skip_connections[::-1]

        for i in range(0, len(self.ups), 2):
            x = self.ups[i](x)
            skip_connection = skip_connections[i // 2]

            # Only height/width can be corrected by interpolation, so only the
            # spatial dimensions are compared. A mismatch can occur when
            # pooling rounded an odd size down on the way in.
            if x.shape[2:] != skip_connection.shape[2:]:
                x = F.interpolate(x, size=skip_connection.shape[2:])

            x = torch.cat((skip_connection, x), dim=1)
            x = self.ups[i + 1](x)

        return self.final_conv(x)

In [7]:
from torch.utils.data import Dataset
from PIL import Image
import torchvision.transforms as T
import torchvision.transforms.functional as TF

class WaterBodyDataset(Dataset):
    """Dataset of (image, mask) pairs read from two parallel path sequences.

    Args:
        image_paths: Indexable collection of image file paths; its length is
            the dataset length.
        mask_paths: Indexable collection of mask file paths, looked up with
            the same index as ``image_paths``.
        transform: Optional callable applied to the image only.
    """

    def __init__(self, image_paths, mask_paths, transform=None):
        self.image_paths, self.mask_paths = image_paths, mask_paths
        self.transform = transform

    def __len__(self):
        """Return the number of image paths."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, mask)`` for position ``idx``.

        Without a transform both items are returned as PIL images (RGB image,
        "L" mask). With a transform the image is passed through it, and the
        mask is converted with ``TF.to_tensor`` and binarised at 0.5.
        """
        image = Image.open(self.image_paths[idx]).convert("RGB")
        mask = Image.open(self.mask_paths[idx]).convert("L")

        if not self.transform:
            return image, mask

        image = self.transform(image)
        # NOTE(review): the mask does not go through `transform`, so any
        # resizing or cropping done there is not mirrored on the mask —
        # confirm this is intended for the transforms in use.
        binary_mask = TF.to_tensor(mask).gt(0.5).float()
        return image, binary_mask


In [8]:
def dice_coefficient(pred, target, smooth=1e-6):
    """Return the soft Dice coefficient between logits ``pred`` and ``target``.

    ``pred`` is passed through a sigmoid, both tensors are flattened, and the
    score is ``(2 * sum(p * t) + smooth) / (sum(p) + sum(t) + smooth)``.

    Args:
        pred: Tensor of raw scores; the sigmoid is applied inside this
            function, so it must not be applied beforehand.
        target: Tensor of reference values with the same number of elements
            as ``pred``.
        smooth: Added to numerator and denominator to avoid a 0/0 division.

    Returns:
        A zero-dimensional tensor holding the score.
    """
    pred = torch.sigmoid(pred)
    # reshape(-1) rather than view(-1): view raises a RuntimeError on
    # non-contiguous tensors (e.g. after a transpose or permute), while
    # reshape copies only when it has to.
    pred_flat = pred.reshape(-1)
    target_flat = target.reshape(-1)
    intersection = (pred_flat * target_flat).sum()
    return (2. * intersection + smooth) / (pred_flat.sum() + target_flat.sum() + smooth)


In [9]:
# Training

# Pick the device once instead of querying CUDA availability for every batch;
# the model and every batch are moved to this same device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = UNet().to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
# BCEWithLogitsLoss takes the raw model output, so no sigmoid is applied
# before the loss.
criterion = nn.BCEWithLogitsLoss()

# NOTE: `train_loader` is not created in this cell. It must be defined by an
# earlier cell, otherwise the loop fails with a NameError.
for epoch in range(10):
    model.train()
    total_loss = 0
    for imgs, masks in train_loader:
        imgs = imgs.to(device)
        masks = masks.to(device)

        preds = model(imgs)
        loss = criterion(preds, masks)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # The printed value is the sum of the per-batch losses, not their mean.
    print(f"Epoch {epoch+1} - Loss: {total_loss:.4f}")


NameError: name 'train_loader' is not defined

In [None]:
# Predictions

import matplotlib.pyplot as plt

# NOTE: `val_dataset` is not created in this cell; it must already exist.
model.eval()
with torch.no_grad():
    img, mask = val_dataset[0]
    # Send the sample to wherever the model's weights live. The previous
    # `img.to(img.device)` left the tensor where it was, which fails as soon
    # as the model is on CUDA and the sample is on the CPU.
    model_device = next(model.parameters()).device
    pred = model(img.unsqueeze(0).to(model_device))
    # Logits -> probabilities, drop the size-1 dimensions, back to NumPy.
    pred = torch.sigmoid(pred).squeeze().cpu().numpy()

# Three panels side by side: input image, reference mask, thresholded output.
plt.subplot(1, 3, 1)
# Reorder the dimensions (1, 2, 0) so the first one becomes the last for imshow.
plt.imshow(img.permute(1,2,0))
plt.title("Image")

plt.subplot(1, 3, 2)
plt.imshow(mask.squeeze(), cmap='gray')
plt.title("Mask")

plt.subplot(1, 3, 3)
# Probabilities above 0.5 are shown as foreground.
plt.imshow(pred > 0.5, cmap='gray')
plt.title("Prediction")
plt.show()
