In [1]:
import os
import cv2
import numpy as np
import torch
import csv
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader, random_split
from torchmetrics.segmentation import MeanIoU
import matplotlib.pyplot as plt
from tqdm import tqdm
from datetime import datetime
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.transforms as transforms
import os
import cv2 # pip install opencv-python
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm # Use tqdm.notebook for Jupyter/Colab, or just tqdm
import wandb # pip install wandb

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# --- Block Definitions (Must be defined before UNet class) ---

class EncoderBlock(nn.Module):
    """U-Net encoder stage: two (3x3 conv -> BatchNorm -> ReLU) layers, then 2x2 max-pooling.

    Args:
        in_channels: channels of the incoming feature map.
        out_channels: channels produced by both convolutions.

    forward(x) returns a pair ``(pooled, features)``: ``features`` is the
    un-pooled convolution output (used as the skip connection) and ``pooled``
    is the same tensor after ``MaxPool2d(2)``.
    """
    def __init__(self, in_channels, out_channels):
        super().__init__()
        # The convolutions carry no bias because each one feeds a BatchNorm layer.
        stack = []
        for source_channels in (in_channels, out_channels):
            stack.extend([
                nn.Conv2d(source_channels, out_channels, 3, padding=1, bias=False),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            ])
        self.conv = nn.Sequential(*stack)
        self.pool = nn.MaxPool2d(2)

    def forward(self, x):
        features = self.conv(x)
        # Pooled tensor feeds the next stage; un-pooled one is the skip connection.
        return self.pool(features), features

class DecoderBlock(nn.Module):
    """U-Net decoder stage that fuses an up-sampled map with an encoder skip tensor.

    Args:
        in_channels: channels of the tensor coming from the deeper stage.
        out_channels: channels after the transposed convolution; the skip
            tensor is expected to carry the same number of channels, so the
            fused tensor has ``2 * out_channels`` channels.

    forward(x, skip) up-samples ``x`` with a kernel-2/stride-2 transposed
    convolution, center-crops ``skip`` when the shapes differ, concatenates
    both along the channel axis and applies two conv -> BatchNorm -> ReLU layers.
    """
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.up = nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2)
        fused_channels = out_channels * 2  # up-sampled map + skip connection
        self.conv = nn.Sequential(
            nn.Conv2d(fused_channels, out_channels, 3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, 3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )

    def forward(self, x, skip):
        upsampled = self.up(x)
        if upsampled.shape != skip.shape:
            # Center-crop the skip tensor down to the up-sampled size; an odd
            # size difference removes the extra row/column from the bottom/right.
            skip_h, skip_w = skip.size()[2], skip.size()[3]
            gap_h = skip_h - upsampled.size()[2]
            gap_w = skip_w - upsampled.size()[3]
            row_window = slice(gap_h // 2, skip_h - gap_h // 2 - (gap_h % 2))
            col_window = slice(gap_w // 2, skip_w - gap_w // 2 - (gap_w % 2))
            skip = skip[:, :, row_window, col_window]

        fused = torch.cat([upsampled, skip], dim=1)
        return self.conv(fused)

class DecoderBlockNoSkip(nn.Module):
    """Decoder stage for the skip-free U-Net variant.

    Args:
        in_channels: channels of the tensor coming from the deeper stage.
        out_channels: channels after up-sampling and after each convolution.

    forward(x) applies a kernel-2/stride-2 transposed convolution followed by
    two conv -> BatchNorm -> ReLU layers; no encoder features are mixed in.
    """
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.up = nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2)
        # Both convolutions keep the channel count: nothing is concatenated here.
        refine = []
        for _ in range(2):
            refine.append(nn.Conv2d(out_channels, out_channels, 3, padding=1, bias=False))
            refine.append(nn.BatchNorm2d(out_channels))
            refine.append(nn.ReLU(inplace=True))
        self.conv = nn.Sequential(*refine)

    def forward(self, x):
        return self.conv(self.up(x))

class ResidualBlock(nn.Module):
    """Two-convolution residual block with an optionally scaled shortcut.

    Args:
        in_channels: channels of the input tensor.
        out_channels: channels of the output tensor.
        alpha: factor the shortcut branch is multiplied by before it is added
            to the convolution branch.

    When ``in_channels != out_channels`` the shortcut goes through a
    1x1 conv + BatchNorm projection so both branches have matching channels.
    A ReLU is applied to the sum of the two branches.
    """
    def __init__(self, in_channels, out_channels, alpha=1.0):
        super().__init__()
        self.use_projection = in_channels != out_channels
        self.alpha = alpha

        # Main branch: conv -> BN -> ReLU -> conv -> BN
        self.conv1 = nn.Conv2d(in_channels, out_channels, 3, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, 3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Shortcut branch: only built when the channel counts differ.
        if self.use_projection:
            self.projection = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, 1, bias=False),
                nn.BatchNorm2d(out_channels),
            )

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        shortcut = self.projection(x) if self.use_projection else x

        # Scaled shortcut is added in place, then the shared ReLU is applied.
        out += shortcut * self.alpha
        return self.relu(out)

class ResidualEncoderBlock(nn.Module):
    """Encoder stage for the residual variant: one ResidualBlock, then 2x2 max-pooling.

    Args:
        in_channels: channels of the incoming feature map.
        out_channels: channels produced by the residual block.
        alpha: shortcut scaling factor forwarded to ``ResidualBlock``.

    forward(x) returns ``(pooled, features)`` where ``features`` is the
    residual block output kept for the skip connection.
    """
    def __init__(self, in_channels, out_channels, alpha=1.0):
        super().__init__()
        self.resblock = ResidualBlock(in_channels, out_channels, alpha)
        self.pool = nn.MaxPool2d(2)

    def forward(self, x):
        features = self.resblock(x)
        # Same (pooled, skip) contract as the plain encoder block.
        return self.pool(features), features

class ResidualDecoderBlock(nn.Module):
    """Decoder stage for the residual variant: up-sample, fuse with the skip, ResidualBlock.

    Args:
        in_channels: channels of the tensor coming from the deeper stage.
        out_channels: channels after up-sampling; the skip tensor is expected
            to have the same count, so the residual block receives
            ``2 * out_channels`` channels and maps them back to ``out_channels``.
        alpha: shortcut scaling factor forwarded to ``ResidualBlock``.
    """
    def __init__(self, in_channels, out_channels, alpha=1.0):
        super().__init__()
        self.up = nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2)
        fused_channels = out_channels * 2  # up-sampled map + skip connection
        self.resblock = ResidualBlock(fused_channels, out_channels, alpha)

    def forward(self, x, skip):
        upsampled = self.up(x)
        if upsampled.shape != skip.shape:
            # Center-crop the skip tensor to the up-sampled size; an odd
            # difference drops the extra row/column at the bottom/right.
            skip_h, skip_w = skip.size()[2], skip.size()[3]
            gap_h = skip_h - upsampled.size()[2]
            gap_w = skip_w - upsampled.size()[3]
            row_window = slice(gap_h // 2, skip_h - gap_h // 2 - (gap_h % 2))
            col_window = slice(gap_w // 2, skip_w - gap_w // 2 - (gap_w % 2))
            skip = skip[:, :, row_window, col_window]

        fused = torch.cat([upsampled, skip], dim=1)
        return self.resblock(fused)

class AttentionGate(nn.Module):
    """Additive attention gate (https://arxiv.org/abs/1804.03999).

    Args:
        F_g: channels of the gating signal ``g``.
        F_l: channels of the skip tensor ``x``.
        F_int: channels of the intermediate space both inputs are projected to.

    forward(g, x) projects both inputs with 1x1 conv + BatchNorm, sums them,
    applies ReLU, reduces the result to one channel with 1x1 conv + BatchNorm +
    Sigmoid, and returns ``x`` multiplied by that single-channel map
    (broadcast over the channels of ``x``). ``g`` and ``x`` must already have
    matching spatial sizes for the sum to work.
    """
    def __init__(self, F_g, F_l, F_int):
        super().__init__()

        def pointwise(channels_in, channels_out):
            # 1x1 convolution followed by batch normalisation.
            return [
                nn.Conv2d(channels_in, channels_out, kernel_size=1, stride=1, padding=0, bias=True),
                nn.BatchNorm2d(channels_out),
            ]

        self.W_g = nn.Sequential(*pointwise(F_g, F_int))   # gating-signal projection
        self.W_x = nn.Sequential(*pointwise(F_l, F_int))   # skip-connection projection
        self.psi = nn.Sequential(*pointwise(F_int, 1), nn.Sigmoid())  # attention map
        self.relu = nn.ReLU(inplace=True)

    def forward(self, g, x):
        combined = self.relu(self.W_g(g) + self.W_x(x))
        attention = self.psi(combined)  # one channel, values in (0, 1)
        return x * attention

class AttnDecoderBlock(nn.Module):
    """Decoder stage that gates the encoder skip tensor with an AttentionGate.

    Args:
        in_channels: channels of the tensor coming from the deeper stage.
        out_channels: channels after up-sampling; also the channel count the
            gate is built for on both its gating and skip inputs.
        attn_channels: intermediate channel count (``F_int``) of the gate.

    forward(x, skip) up-samples ``x`` (this is the gating signal), center-crops
    ``skip`` when the shapes differ, gates the skip tensor, concatenates the
    up-sampled map with the gated skip and applies two conv -> BN -> ReLU layers.

    Raises:
        ValueError: if the shapes still differ after cropping.
    """
    def __init__(self, in_channels, out_channels, attn_channels):
        super().__init__()
        self.up = nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2)
        self.attn_gate = AttentionGate(F_g=out_channels, F_l=out_channels, F_int=attn_channels)
        fused_channels = out_channels * 2  # up-sampled map + gated skip
        self.conv = nn.Sequential(
            nn.Conv2d(fused_channels, out_channels, 3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, 3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )

    def forward(self, x, skip):
        g = self.up(x)  # gating signal

        skip_cropped = skip
        if g.shape != skip.shape:
            # Center-crop the skip tensor to the gating signal's spatial size.
            skip_h, skip_w = skip.size()[2], skip.size()[3]
            diffY = skip_h - g.size()[2]
            diffX = skip_w - g.size()[3]
            skip_cropped = skip[:, :,
                                diffY // 2 : skip_h - diffY // 2 - (diffY % 2),
                                diffX // 2 : skip_w - diffX // 2 - (diffX % 2)]
            if g.shape != skip_cropped.shape:
                raise ValueError(f"Shape mismatch after crop: g={g.shape}, skip_cropped={skip_cropped.shape}")

        attn_skip = self.attn_gate(g=g, x=skip_cropped)
        return self.conv(torch.cat([g, attn_skip], dim=1))


# --- Unified U-Net Class ---

class UNet(nn.Module):
    """Four-level U-Net for segmentation with a selectable block family.

    Args:
        num_classes: number of output channels of the final 1x1 convolution.
        variant: one of
            "vanilla"   - plain conv encoder, decoder concatenates the skips;
            "noskip"    - plain conv encoder, decoder ignores the skips;
            "residual"  - residual encoder / bottleneck / decoder blocks;
            "attention" - plain conv encoder, decoder gates each skip.
        alpha: shortcut scaling passed to the residual blocks; the other
            variants only store it.

    Channel plan: encoder 3 -> 64 -> 128 -> 256 -> 512, bottleneck 512 -> 1024,
    decoder 1024 -> 512 -> 256 -> 128 -> 64, then a 1x1 conv to ``num_classes``.
    forward(x) returns the output of that last convolution (no activation).
    """
    def __init__(self, num_classes=13, variant="vanilla", alpha=1.0):
        super().__init__()
        assert variant in ["vanilla", "noskip", "residual", "attention"], f"Invalid variant: {variant}"

        self.variant = variant
        self.alpha = alpha

        # --- Block factories for the requested variant ---
        def plain_bottleneck(in_ch, mid_ch, out_ch):
            return nn.Sequential(
                nn.Conv2d(in_ch, mid_ch, 3, padding=1, bias=False),
                nn.BatchNorm2d(mid_ch),
                nn.ReLU(inplace=True),
                nn.Conv2d(mid_ch, out_ch, 3, padding=1, bias=False),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(inplace=True),
            )

        def residual_bottleneck(in_ch, mid_ch, out_ch):
            return nn.Sequential(
                ResidualBlock(in_ch, mid_ch, alpha),
                ResidualBlock(mid_ch, out_ch, alpha),
            )

        if variant == "residual":
            def make_encoder(in_ch, out_ch):
                return ResidualEncoderBlock(in_ch, out_ch, alpha)

            def make_decoder(in_ch, out_ch):
                return ResidualDecoderBlock(in_ch, out_ch, alpha)

            make_bottleneck = residual_bottleneck
        elif variant == "attention":
            make_encoder = EncoderBlock

            def make_decoder(in_ch, out_ch):
                # The gate's intermediate width is half the decoder width
                # (256 / 128 / 64 / 32 for the four stages).
                return AttnDecoderBlock(in_ch, out_ch, attn_channels=out_ch // 2)

            make_bottleneck = plain_bottleneck
        else:  # "vanilla" or "noskip"
            make_encoder = EncoderBlock
            make_decoder = DecoderBlock if variant == "vanilla" else DecoderBlockNoSkip
            make_bottleneck = plain_bottleneck

        # --- Encoder path ---
        self.enc1 = make_encoder(3, 64)
        self.enc2 = make_encoder(64, 128)
        self.enc3 = make_encoder(128, 256)
        self.enc4 = make_encoder(256, 512)

        # --- Bottleneck ---
        self.bottleneck = make_bottleneck(512, 1024, 1024)

        # --- Decoder path ---
        # Each stage up-samples to `out` channels; the skip-using variants then
        # concatenate a skip tensor that also has `out` channels.
        self.dec1 = make_decoder(1024, 512)
        self.dec2 = make_decoder(512, 256)
        self.dec3 = make_decoder(256, 128)
        self.dec4 = make_decoder(128, 64)

        # --- Classification head ---
        self.final_conv = nn.Conv2d(64, num_classes, kernel_size=1)

    def forward(self, x):
        # Encoder: keep every un-pooled output for the decoder.
        skips = []
        for encoder in (self.enc1, self.enc2, self.enc3, self.enc4):
            x, skip = encoder(x)
            skips.append(skip)

        x = self.bottleneck(x)

        decoders = (self.dec1, self.dec2, self.dec3, self.dec4)
        if self.variant in ["vanilla", "residual", "attention"]:
            # Deepest skip pairs with the first decoder stage.
            for decoder, skip in zip(decoders, reversed(skips)):
                x = decoder(x, skip)
        elif self.variant == "noskip":
            for decoder in decoders:
                x = decoder(x)

        return self.final_conv(x)

# --- Instantiate Models ---
# One instance per architecture variant. `alpha` only affects the residual
# variant; the others store it without using it.
model_vanilla = UNet(variant="vanilla")
model_noskip = UNet(variant="noskip")
model_residual = UNet(variant="residual", alpha=0.7)
model_attention = UNet(variant="attention")

print("--------------------------------------- VANILLA U-NET -------------------------------------------")
print(model_vanilla)
print("\n--------------------------------------- NO-SKIP U-NET -------------------------------------------")
print(model_noskip)
print("\n--------------------------------------- RESIDUAL U-NET ------------------------------------------")
print(model_residual)
print("\n--------------------------------------- ATTENTION U-NET -----------------------------------------")
print(model_attention)

# --- Example Input and Forward Pass (Optional) ---
print("\n--- Testing Forward Pass with Dummy Input (Batch Size 2, Height/Width 256) ---")
dummy_input = torch.randn(2, 3, 256, 256) # Example: Batch size 2, 3 channels, 256x256 image

# This is only a shape check, so run it in eval mode without autograd:
# a train-mode forward would overwrite the BatchNorm running statistics with
# statistics of random noise and keep four autograd graphs alive for nothing.
with torch.no_grad():
    for shape_label, shape_model in (
        ("Vanilla Output Shape:", model_vanilla),
        ("No-Skip Output Shape:", model_noskip),
        ("Residual Output Shape:", model_residual),
        ("Attention Output Shape:", model_attention),
    ):
        shape_model.eval()
        print(shape_label, shape_model(dummy_input).shape)
        shape_model.train()  # back to the mode a freshly built module starts in

--------------------------------------- VANILLA U-NET -------------------------------------------
UNet(
  (enc1): EncoderBlock(
    (conv): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU(inplace=True)
    )
    (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (enc2): EncoderBlock(
    (conv): Sequential(
      (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(

In [3]:
# --- Dataset Class (Provided by User - Slightly adapted) ---
class SegmentationDataset(Dataset):
    """Paired image / label-mask dataset read from two directories of ``.png`` files.

    Args:
        image_dir: directory with the input images.
        label_dir: directory with the label masks; file names (without
            extension) must match those in ``image_dir`` one-to-one.
        target_size: size passed to ``cv2.resize`` for both image and mask.
        transform: optional callable applied to the image tensor and, with the
            same torch seed, to the mask as a PIL image. It should contain
            spatial transforms only, because it is applied to the label mask too.

    Each item is ``(img_tensor, mask_tensor)``: a float image tensor produced
    by ``ToTensor`` and an integer (``long``) mask holding the raw label values.
    """
    def __init__(self, image_dir, label_dir, target_size=(256, 256), transform=None):
        super().__init__()
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.target_size = target_size
        self.transform = transform

        # Sorted listings so that index i refers to the same sample in both lists.
        self.image_files = sorted([f for f in os.listdir(image_dir) if f.endswith('.png')])
        self.label_files = sorted([f for f in os.listdir(label_dir) if f.endswith('.png')])

        assert len(self.image_files) == len(self.label_files), "Mismatch between images and labels"
        base_image_files = [os.path.splitext(f)[0] for f in self.image_files]
        base_label_files = [os.path.splitext(f)[0] for f in self.label_files]
        assert base_image_files == base_label_files, f"Mismatched filenames: {self.image_files[:5]} vs {self.label_files[:5]}"

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_path = os.path.join(self.image_dir, self.image_files[idx])
        image = cv2.imread(img_path)
        if image is None: raise FileNotFoundError(f"Could not read image: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, self.target_size, interpolation=cv2.INTER_LINEAR)

        mask_path = os.path.join(self.label_dir, self.label_files[idx])
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if mask is None: raise FileNotFoundError(f"Could not read mask: {mask_path}")
        # Nearest-neighbour keeps label values intact (no blended class ids).
        mask = cv2.resize(mask, self.target_size, interpolation=cv2.INTER_NEAREST)

        img_tensor = transforms.ToTensor()(image).float()
        mask_tensor = torch.from_numpy(mask).long()

        if self.transform:
            # Re-seeding torch before each call makes the random transform draw
            # the same parameters for the image and for the mask.
            # NOTE(review): this resets the global torch RNG on every item.
            seed = np.random.randint(2147483647)
            torch.manual_seed(seed)
            img_tensor = self.transform(img_tensor)

            mask_pil = transforms.ToPILImage()(mask_tensor.unsqueeze(0).byte())
            torch.manual_seed(seed)
            mask_transformed_pil = self.transform(mask_pil)
            # Convert through numpy, NOT ToTensor(): ToTensor rescales uint8
            # data to [0, 1], so a following .long() would turn every class id
            # into 0 and wipe the labels.
            mask_tensor = torch.from_numpy(np.array(mask_transformed_pil)).long()

        return img_tensor, mask_tensor

# --- mIoU Function (Provided by User - slightly adapted) ---
def compute_mIoU(pred, target, num_classes):
    """Mean intersection-over-union across the classes present in the batch.

    Args:
        pred: tensor of per-class scores; the class is taken as ``argmax`` over
            dimension 1.
        target: tensor of integer class labels, compared element-wise with the
            arg-maxed prediction.
        num_classes: class ids ``0 .. num_classes - 1`` are evaluated.

    Returns:
        The average IoU over the classes that appear in the prediction or the
        target; classes absent from both are left out. ``0.0`` when no class
        qualifies.
    """
    predicted_labels = pred.argmax(1).cpu()
    true_labels = target.cpu()

    class_scores = []
    for class_id in range(num_classes):
        in_prediction = predicted_labels == class_id
        in_target = true_labels == class_id
        union_size = (in_prediction | in_target).sum().item()
        if union_size == 0:
            # Class is absent from both tensors: it does not enter the mean.
            continue
        overlap_size = (in_prediction & in_target).sum().item()
        class_scores.append(overlap_size / union_size)

    if len(class_scores) == 0:
        return 0.0
    return sum(class_scores) / len(class_scores)


# --- Plotting Function (Kept for local plotting if needed) ---
def plot_metrics(train_losses, val_losses, train_mious, val_mious, title_prefix=""):
    """Draw loss and mIoU curves side by side for a quick local check.

    Args:
        train_losses, val_losses: per-epoch loss values (left panel).
        train_mious, val_mious: per-epoch mIoU values (right panel).
        title_prefix: text placed in front of both panel titles.

    The x axis is the 1-based epoch index derived from ``len(train_losses)``.
    The figure is shown with ``plt.show()``; nothing is returned.
    """
    epochs = range(1, len(train_losses) + 1)

    # (train series, val series, train label, val label, panel title, y label)
    panels = (
        (train_losses, val_losses, 'Training Loss', 'Validation Loss',
         f'{title_prefix} Training and Validation Loss', 'Loss'),
        (train_mious, val_mious, 'Training mIoU', 'Validation mIoU',
         f'{title_prefix} Training and Validation mIoU', 'mIoU'),
    )

    plt.figure(figsize=(12, 5))
    for position, panel in enumerate(panels, start=1):
        train_series, val_series, train_label, val_label, heading, y_label = panel
        plt.subplot(1, 2, position)
        plt.plot(epochs, train_series, 'bo-', label=train_label)
        plt.plot(epochs, val_series, 'ro-', label=val_label)
        plt.title(heading)
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.legend()
        plt.grid(True)

    plt.tight_layout()
    plt.show()

# --- Visualization Function (Adapted for W&B) ---
def log_predictions_to_wandb(model, dataloader, device, num_samples=5, num_classes=13, epoch=None):
    """Log input / ground-truth / prediction image triplets to Weights & Biases.

    Args:
        model: network whose output is arg-maxed over dimension 1 to get labels.
        dataloader: yields ``(images, masks)`` batches.
        device: device the batches are moved to before the forward pass.
        num_samples: maximum number of samples to log.
        num_classes: number of class ids that receive a colour; other label
            values stay black.
        epoch: used in the captions and as the W&B step; when ``None`` the
            current ``wandb.run.step`` is used.

    The model is switched to eval mode for inference and put back into the
    mode it was in on entry.
    """
    was_training = model.training
    model.eval()

    # `plt.cm.get_cmap` was removed in matplotlib 3.9; `plt.get_cmap` is the
    # supported spelling and accepts the same (name, lut) arguments.
    colormap = plt.get_cmap('tab20', num_classes)
    # One RGB triple per class, computed once instead of once per mask.
    palette = [(np.array(colormap(cls)[:3]) * 255).astype(np.uint8) for cls in range(num_classes)]

    def colorize_mask(mask_tensor):
        """Map a 2-D label tensor to an (H, W, 3) uint8 colour image."""
        mask_np = mask_tensor.cpu().numpy().astype(np.uint8)
        colored_mask = np.zeros((*mask_np.shape, 3), dtype=np.uint8)
        for cls, rgb in enumerate(palette):
            colored_mask[mask_np == cls] = rgb
        return colored_mask

    log_data = []  # wandb.Image objects, three per sample
    samples_shown = 0
    try:
        with torch.no_grad():
            for images, masks_true in dataloader:
                if samples_shown >= num_samples:
                    break
                images = images.to(device)
                masks_true = masks_true.to(device)
                masks_pred = model(images).argmax(1)

                remaining = num_samples - samples_shown
                for i in range(min(images.size(0), remaining)):
                    img_np = images[i].cpu().permute(1, 2, 0).numpy()
                    # Clip before the uint8 cast so values outside [0, 1]
                    # (e.g. normalised inputs) saturate instead of wrapping.
                    img_np = (np.clip(img_np, 0.0, 1.0) * 255).astype(np.uint8)

                    mask_true_color = colorize_mask(masks_true[i])
                    mask_pred_color = colorize_mask(masks_pred[i])

                    log_data.append(wandb.Image(img_np, caption=f"Epoch_{epoch}_Input_{samples_shown}"))
                    log_data.append(wandb.Image(mask_true_color, caption=f"Epoch_{epoch}_TrueMask_{samples_shown}"))
                    log_data.append(wandb.Image(mask_pred_color, caption=f"Epoch_{epoch}_PredMask_{samples_shown}"))
                    samples_shown += 1
    finally:
        # Do not leave a training model silently stuck in eval mode.
        model.train(was_training)

    if log_data:
        wandb.log({"test_predictions": log_data}, step=epoch if epoch is not None else wandb.run.step)

# --- Training Loop Function (Adapted for W&B) ---
def _run_epoch(model, loader, criterion, device, num_classes, desc, optimizer=None):
    """Run one pass over `loader` and return sample-weighted (mean loss, mean batch mIoU).

    With an `optimizer` the model is put in train mode and updated after every
    batch; without one it is put in eval mode and gradients are disabled.
    The averages are taken over the samples actually iterated. Raises
    ZeroDivisionError when the loader yields no samples.
    """
    is_training = optimizer is not None
    model.train(is_training)

    loss_sum = 0.0
    miou_sum = 0.0
    samples_seen = 0

    progress = tqdm(loader, desc=desc)
    with torch.set_grad_enabled(is_training):
        for images, masks in progress:
            images, masks = images.to(device), masks.to(device)

            if is_training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, masks)
            if is_training:
                loss.backward()
                optimizer.step()

            batch_size = images.size(0)
            loss_sum += loss.item() * batch_size
            # The metric needs no gradient history.
            miou_sum += compute_mIoU(outputs.detach(), masks, num_classes) * batch_size
            samples_seen += batch_size
            progress.set_postfix({'Loss': loss_sum/samples_seen, 'mIoU': miou_sum/samples_seen})

    # Divide by the samples that were really processed: len(loader.dataset)
    # over-counts when the loader drops the last batch or uses a sampler.
    return loss_sum / samples_seen, miou_sum / samples_seen


def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device, num_classes, log_preds_freq=10):
    """Train `model`, validate after every epoch and log everything to W&B.

    Args:
        model: network to optimise.
        train_loader, val_loader: loaders yielding ``(images, masks)`` batches.
        criterion: loss called as ``criterion(outputs, masks)``.
        optimizer: optimiser stepping the model parameters.
        num_epochs: number of epochs to run.
        device: device the batches are moved to.
        num_classes: forwarded to ``compute_mIoU`` and to prediction logging.
        log_preds_freq: log sample predictions every this many epochs (and
            always after the last epoch); a falsy value disables the periodic
            logging.

    Returns:
        ``(model, history)`` where ``history`` maps 'train_loss', 'val_loss',
        'train_miou' and 'val_miou' to per-epoch lists.

    Requires an active W&B run: metrics are logged with the 1-based epoch as
    step and the best validation mIoU is written to the run summary.
    """
    # Log gradients and parameters every 100 steps.
    wandb.watch(model, criterion, log="all", log_freq=100)

    best_val_miou = -1.0
    history = {'train_loss': [], 'val_loss': [], 'train_miou': [], 'val_miou': []}

    for epoch in range(num_epochs):
        epoch_train_loss, epoch_train_miou = _run_epoch(
            model, train_loader, criterion, device, num_classes,
            desc=f"Epoch {epoch+1}/{num_epochs} [Train]", optimizer=optimizer)
        epoch_val_loss, epoch_val_miou = _run_epoch(
            model, val_loader, criterion, device, num_classes,
            desc=f"Epoch {epoch+1}/{num_epochs} [Val]")

        history['train_loss'].append(epoch_train_loss)
        history['train_miou'].append(epoch_train_miou)
        history['val_loss'].append(epoch_val_loss)
        history['val_miou'].append(epoch_val_miou)

        print(f"Epoch {epoch+1}/{num_epochs} => "
              f"Train Loss: {epoch_train_loss:.4f}, Train mIoU: {epoch_train_miou:.4f} | "
              f"Val Loss: {epoch_val_loss:.4f}, Val mIoU: {epoch_val_miou:.4f}")

        wandb.log({
            "epoch": epoch + 1,
            "train_loss": epoch_train_loss,
            "train_miou": epoch_train_miou,
            "val_loss": epoch_val_loss,
            "val_miou": epoch_val_miou
        }, step=epoch + 1) # Use epoch as the step

        # Sample predictions: periodically and always after the final epoch.
        is_periodic = bool(log_preds_freq) and (epoch + 1) % log_preds_freq == 0
        if is_periodic or epoch == num_epochs - 1:
            print(f"Logging predictions to W&B for epoch {epoch+1}...")
            log_predictions_to_wandb(model, val_loader, device, num_samples=5, num_classes=num_classes, epoch=epoch+1)

        if epoch_val_miou > best_val_miou:
            best_val_miou = epoch_val_miou
            # Hook for checkpointing: save model.state_dict() here and, if
            # wanted, upload it as a wandb.Artifact of type 'model'.
            wandb.run.summary["best_val_miou"] = best_val_miou

    return model, history

# --- Evaluation Function (Adapted for W&B) ---
def evaluate_model(model, test_loader, criterion, device, num_classes):
    """Evaluate `model` on `test_loader` and report sample-weighted loss and mIoU.

    Each batch's loss and mIoU are weighted by the batch size and the totals
    are divided by the number of samples actually seen, so a smaller final
    batch does not skew the averages. (This gives the per-sample mean loss
    provided `criterion` itself returns a per-batch mean.)

    Args:
        model: Network to evaluate; it is switched to eval mode and left there.
        test_loader: Iterable yielding `(images, masks)` batches.
        criterion: Callable `criterion(outputs, masks)` returning a scalar
            tensor.
        device: Device the batches are moved to before the forward pass.
        num_classes: Number of classes, forwarded to `compute_mIoU`.

    Returns:
        Tuple `(test_loss, test_miou)` averaged over all evaluated samples.

    Raises:
        ValueError: If `test_loader` yields no samples.
    """
    model.eval()
    running_test_loss = 0.0
    running_test_miou = 0.0
    test_samples = 0

    pbar_test = tqdm(test_loader, desc="Testing")
    with torch.no_grad():
        for images, masks in pbar_test:
            images, masks = images.to(device), masks.to(device)
            batch_size = images.size(0)
            outputs = model(images)
            loss = criterion(outputs, masks)

            running_test_loss += loss.item() * batch_size
            batch_miou = compute_mIoU(outputs, masks, num_classes)
            running_test_miou += batch_miou * batch_size
            test_samples += batch_size
            pbar_test.set_postfix({'Loss': running_test_loss/test_samples, 'mIoU': running_test_miou/test_samples})

    if test_samples == 0:
        raise ValueError("evaluate_model received an empty test_loader; nothing to evaluate.")

    # Normalize by the samples actually processed rather than
    # len(test_loader.dataset): the two differ when the loader drops the last
    # batch or uses a subset sampler, and some datasets define no __len__.
    test_loss = running_test_loss / test_samples
    test_miou = running_test_miou / test_samples

    print("\n--- Test Set Evaluation ---")
    print(f"Test Loss: {test_loss:.4f}")
    print(f"Test mIoU: {test_miou:.4f}")
    print("---------------------------\n")

    # --- W&B Logging ---
    # Log final test metrics to the summary of the run. `wandb.run` is None
    # when no run is active, so evaluation still works outside a W&B session.
    if wandb.run is not None:
        wandb.run.summary["test_loss"] = test_loss
        wandb.run.summary["test_miou"] = test_miou

    return test_loss, test_miou

In [4]:
if __name__ == "__main__":
    # Driver: trains every U-Net variant with identical data splits and
    # hyperparameters, evaluates each on the held-out test set, and records
    # everything in a separate Weights & Biases run per variant.

    # --- Configuration ---
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {DEVICE}")

    # --- W&B Configuration ---
    WANDB_PROJECT = "unet-segmentation-variants" # CHANGE THIS to your project name
    WANDB_ENTITY = None # Set to your W&B username or team name, or leave as None

    # --- IMPORTANT: SET YOUR DATA PATHS HERE ---
    TRAIN_DIR = "seg_dataset_256/dataset_256/train" # Contains 'images' and 'labels' subfolders
    TEST_DIR = "seg_dataset_256/dataset_256/test"   # Contains 'images' and 'labels' subfolders
    # ---

    NUM_CLASSES = 13
    IMG_SIZE = (256, 256)
    BATCH_SIZE = 8
    LEARNING_RATE = 1e-4
    NUM_EPOCHS = 50 # Min 50 epochs
    VALIDATION_SPLIT = 0.15
    RANDOM_SEED = 42
    LOG_PREDS_FREQ = 10 # Log prediction images every 10 epochs

    torch.manual_seed(RANDOM_SEED)
    np.random.seed(RANDOM_SEED)
    # DEVICE is a torch.device, so comparing it to the string 'cuda' is always
    # False; check the device type instead so the CUDA generators get seeded.
    if DEVICE.type == "cuda":
        torch.cuda.manual_seed_all(RANDOM_SEED)

    # --- Base Hyperparameter Config for W&B ---
    base_config = {
        "learning_rate": LEARNING_RATE,
        "batch_size": BATCH_SIZE,
        "num_epochs": NUM_EPOCHS,
        "validation_split": VALIDATION_SPLIT,
        "image_size": IMG_SIZE[0],
        "num_classes": NUM_CLASSES,
        "seed": RANDOM_SEED,
        "optimizer": "Adam",
        "loss_function": "CrossEntropyLoss"
    }


    # --- Datasets and DataLoaders ---
    base_transform = None # Add augmentations here if needed

    full_train_dataset = SegmentationDataset(
        image_dir=os.path.join(TRAIN_DIR, 'images'),
        label_dir=os.path.join(TRAIN_DIR, 'labels'),
        target_size=IMG_SIZE,
        transform=base_transform
    )
    # Carve a fixed, seeded validation subset out of the training data.
    num_train = len(full_train_dataset)
    num_val = int(np.floor(VALIDATION_SPLIT * num_train))
    num_train_split = num_train - num_val
    train_dataset, val_dataset = random_split(
        full_train_dataset, [num_train_split, num_val],
        generator=torch.Generator().manual_seed(RANDOM_SEED)
    )
    print(f"Split: {len(train_dataset)} training, {len(val_dataset)} validation")

    test_dataset = SegmentationDataset(
        image_dir=os.path.join(TEST_DIR, 'images'),
        label_dir=os.path.join(TEST_DIR, 'labels'),
        target_size=IMG_SIZE,
        transform=None
    )
    print(f"Test set size: {len(test_dataset)}")

    # Pinned host memory only helps host-to-GPU copies; requesting it on a
    # CPU-only machine just triggers a warning.
    PIN_MEMORY = DEVICE.type == "cuda"
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=PIN_MEMORY)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=PIN_MEMORY)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=PIN_MEMORY)

    # --- Model Training Loop ---
    # Each entry maps a display name to the keyword arguments passed to UNet.
    model_variants = {
        "vanilla": {"variant": "vanilla"},
        "noskip": {"variant": "noskip"},
        "residual": {"variant": "residual"},
        "attention": {"variant": "attention"}
    }
    results = {}  # variant name -> test mIoU

    for name, params in model_variants.items():
        print(f"\n===== Training U-Net ({name}) =====")

        # --- Initialize W&B Run ---
        run_config = base_config.copy()
        run_config.update(params) # Add variant-specific params
        run_name = f"unet-{name}-lr{LEARNING_RATE}-bs{BATCH_SIZE}-ep{NUM_EPOCHS}"

        # Using the run as a context manager guarantees it is closed (and
        # marked as failed) if training or evaluation raises, so the next
        # variant or notebook re-run does not inherit a dangling run.
        with wandb.init(
            project=WANDB_PROJECT,
            entity=WANDB_ENTITY,
            name=run_name,
            config=run_config,
            reinit=True # Allow reinitializing within the same script
        ):
            # Instantiate Model
            model = UNet(num_classes=NUM_CLASSES, **params).to(DEVICE)
            print(f"Model Parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}")

            criterion = nn.CrossEntropyLoss()
            optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

            # Train the model
            trained_model, history = train_model(
                model, train_loader, val_loader, criterion, optimizer, NUM_EPOCHS, DEVICE, NUM_CLASSES, LOG_PREDS_FREQ
            )

            # Evaluate on Test Set
            # NOTE(review): train_model returns the final-epoch weights (its
            # best-checkpoint saving is commented out), so these test metrics
            # reflect the last epoch, not the best validation epoch.
            test_loss, test_miou = evaluate_model(
                trained_model, test_loader, criterion, DEVICE, NUM_CLASSES
            )
            results[name] = test_miou

            # Log final predictions from test set to W&B
            print(f"--- Logging final Test Predictions for U-Net ({name}) ---")
            log_predictions_to_wandb(trained_model, test_loader, DEVICE, num_samples=10, num_classes=NUM_CLASSES, epoch=NUM_EPOCHS) # Log more test samples

        # Optional: Plot locally using history if needed
        # plot_metrics(history['train_loss'], history['val_loss'], history['train_miou'], history['val_miou'], title_prefix=f"U-Net ({name})")


    # --- Final Results ---
    print("\n===== Final Test mIoU Results =====")
    for name, miou in results.items():
        print(f"U-Net ({name}): {miou:.4f}") # These results are also in the W&B run summaries
    print("===================================\n")

    # --- Discussion Placeholders ---
    print("\n--- Discussion Points (Refer to W&B plots/results) ---")
    print("3.2 U-Net without skip connections:")
    print(" - Q: What differences do you observe in the visualized results (W&B images) compared to the standard U-Net results?")
    print(" - A: [Analyze logged images in W&B - e.g., NoSkip likely shows much poorer boundary definition, smoother/blurrier outputs, possibly missing small objects compared to Vanilla.]")
    print(" - Q: Discuss the importance of skip connections using W&B metrics/plots.")
    print(" - A: [Compare mIoU curves and final test mIoU in W&B for Vanilla vs NoSkip. Expect significantly lower mIoU for NoSkip. Explain how skip connections preserve high-res spatial info lost in pooling, enabling precise localization crucial for segmentation.]")
    print("\n3.3 Residual U-Net:")
    print(" - [Compare Residual U-Net metrics (loss, mIoU curves, test mIoU) and visualizations in W&B against Vanilla U-Net. Did residual blocks improve training stability (smoother loss curves?) or final performance? Discuss the potential benefits (gradient flow) vs. added complexity/parameters.]")
    print("-------------------------\n")

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Using device: cuda
Split: 1700 training, 300 validation
Test set size: 500

===== Training U-Net (vanilla) =====


[34m[1mwandb[0m: Currently logged in as: [33msiddharth-singh2504[0m ([33msiddharth-singh2504-iiit-hyderabad[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Model Parameters: 31038413


Epoch 1/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 1/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 1/50 => Train Loss: 1.0515, Train mIoU: 0.3540 | Val Loss: 0.7024, Val mIoU: 0.5179


Epoch 2/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 2/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 2/50 => Train Loss: 0.4719, Train mIoU: 0.5582 | Val Loss: 0.3532, Val mIoU: 0.5747


Epoch 3/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 3/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 3/50 => Train Loss: 0.2896, Train mIoU: 0.6009 | Val Loss: 0.2266, Val mIoU: 0.6799


Epoch 4/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 4/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 4/50 => Train Loss: 0.1993, Train mIoU: 0.6904 | Val Loss: 0.1706, Val mIoU: 0.7164


Epoch 5/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 5/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 5/50 => Train Loss: 0.1484, Train mIoU: 0.7391 | Val Loss: 0.1339, Val mIoU: 0.7460


Epoch 6/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 6/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 6/50 => Train Loss: 0.1247, Train mIoU: 0.7624 | Val Loss: 0.1293, Val mIoU: 0.7452


Epoch 7/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 7/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 7/50 => Train Loss: 0.1003, Train mIoU: 0.7979 | Val Loss: 0.0939, Val mIoU: 0.8050


Epoch 8/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 8/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 8/50 => Train Loss: 0.0877, Train mIoU: 0.8137 | Val Loss: 0.1069, Val mIoU: 0.8059


Epoch 9/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 9/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 9/50 => Train Loss: 0.0760, Train mIoU: 0.8356 | Val Loss: 0.0779, Val mIoU: 0.8278


Epoch 10/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 10/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 10/50 => Train Loss: 0.0666, Train mIoU: 0.8528 | Val Loss: 0.0677, Val mIoU: 0.8522
Logging predictions to W&B for epoch 10...


  colors = plt.cm.get_cmap('tab20', num_classes)


Epoch 11/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 11/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 11/50 => Train Loss: 0.0609, Train mIoU: 0.8644 | Val Loss: 0.0796, Val mIoU: 0.8236


Epoch 12/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 12/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 12/50 => Train Loss: 0.0615, Train mIoU: 0.8552 | Val Loss: 0.0589, Val mIoU: 0.8563


Epoch 13/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 13/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 13/50 => Train Loss: 0.0506, Train mIoU: 0.8811 | Val Loss: 0.0535, Val mIoU: 0.8741


Epoch 14/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 14/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 14/50 => Train Loss: 0.0459, Train mIoU: 0.8929 | Val Loss: 0.0525, Val mIoU: 0.8715


Epoch 15/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 15/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 15/50 => Train Loss: 0.0434, Train mIoU: 0.8952 | Val Loss: 0.0473, Val mIoU: 0.8873


Epoch 16/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 16/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 16/50 => Train Loss: 0.0390, Train mIoU: 0.9069 | Val Loss: 0.0444, Val mIoU: 0.8890


Epoch 17/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 17/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 17/50 => Train Loss: 0.0427, Train mIoU: 0.8933 | Val Loss: 0.0529, Val mIoU: 0.8549


Epoch 18/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 18/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 18/50 => Train Loss: 0.0369, Train mIoU: 0.9074 | Val Loss: 0.0416, Val mIoU: 0.8934


Epoch 19/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 19/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 19/50 => Train Loss: 0.0328, Train mIoU: 0.9192 | Val Loss: 0.0384, Val mIoU: 0.9008


Epoch 20/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 20/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 20/50 => Train Loss: 0.0315, Train mIoU: 0.9221 | Val Loss: 0.0377, Val mIoU: 0.9046
Logging predictions to W&B for epoch 20...


Epoch 21/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 21/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 21/50 => Train Loss: 0.0287, Train mIoU: 0.9303 | Val Loss: 0.0358, Val mIoU: 0.9096


Epoch 22/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 22/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 22/50 => Train Loss: 0.0281, Train mIoU: 0.9299 | Val Loss: 0.0362, Val mIoU: 0.9045


Epoch 23/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 23/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 23/50 => Train Loss: 0.0262, Train mIoU: 0.9364 | Val Loss: 0.0342, Val mIoU: 0.9115


Epoch 24/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 24/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 24/50 => Train Loss: 0.0254, Train mIoU: 0.9377 | Val Loss: 0.0334, Val mIoU: 0.9132


Epoch 25/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 25/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 25/50 => Train Loss: 0.0238, Train mIoU: 0.9404 | Val Loss: 0.0339, Val mIoU: 0.9127


Epoch 26/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 26/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 26/50 => Train Loss: 0.0786, Train mIoU: 0.8110 | Val Loss: 0.0521, Val mIoU: 0.8582


Epoch 27/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 27/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 27/50 => Train Loss: 0.0367, Train mIoU: 0.9032 | Val Loss: 0.0375, Val mIoU: 0.8965


Epoch 28/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 28/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 28/50 => Train Loss: 0.0266, Train mIoU: 0.9316 | Val Loss: 0.0321, Val mIoU: 0.9148


Epoch 29/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 29/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 29/50 => Train Loss: 0.0232, Train mIoU: 0.9419 | Val Loss: 0.0307, Val mIoU: 0.9183


Epoch 30/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 30/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 30/50 => Train Loss: 0.0216, Train mIoU: 0.9466 | Val Loss: 0.0302, Val mIoU: 0.9211
Logging predictions to W&B for epoch 30...


Epoch 31/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 31/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 31/50 => Train Loss: 0.0206, Train mIoU: 0.9496 | Val Loss: 0.0301, Val mIoU: 0.9201


Epoch 32/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 32/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 32/50 => Train Loss: 0.0201, Train mIoU: 0.9507 | Val Loss: 0.0293, Val mIoU: 0.9223


Epoch 33/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 33/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 33/50 => Train Loss: 0.0196, Train mIoU: 0.9512 | Val Loss: 0.0291, Val mIoU: 0.9223


Epoch 34/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 34/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 34/50 => Train Loss: 0.0187, Train mIoU: 0.9538 | Val Loss: 0.0292, Val mIoU: 0.9238


Epoch 35/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 35/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 35/50 => Train Loss: 0.0182, Train mIoU: 0.9565 | Val Loss: 0.0395, Val mIoU: 0.9032


Epoch 36/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 36/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 36/50 => Train Loss: 0.0361, Train mIoU: 0.8992 | Val Loss: 0.0334, Val mIoU: 0.9094


Epoch 37/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 37/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 37/50 => Train Loss: 0.0205, Train mIoU: 0.9467 | Val Loss: 0.0283, Val mIoU: 0.9246


Epoch 38/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 38/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 38/50 => Train Loss: 0.0176, Train mIoU: 0.9568 | Val Loss: 0.0278, Val mIoU: 0.9262


Epoch 39/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 39/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 39/50 => Train Loss: 0.0166, Train mIoU: 0.9599 | Val Loss: 0.0274, Val mIoU: 0.9292


Epoch 40/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 40/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 40/50 => Train Loss: 0.0159, Train mIoU: 0.9623 | Val Loss: 0.0278, Val mIoU: 0.9261
Logging predictions to W&B for epoch 40...


Epoch 41/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 41/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 41/50 => Train Loss: 0.0157, Train mIoU: 0.9629 | Val Loss: 0.0279, Val mIoU: 0.9294


Epoch 42/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 42/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 42/50 => Train Loss: 0.0152, Train mIoU: 0.9647 | Val Loss: 0.0285, Val mIoU: 0.9270


Epoch 43/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 43/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 43/50 => Train Loss: 0.0150, Train mIoU: 0.9643 | Val Loss: 0.0276, Val mIoU: 0.9296


Epoch 44/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 44/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 44/50 => Train Loss: 0.0145, Train mIoU: 0.9649 | Val Loss: 0.0281, Val mIoU: 0.9272


Epoch 45/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 45/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 45/50 => Train Loss: 0.0143, Train mIoU: 0.9656 | Val Loss: 0.0278, Val mIoU: 0.9278


Epoch 46/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 46/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 46/50 => Train Loss: 0.0144, Train mIoU: 0.9661 | Val Loss: 0.0276, Val mIoU: 0.9298


Epoch 47/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 47/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 47/50 => Train Loss: 0.0137, Train mIoU: 0.9680 | Val Loss: 0.0277, Val mIoU: 0.9301


Epoch 48/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 48/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 48/50 => Train Loss: 0.0131, Train mIoU: 0.9674 | Val Loss: 0.0282, Val mIoU: 0.9287


Epoch 49/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 49/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 49/50 => Train Loss: 0.0129, Train mIoU: 0.9698 | Val Loss: 0.0274, Val mIoU: 0.9318


Epoch 50/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 50/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 50/50 => Train Loss: 0.0491, Train mIoU: 0.8753 | Val Loss: 0.0398, Val mIoU: 0.8872
Logging predictions to W&B for epoch 50...


Testing:   0%|          | 0/63 [00:00<?, ?it/s]


--- Test Set Evaluation ---
Test Loss: 0.0395
Test mIoU: 0.8612
---------------------------

--- Logging final Test Predictions for U-Net (vanilla) ---


0,1
epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
train_loss,█▅▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂
train_miou,▁▃▄▅▅▆▆▆▇▇▇▇▇▇▇▇████▆▇███████▇█████████▇
val_loss,█▅▄▃▃▃▂▂▂▂▂▁▁▂▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_miou,▁▂▄▄▅▆▆▆▇▆▇▇▇▇▇██████▇█████████████████▇

0,1
best_val_miou,0.93183
epoch,50.0
test_loss,0.03951
test_miou,0.86116
train_loss,0.04914
train_miou,0.8753
val_loss,0.0398
val_miou,0.88722



===== Training U-Net (noskip) =====


Model Parameters: 27904973


Epoch 1/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 1/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 1/50 => Train Loss: 1.1502, Train mIoU: 0.2408 | Val Loss: 0.7799, Val mIoU: 0.2720


Epoch 2/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 2/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 2/50 => Train Loss: 0.5820, Train mIoU: 0.4028 | Val Loss: 0.4602, Val mIoU: 0.4280


Epoch 3/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 3/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 3/50 => Train Loss: 0.3747, Train mIoU: 0.4565 | Val Loss: 0.3101, Val mIoU: 0.5013


Epoch 4/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 4/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 4/50 => Train Loss: 0.2645, Train mIoU: 0.5492 | Val Loss: 0.2404, Val mIoU: 0.5615


Epoch 5/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 5/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 5/50 => Train Loss: 0.2046, Train mIoU: 0.6009 | Val Loss: 0.1902, Val mIoU: 0.6563


Epoch 6/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 6/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 6/50 => Train Loss: 0.1669, Train mIoU: 0.7017 | Val Loss: 0.1638, Val mIoU: 0.6969


Epoch 7/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 7/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 7/50 => Train Loss: 0.1411, Train mIoU: 0.7368 | Val Loss: 0.1386, Val mIoU: 0.7397


Epoch 8/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 8/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 8/50 => Train Loss: 0.1262, Train mIoU: 0.7598 | Val Loss: 0.1260, Val mIoU: 0.7576


Epoch 9/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 9/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 9/50 => Train Loss: 0.1124, Train mIoU: 0.7821 | Val Loss: 0.1192, Val mIoU: 0.7631


Epoch 10/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 10/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 10/50 => Train Loss: 0.1064, Train mIoU: 0.7896 | Val Loss: 0.1127, Val mIoU: 0.7730
Logging predictions to W&B for epoch 10...


  colors = plt.cm.get_cmap('tab20', num_classes)


Epoch 11/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 11/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 11/50 => Train Loss: 0.0997, Train mIoU: 0.7978 | Val Loss: 0.1081, Val mIoU: 0.7765


Epoch 12/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 12/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 12/50 => Train Loss: 0.0904, Train mIoU: 0.8165 | Val Loss: 0.1027, Val mIoU: 0.7902


Epoch 13/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 13/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 13/50 => Train Loss: 0.0849, Train mIoU: 0.8265 | Val Loss: 0.0989, Val mIoU: 0.7910


Epoch 14/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 14/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 14/50 => Train Loss: 0.0810, Train mIoU: 0.8327 | Val Loss: 0.0959, Val mIoU: 0.8022


Epoch 15/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 15/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 15/50 => Train Loss: 0.0779, Train mIoU: 0.8382 | Val Loss: 0.0956, Val mIoU: 0.7920


Epoch 16/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 16/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 16/50 => Train Loss: 0.0745, Train mIoU: 0.8445 | Val Loss: 0.0929, Val mIoU: 0.8079


Epoch 17/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 17/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 17/50 => Train Loss: 0.1687, Train mIoU: 0.6542 | Val Loss: 0.1392, Val mIoU: 0.6855


Epoch 18/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 18/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 18/50 => Train Loss: 0.1105, Train mIoU: 0.7550 | Val Loss: 0.1049, Val mIoU: 0.7688


Epoch 19/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 19/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 19/50 => Train Loss: 0.0864, Train mIoU: 0.8190 | Val Loss: 0.0937, Val mIoU: 0.8031


Epoch 20/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 20/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 20/50 => Train Loss: 0.0748, Train mIoU: 0.8479 | Val Loss: 0.0911, Val mIoU: 0.8123
Logging predictions to W&B for epoch 20...


Epoch 21/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 21/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 21/50 => Train Loss: 0.0703, Train mIoU: 0.8602 | Val Loss: 0.0880, Val mIoU: 0.8201


Epoch 22/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 22/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 22/50 => Train Loss: 0.0671, Train mIoU: 0.8657 | Val Loss: 0.0881, Val mIoU: 0.8179


Epoch 23/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 23/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 23/50 => Train Loss: 0.0658, Train mIoU: 0.8687 | Val Loss: 0.0863, Val mIoU: 0.8254


Epoch 24/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 24/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 24/50 => Train Loss: 0.0634, Train mIoU: 0.8752 | Val Loss: 0.0883, Val mIoU: 0.8209


Epoch 25/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 25/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 25/50 => Train Loss: 0.0618, Train mIoU: 0.8773 | Val Loss: 0.0862, Val mIoU: 0.8240


Epoch 26/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 26/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 26/50 => Train Loss: 0.0604, Train mIoU: 0.8805 | Val Loss: 0.0864, Val mIoU: 0.8252


Epoch 27/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 27/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 27/50 => Train Loss: 0.0597, Train mIoU: 0.8818 | Val Loss: 0.0853, Val mIoU: 0.8254


Epoch 28/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 28/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 28/50 => Train Loss: 0.0596, Train mIoU: 0.8806 | Val Loss: 0.0912, Val mIoU: 0.8100


Epoch 29/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 29/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 29/50 => Train Loss: 0.1897, Train mIoU: 0.5751 | Val Loss: 0.1380, Val mIoU: 0.6862


Epoch 30/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 30/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 30/50 => Train Loss: 0.1042, Train mIoU: 0.7631 | Val Loss: 0.0985, Val mIoU: 0.7851
Logging predictions to W&B for epoch 30...


Epoch 31/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 31/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 31/50 => Train Loss: 0.0778, Train mIoU: 0.8369 | Val Loss: 0.0892, Val mIoU: 0.8082


Epoch 32/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 32/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 32/50 => Train Loss: 0.0680, Train mIoU: 0.8643 | Val Loss: 0.0868, Val mIoU: 0.8215


Epoch 33/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 33/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 33/50 => Train Loss: 0.0625, Train mIoU: 0.8779 | Val Loss: 0.0859, Val mIoU: 0.8263


Epoch 34/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 34/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 34/50 => Train Loss: 0.0597, Train mIoU: 0.8850 | Val Loss: 0.0856, Val mIoU: 0.8274


Epoch 35/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 35/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 35/50 => Train Loss: 0.0580, Train mIoU: 0.8894 | Val Loss: 0.0855, Val mIoU: 0.8277


Epoch 36/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 36/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 36/50 => Train Loss: 0.0565, Train mIoU: 0.8926 | Val Loss: 0.0837, Val mIoU: 0.8330


Epoch 37/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 37/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 37/50 => Train Loss: 0.0551, Train mIoU: 0.8947 | Val Loss: 0.0844, Val mIoU: 0.8328


Epoch 38/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 38/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 38/50 => Train Loss: 0.0542, Train mIoU: 0.8967 | Val Loss: 0.0849, Val mIoU: 0.8315


Epoch 39/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 39/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 39/50 => Train Loss: 0.0536, Train mIoU: 0.8985 | Val Loss: 0.0854, Val mIoU: 0.8333


Epoch 40/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 40/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 40/50 => Train Loss: 0.0534, Train mIoU: 0.8991 | Val Loss: 0.0851, Val mIoU: 0.8347
Logging predictions to W&B for epoch 40...


Epoch 41/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 41/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 41/50 => Train Loss: 0.0523, Train mIoU: 0.9002 | Val Loss: 0.0853, Val mIoU: 0.8361


Epoch 42/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 42/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 42/50 => Train Loss: 0.0517, Train mIoU: 0.9015 | Val Loss: 0.0852, Val mIoU: 0.8318


Epoch 43/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 43/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 43/50 => Train Loss: 0.0507, Train mIoU: 0.9044 | Val Loss: 0.0845, Val mIoU: 0.8335


Epoch 44/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 44/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 44/50 => Train Loss: 0.0500, Train mIoU: 0.9063 | Val Loss: 0.0860, Val mIoU: 0.8315


Epoch 45/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 45/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 45/50 => Train Loss: 0.0496, Train mIoU: 0.9062 | Val Loss: 0.0875, Val mIoU: 0.8352


Epoch 46/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 46/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 46/50 => Train Loss: 0.0489, Train mIoU: 0.9068 | Val Loss: 0.0852, Val mIoU: 0.8351


Epoch 47/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 47/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 47/50 => Train Loss: 0.0484, Train mIoU: 0.9084 | Val Loss: 0.0863, Val mIoU: 0.8377


Epoch 48/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 48/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 48/50 => Train Loss: 0.0481, Train mIoU: 0.9105 | Val Loss: 0.0855, Val mIoU: 0.8375


Epoch 49/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 49/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 49/50 => Train Loss: 0.0474, Train mIoU: 0.9107 | Val Loss: 0.0865, Val mIoU: 0.8364


Epoch 50/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 50/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 50/50 => Train Loss: 0.0463, Train mIoU: 0.9122 | Val Loss: 0.0854, Val mIoU: 0.8368
Logging predictions to W&B for epoch 50...


Testing:   0%|          | 0/63 [00:00<?, ?it/s]


--- Test Set Evaluation ---
Test Loss: 0.0889
Test mIoU: 0.8056
---------------------------

--- Logging final Test Predictions for U-Net (noskip) ---


0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,█▄▃▂▂▂▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_miou,▁▃▃▄▅▆▆▇▇▇▇▇▇▇▅▇▇▇█████▄▆███████████████
val_loss,█▅▃▃▂▂▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_miou,▁▄▅▆▆▇▇▇▇▇█▇█▆▇████████▆▇███████████████

0,1
best_val_miou,0.83767
epoch,50.0
test_loss,0.08886
test_miou,0.80558
train_loss,0.04633
train_miou,0.91217
val_loss,0.08541
val_miou,0.8368



===== Training U-Net (residual) =====


Model Parameters: 51315597


Epoch 1/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 1/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 1/50 => Train Loss: 0.5884, Train mIoU: 0.3776 | Val Loss: 0.2694, Val mIoU: 0.5522


Epoch 2/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 2/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 2/50 => Train Loss: 0.2077, Train mIoU: 0.5931 | Val Loss: 0.1757, Val mIoU: 0.6648


Epoch 3/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 3/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 3/50 => Train Loss: 0.1385, Train mIoU: 0.7055 | Val Loss: 0.1198, Val mIoU: 0.7123


Epoch 4/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 4/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 4/50 => Train Loss: 0.1046, Train mIoU: 0.7619 | Val Loss: 0.1012, Val mIoU: 0.7374


Epoch 5/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 5/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 5/50 => Train Loss: 0.0832, Train mIoU: 0.7973 | Val Loss: 0.0812, Val mIoU: 0.7932


Epoch 6/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 6/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 6/50 => Train Loss: 0.0694, Train mIoU: 0.8277 | Val Loss: 0.0697, Val mIoU: 0.8224


Epoch 7/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 7/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 7/50 => Train Loss: 0.0664, Train mIoU: 0.8349 | Val Loss: 0.0668, Val mIoU: 0.8269


Epoch 8/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 8/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 8/50 => Train Loss: 0.0534, Train mIoU: 0.8628 | Val Loss: 0.0530, Val mIoU: 0.8630


Epoch 9/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 9/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 9/50 => Train Loss: 0.0461, Train mIoU: 0.8802 | Val Loss: 0.0503, Val mIoU: 0.8642


Epoch 10/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 10/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 10/50 => Train Loss: 0.0413, Train mIoU: 0.8924 | Val Loss: 0.0469, Val mIoU: 0.8761
Logging predictions to W&B for epoch 10...


  colors = plt.cm.get_cmap('tab20', num_classes)


Epoch 11/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 11/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 11/50 => Train Loss: 0.0378, Train mIoU: 0.9001 | Val Loss: 0.0440, Val mIoU: 0.8788


Epoch 12/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 12/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 12/50 => Train Loss: 0.0551, Train mIoU: 0.8629 | Val Loss: 0.0637, Val mIoU: 0.8321


Epoch 13/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 13/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 13/50 => Train Loss: 0.0408, Train mIoU: 0.8884 | Val Loss: 0.0409, Val mIoU: 0.8851


Epoch 14/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 14/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 14/50 => Train Loss: 0.0318, Train mIoU: 0.9158 | Val Loss: 0.0374, Val mIoU: 0.8974


Epoch 15/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 15/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 15/50 => Train Loss: 0.0286, Train mIoU: 0.9242 | Val Loss: 0.0364, Val mIoU: 0.8996


Epoch 16/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 16/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 16/50 => Train Loss: 0.0266, Train mIoU: 0.9305 | Val Loss: 0.0355, Val mIoU: 0.9020


Epoch 17/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 17/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 17/50 => Train Loss: 0.0247, Train mIoU: 0.9359 | Val Loss: 0.0349, Val mIoU: 0.9044


Epoch 18/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 18/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 18/50 => Train Loss: 0.0235, Train mIoU: 0.9384 | Val Loss: 0.0334, Val mIoU: 0.9101


Epoch 19/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 19/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 19/50 => Train Loss: 0.0232, Train mIoU: 0.9376 | Val Loss: 0.0330, Val mIoU: 0.9090


Epoch 20/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 20/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 20/50 => Train Loss: 0.0212, Train mIoU: 0.9444 | Val Loss: 0.0334, Val mIoU: 0.9084
Logging predictions to W&B for epoch 20...


Epoch 21/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 21/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 21/50 => Train Loss: 0.0203, Train mIoU: 0.9470 | Val Loss: 0.0318, Val mIoU: 0.9108


Epoch 22/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 22/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 22/50 => Train Loss: 0.0191, Train mIoU: 0.9513 | Val Loss: 0.0314, Val mIoU: 0.9144


Epoch 23/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 23/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 23/50 => Train Loss: 0.0186, Train mIoU: 0.9520 | Val Loss: 0.0329, Val mIoU: 0.9117


Epoch 24/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 24/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 24/50 => Train Loss: 0.0198, Train mIoU: 0.9453 | Val Loss: 0.0363, Val mIoU: 0.9007


Epoch 25/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 25/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 25/50 => Train Loss: 0.0175, Train mIoU: 0.9556 | Val Loss: 0.0305, Val mIoU: 0.9178


Epoch 26/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 26/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 26/50 => Train Loss: 0.0156, Train mIoU: 0.9610 | Val Loss: 0.0298, Val mIoU: 0.9202


Epoch 27/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 27/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 27/50 => Train Loss: 0.0149, Train mIoU: 0.9629 | Val Loss: 0.0293, Val mIoU: 0.9212


Epoch 28/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 28/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 28/50 => Train Loss: 0.0146, Train mIoU: 0.9637 | Val Loss: 0.0297, Val mIoU: 0.9205


Epoch 29/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 29/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 29/50 => Train Loss: 0.0139, Train mIoU: 0.9653 | Val Loss: 0.0310, Val mIoU: 0.9182


Epoch 30/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 30/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 30/50 => Train Loss: 0.0135, Train mIoU: 0.9664 | Val Loss: 0.0301, Val mIoU: 0.9214
Logging predictions to W&B for epoch 30...


Epoch 31/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 31/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 31/50 => Train Loss: 0.0326, Train mIoU: 0.9291 | Val Loss: 0.1145, Val mIoU: 0.7214


Epoch 32/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 32/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 32/50 => Train Loss: 0.0508, Train mIoU: 0.8552 | Val Loss: 0.0438, Val mIoU: 0.8443


Epoch 33/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 33/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 33/50 => Train Loss: 0.0220, Train mIoU: 0.9364 | Val Loss: 0.0288, Val mIoU: 0.9190


Epoch 34/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 34/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 34/50 => Train Loss: 0.0153, Train mIoU: 0.9610 | Val Loss: 0.0290, Val mIoU: 0.9205


Epoch 35/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 35/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 35/50 => Train Loss: 0.0131, Train mIoU: 0.9676 | Val Loss: 0.0287, Val mIoU: 0.9246


Epoch 36/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 36/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 36/50 => Train Loss: 0.0120, Train mIoU: 0.9714 | Val Loss: 0.0286, Val mIoU: 0.9267


Epoch 37/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 37/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 37/50 => Train Loss: 0.0112, Train mIoU: 0.9735 | Val Loss: 0.0291, Val mIoU: 0.9261


Epoch 38/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 38/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 38/50 => Train Loss: 0.0107, Train mIoU: 0.9759 | Val Loss: 0.0293, Val mIoU: 0.9267


Epoch 39/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 39/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 39/50 => Train Loss: 0.0102, Train mIoU: 0.9762 | Val Loss: 0.0298, Val mIoU: 0.9267


Epoch 40/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 40/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 40/50 => Train Loss: 0.0099, Train mIoU: 0.9771 | Val Loss: 0.0300, Val mIoU: 0.9267
Logging predictions to W&B for epoch 40...


Epoch 41/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 41/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 41/50 => Train Loss: 0.0097, Train mIoU: 0.9773 | Val Loss: 0.0308, Val mIoU: 0.9239


Epoch 42/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 42/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 42/50 => Train Loss: 0.0095, Train mIoU: 0.9781 | Val Loss: 0.0307, Val mIoU: 0.9262


Epoch 43/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 43/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 43/50 => Train Loss: 0.0093, Train mIoU: 0.9782 | Val Loss: 0.0328, Val mIoU: 0.9227


Epoch 44/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 44/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 44/50 => Train Loss: 0.0094, Train mIoU: 0.9778 | Val Loss: 0.0315, Val mIoU: 0.9262


Epoch 45/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 45/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 45/50 => Train Loss: 0.0088, Train mIoU: 0.9793 | Val Loss: 0.0305, Val mIoU: 0.9264


Epoch 46/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 46/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 46/50 => Train Loss: 0.0084, Train mIoU: 0.9804 | Val Loss: 0.0338, Val mIoU: 0.9238


Epoch 47/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 47/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 47/50 => Train Loss: 0.0114, Train mIoU: 0.9664 | Val Loss: 0.0680, Val mIoU: 0.8476


Epoch 48/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 48/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 48/50 => Train Loss: 0.0410, Train mIoU: 0.8938 | Val Loss: 0.0321, Val mIoU: 0.9122


Epoch 49/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 49/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 49/50 => Train Loss: 0.0156, Train mIoU: 0.9598 | Val Loss: 0.0281, Val mIoU: 0.9252


Epoch 50/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 50/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 50/50 => Train Loss: 0.0108, Train mIoU: 0.9740 | Val Loss: 0.0285, Val mIoU: 0.9293
Logging predictions to W&B for epoch 50...


Testing:   0%|          | 0/63 [00:00<?, ?it/s]


--- Test Set Evaluation ---
Test Loss: 0.0298
Test mIoU: 0.9024
---------------------------

--- Logging final Test Predictions for U-Net (residual) ---


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
train_loss,█▃▃▂▂▂▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_miou,▁▄▅▅▆▇▇▇▇▇▇▇▇▇██████████▇▇████████████▇█
val_loss,█▃▃▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁
val_miou,▁▃▄▅▆▇▇▇▇▆▇▇▇██████▇████▄████████████▆██

0,1
best_val_miou,0.92928
epoch,50.0
test_loss,0.0298
test_miou,0.90241
train_loss,0.01083
train_miou,0.97396
val_loss,0.02852
val_miou,0.92928



===== Training U-Net (attention) =====


Model Parameters: 31389945


Epoch 1/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 1/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 1/50 => Train Loss: 1.1789, Train mIoU: 0.1553 | Val Loss: 0.8220, Val mIoU: 0.2140


Epoch 2/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 2/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 2/50 => Train Loss: 0.5681, Train mIoU: 0.3539 | Val Loss: 0.4258, Val mIoU: 0.4351


Epoch 3/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 3/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 3/50 => Train Loss: 0.3352, Train mIoU: 0.4774 | Val Loss: 0.2684, Val mIoU: 0.5606


Epoch 4/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 4/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 4/50 => Train Loss: 0.2247, Train mIoU: 0.5991 | Val Loss: 0.1885, Val mIoU: 0.6859


Epoch 5/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 5/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 5/50 => Train Loss: 0.1615, Train mIoU: 0.7245 | Val Loss: 0.1418, Val mIoU: 0.7381


Epoch 6/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 6/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 6/50 => Train Loss: 0.1250, Train mIoU: 0.7676 | Val Loss: 0.1145, Val mIoU: 0.7640


Epoch 7/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 7/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 7/50 => Train Loss: 0.1056, Train mIoU: 0.7930 | Val Loss: 0.0962, Val mIoU: 0.8102


Epoch 8/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 8/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 8/50 => Train Loss: 0.0873, Train mIoU: 0.8221 | Val Loss: 0.0828, Val mIoU: 0.8245


Epoch 9/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 9/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 9/50 => Train Loss: 0.0853, Train mIoU: 0.8206 | Val Loss: 0.1282, Val mIoU: 0.7552


Epoch 10/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 10/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 10/50 => Train Loss: 0.0783, Train mIoU: 0.8244 | Val Loss: 0.0700, Val mIoU: 0.8464
Logging predictions to W&B for epoch 10...


  colors = plt.cm.get_cmap('tab20', num_classes)


Epoch 11/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 11/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 11/50 => Train Loss: 0.0600, Train mIoU: 0.8670 | Val Loss: 0.0606, Val mIoU: 0.8573


Epoch 12/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 12/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 12/50 => Train Loss: 0.0544, Train mIoU: 0.8751 | Val Loss: 0.0558, Val mIoU: 0.8672


Epoch 13/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 13/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 13/50 => Train Loss: 0.0485, Train mIoU: 0.8862 | Val Loss: 0.0517, Val mIoU: 0.8764


Epoch 14/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 14/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 14/50 => Train Loss: 0.0574, Train mIoU: 0.8651 | Val Loss: 0.0551, Val mIoU: 0.8551


Epoch 15/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 15/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 15/50 => Train Loss: 0.0439, Train mIoU: 0.8936 | Val Loss: 0.0472, Val mIoU: 0.8854


Epoch 16/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 16/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 16/50 => Train Loss: 0.0391, Train mIoU: 0.9069 | Val Loss: 0.0438, Val mIoU: 0.8872


Epoch 17/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 17/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 17/50 => Train Loss: 0.0359, Train mIoU: 0.9142 | Val Loss: 0.0418, Val mIoU: 0.8892


Epoch 18/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 18/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 18/50 => Train Loss: 0.0341, Train mIoU: 0.9168 | Val Loss: 0.0391, Val mIoU: 0.9011


Epoch 19/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 19/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 19/50 => Train Loss: 0.0327, Train mIoU: 0.9187 | Val Loss: 0.0393, Val mIoU: 0.8951


Epoch 20/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 20/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 20/50 => Train Loss: 0.0300, Train mIoU: 0.9267 | Val Loss: 0.0366, Val mIoU: 0.9036
Logging predictions to W&B for epoch 20...


Epoch 21/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 21/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 21/50 => Train Loss: 0.0283, Train mIoU: 0.9302 | Val Loss: 0.0354, Val mIoU: 0.9090


Epoch 22/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 22/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 22/50 => Train Loss: 0.0264, Train mIoU: 0.9349 | Val Loss: 0.0344, Val mIoU: 0.9085


Epoch 23/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 23/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 23/50 => Train Loss: 0.0247, Train mIoU: 0.9401 | Val Loss: 0.0339, Val mIoU: 0.9116


Epoch 24/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 24/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 24/50 => Train Loss: 0.0653, Train mIoU: 0.8575 | Val Loss: 0.0939, Val mIoU: 0.7651


Epoch 25/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 25/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 25/50 => Train Loss: 0.0568, Train mIoU: 0.8443 | Val Loss: 0.0507, Val mIoU: 0.8657


Epoch 26/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 26/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 26/50 => Train Loss: 0.0343, Train mIoU: 0.9087 | Val Loss: 0.0361, Val mIoU: 0.9022


Epoch 27/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 27/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 27/50 => Train Loss: 0.0274, Train mIoU: 0.9283 | Val Loss: 0.0330, Val mIoU: 0.9114


Epoch 28/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 28/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 28/50 => Train Loss: 0.0246, Train mIoU: 0.9371 | Val Loss: 0.0322, Val mIoU: 0.9097


Epoch 29/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 29/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 29/50 => Train Loss: 0.0226, Train mIoU: 0.9436 | Val Loss: 0.0315, Val mIoU: 0.9160


Epoch 30/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 30/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 30/50 => Train Loss: 0.0216, Train mIoU: 0.9459 | Val Loss: 0.0307, Val mIoU: 0.9184
Logging predictions to W&B for epoch 30...


Epoch 31/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 31/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 31/50 => Train Loss: 0.0205, Train mIoU: 0.9494 | Val Loss: 0.0307, Val mIoU: 0.9188


Epoch 32/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 32/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 32/50 => Train Loss: 0.0195, Train mIoU: 0.9522 | Val Loss: 0.0297, Val mIoU: 0.9204


Epoch 33/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 33/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 33/50 => Train Loss: 0.0191, Train mIoU: 0.9535 | Val Loss: 0.0301, Val mIoU: 0.9195


Epoch 34/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 34/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 34/50 => Train Loss: 0.0191, Train mIoU: 0.9526 | Val Loss: 0.0325, Val mIoU: 0.9146


Epoch 35/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 35/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 35/50 => Train Loss: 0.0184, Train mIoU: 0.9541 | Val Loss: 0.0294, Val mIoU: 0.9228


Epoch 36/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 36/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 36/50 => Train Loss: 0.0177, Train mIoU: 0.9564 | Val Loss: 0.0294, Val mIoU: 0.9228


Epoch 37/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 37/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 37/50 => Train Loss: 0.0167, Train mIoU: 0.9588 | Val Loss: 0.0296, Val mIoU: 0.9233


Epoch 38/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 38/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 38/50 => Train Loss: 0.0164, Train mIoU: 0.9602 | Val Loss: 0.0287, Val mIoU: 0.9257


Epoch 39/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 39/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 39/50 => Train Loss: 0.0162, Train mIoU: 0.9606 | Val Loss: 0.0421, Val mIoU: 0.8896


Epoch 40/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 40/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 40/50 => Train Loss: 0.0554, Train mIoU: 0.8550 | Val Loss: 0.0418, Val mIoU: 0.8818
Logging predictions to W&B for epoch 40...


Epoch 41/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 41/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 41/50 => Train Loss: 0.0253, Train mIoU: 0.9312 | Val Loss: 0.0290, Val mIoU: 0.9196


Epoch 42/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 42/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 42/50 => Train Loss: 0.0182, Train mIoU: 0.9539 | Val Loss: 0.0277, Val mIoU: 0.9240


Epoch 43/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 43/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 43/50 => Train Loss: 0.0160, Train mIoU: 0.9611 | Val Loss: 0.0272, Val mIoU: 0.9276


Epoch 44/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 44/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 44/50 => Train Loss: 0.0150, Train mIoU: 0.9648 | Val Loss: 0.0267, Val mIoU: 0.9287


Epoch 45/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 45/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 45/50 => Train Loss: 0.0145, Train mIoU: 0.9663 | Val Loss: 0.0271, Val mIoU: 0.9297


Epoch 46/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 46/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 46/50 => Train Loss: 0.0139, Train mIoU: 0.9679 | Val Loss: 0.0275, Val mIoU: 0.9297


Epoch 47/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 47/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 47/50 => Train Loss: 0.0134, Train mIoU: 0.9693 | Val Loss: 0.0271, Val mIoU: 0.9308


Epoch 48/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 48/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 48/50 => Train Loss: 0.0130, Train mIoU: 0.9700 | Val Loss: 0.0273, Val mIoU: 0.9301


Epoch 49/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 49/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 49/50 => Train Loss: 0.0128, Train mIoU: 0.9705 | Val Loss: 0.0273, Val mIoU: 0.9297


Epoch 50/50 [Train]:   0%|          | 0/213 [00:00<?, ?it/s]

Epoch 50/50 [Val]:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 50/50 => Train Loss: 0.0126, Train mIoU: 0.9709 | Val Loss: 0.0282, Val mIoU: 0.9274
Logging predictions to W&B for epoch 50...


Testing:   0%|          | 0/63 [00:00<?, ?it/s]


--- Test Set Evaluation ---
Test Loss: 0.0301
Test mIoU: 0.8995
---------------------------

--- Logging final Test Predictions for U-Net (attention) ---


0,1
epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
train_loss,█▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_miou,▁▃▄▅▆▆▇▇▇▇▇▇██████▇▇███████████▇████████
val_loss,█▅▃▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_miou,▁▃▄▆▆▇▇▆▇▇▇▇████████▇███████████████████

0,1
best_val_miou,0.93084
epoch,50.0
test_loss,0.03015
test_miou,0.89949
train_loss,0.01258
train_miou,0.97088
val_loss,0.02818
val_miou,0.92745



===== Final Test mIoU Results =====
U-Net (vanilla): 0.8612
U-Net (noskip): 0.8056
U-Net (residual): 0.9024
U-Net (attention): 0.8995


--- Discussion Points (Refer to W&B plots/results) ---
3.2 U-Net without skip connections:
 - Q: What differences do you observe in the visualized results (W&B images) compared to the standard U-Net results?
 - A: [Analyze logged images in W&B - e.g., NoSkip likely shows much poorer boundary definition, smoother/blurrier outputs, possibly missing small objects compared to Vanilla.]
 - Q: Discuss the importance of skip connections using W&B metrics/plots.
 - A: [Compare mIoU curves and final test mIoU in W&B for Vanilla vs NoSkip. Expect significantly lower mIoU for NoSkip. Explain how skip connections preserve high-res spatial info lost in pooling, enabling precise localization crucial for segmentation.]

3.3 Residual U-Net:
 - [Compare Residual U-Net metrics (loss, mIoU curves, test mIoU) and visualizations in W&B against Vanilla U-Net. Did residual 

# Analysis of U-Net Variants for Semantic Segmentation

Based on the provided CSV data and plots from the WandB export, here's an analysis of the different U-Net architecture variants trained for semantic segmentation.

**Summary of Final Test Performance:**

| Variant          | Test mIoU        | Best Val mIoU    | Test Loss        | Runtime (s) |
| :--------------- | :--------------- | :--------------- | :--------------- | :---------- |
| **unet-residual**| **0.9024**       | 0.9293           | **0.0298**       | 7413        |
| unet-attention   | 0.8995           | 0.9308           | 0.0301           | 6282        |
| unet-vanilla     | 0.8612           | **0.9318**       | 0.0395           | 4776        |
| unet-noskip      | 0.8056           | 0.8377           | 0.0889           | 3912        |

---

## Detailed Analysis per Variant

### 3.1 Vanilla U-Net

*   **Performance:**
    *   Test mIoU: 0.8612
    *   Best Validation mIoU: 0.9318 (Highest among all variants)
    *   Test Loss: 0.0395
    *   Final Validation mIoU (Epoch 50): 0.8872
    *   Final Training mIoU (Epoch 50): 0.8753
*   **Plots Observation:** The training and validation mIoU curves show rapid initial learning, followed by plateauing. The validation mIoU peaked earlier (0.9318) and ended noticeably lower at epoch 50 (0.8872), a drop of roughly 0.045 from its peak. The model demonstrates the effectiveness of the standard U-Net architecture.
*   **Conclusion:** Serves as a solid baseline. It achieved the highest peak validation score but didn't generalize quite as well to the test set as the Residual or Attention variants.

### 3.2 U-Net without Skip Connections

*   **Performance:**
    *   Test mIoU: 0.8056 (Significantly lower than others)
    *   Best Validation mIoU: 0.8377
    *   Test Loss: 0.0889 (Highest among all variants)
    *   Final Validation mIoU (Epoch 50): 0.8368
    *   Final Training mIoU (Epoch 50): 0.9122
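*   **Plots Observation:** The `val_miou` curve plateaus at a significantly lower level (about 0.84) than the variants with skip connections (peaks of about 0.93), and `train_miou` also ends lower than the Residual and Attention variants (0.9122 vs. about 0.97). The gap between final training and validation mIoU (0.9122 vs. 0.8368) is the largest of the four variants, suggesting the model fits the training set without transferring that accuracy to unseen images. The curves also show noticeable dips (e.g., around steps 15 and 30), indicating training instability or difficulty in learning robust features without the skip connections. Convergence is slower and less effective.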
*   **Differences in Visualized Results:** Compared to the standard U-Net, the visualized segmentation masks from this model would likely exhibit:
    *   Much coarser boundaries and less precise localization.
    *   More misclassified pixels, especially for smaller objects or intricate details.
    *   Potential failure to segment certain classes or finer structures altogether.
    The lack of high-resolution information from early encoder layers hinders the decoder's ability to reconstruct accurate details.
*   **Importance of Skip Connections:**
    *   Skip connections are fundamentally important in U-Net. They bridge the semantic gap between the encoder and decoder by concatenating high-resolution feature maps from the encoder pathway directly with the up-sampled feature maps in the decoder pathway.
    *   **Role:**
        1.  **Preserving Spatial Detail:** They allow the decoder to reuse fine-grained spatial information from earlier layers, which is lost during the downsampling (pooling/strided convolution) in the encoder. This is critical for accurate boundary localization.
        2.  **Gradient Flow:** They provide shorter paths for gradients to flow during backpropagation, mitigating the vanishing gradient problem and facilitating the training of deeper networks.
        3.  **Combining Context and Detail:** They enable the fusion of high-level semantic context (learned in deeper layers) with low-level, high-resolution details (from shallower layers).
    *   The poor performance (`test_miou` of 0.8056 vs 0.8612+ for others) and unstable training curves of the `unet-noskip` variant empirically demonstrate the critical necessity of skip connections for effective U-Net performance in segmentation tasks.

### 3.3 Residual U-Net

*   **Performance:**
    *   Test mIoU: 0.9024 (Highest)
    *   Best Validation mIoU: 0.9293
    *   Test Loss: 0.0298 (Lowest)
    *   Final Validation mIoU (Epoch 50): 0.9293
    *   Final Training mIoU (Epoch 50): 0.9740 (Highest)
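*   **Plots Observation:** The training and validation mIoU curves converge rapidly to very high values, but not monotonically: the epoch log shows transient drops in validation mIoU at epoch 12 (0.8321), epochs 31–32 (0.7214, 0.8443), and epoch 47 (0.8476), each followed by recovery within a few epochs. In the final stages the model slightly outperforms the vanilla U-Net, especially on the training set.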
*   **Conclusion:** Replacing standard convolutional blocks with residual blocks yielded the best test set performance in terms of both mIoU and loss. Residual connections likely aided optimization within the blocks, leading to slightly more robust feature learning and better generalization, albeit at the cost of the longest training time (7413s).

### 3.4 Gated Attention U-Net

*   **Performance:**
    *   Test mIoU: 0.8995 (Very close second)
    *   Best Validation mIoU: 0.9308
    *   Test Loss: 0.0301 (Very close second)
    *   Final Validation mIoU (Epoch 50): 0.9274
    *   Final Training mIoU (Epoch 50): 0.9709
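*   **Plots Observation:** Similar to the Residual U-Net, the plots show effective learning that converges quickly to high mIoU values. Training is not entirely smooth, however: validation mIoU drops temporarily at epoch 9 (0.7552), epoch 24 (0.7651), and epochs 39–40 (0.8896, 0.8818) before recovering. Outside these dips, the performance is consistently high throughout the later stages of training.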
*   **Advantages of Attention Gates (as per paper):**
    *   **Focus on Relevant Regions:** AGs learn to automatically focus on target structures of interest while suppressing feature responses in irrelevant background regions.
    *   **Improved Sensitivity/Accuracy:** By weighting features passed through skip connections based on relevance (guided by the gating signal), they improve model sensitivity and predictive accuracy, particularly for varying object shapes and sizes.
    *   **Computational Efficiency:** They add minimal computational overhead compared to significantly increasing model depth or using model ensembles.
*   **Role of Gating Signal:** The gating signal, typically derived from a coarser scale (deeper layer) in the network, provides contextual information. This context helps the attention gate identify which spatial locations in the high-resolution feature map from the skip connection are most salient for the segmentation task at the current decoder stage. It essentially guides the attention mechanism.
*   **Differences Compared to Standard U-Net:** The Attention U-Net significantly outperformed the Vanilla U-Net on the test set (`test_miou` 0.8995 vs 0.8612) and achieved comparable performance to the Residual U-Net. This suggests that the attention mechanism effectively refined the information passed through the skip connections, improving the model's ability to focus on relevant features and generalize better to unseen test data.

---

## Overall Comparison and Conclusions

1.  **Skip Connections are Essential:** The `unet-noskip` variant performed drastically worse than all others, highlighting the critical role of skip connections in preserving spatial detail and enabling effective feature fusion in U-Net architectures.
2.  **Advanced Blocks Improve Performance:** Both Residual blocks (`unet-residual`) and Attention Gates (`unet-attention`) led to notable improvements in test set mIoU and loss compared to the `unet-vanilla` baseline.
3.  **Residual vs. Attention:** In this experiment, the Residual U-Net achieved the best test performance by a small margin (test mIoU 0.9024 vs. 0.8995), closely followed by the Attention U-Net. The choice between them might depend on specific dataset characteristics and computational budget: the Residual U-Net has more parameters (about 51.3M vs. 31.4M) and took longer to train (7413 s vs. 6282 s).
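4.  **Generalization:** While the Vanilla U-Net reached the highest peak validation mIoU, the Residual and Attention variants generalized better to the final test set, indicating their mechanisms might help prevent minor overfitting or learn more robust features. Note, however, that the Vanilla U-Net's validation mIoU at epoch 50 (0.8872) was well below its peak (0.9318); if the test evaluation used the final-epoch weights rather than the best checkpoint, part of the test gap may reflect where training happened to end rather than a true difference in generalization.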
5.  **Training Efficiency:** The Vanilla U-Net (4776 s) trained faster than the Residual (7413 s) and Attention (6282 s) variants. The No-Skip variant was the fastest (3912 s), likely because of its simpler architecture, but it was also the least accurate. The Residual variant was the slowest.
