In [None]:
# CS444 U-Net Project Setup for Semantic Segmentation
# Attention U-Net(MobileNetV2) on Cityscapes dataset

# Step 1: Install Required Libraries
# torch / torchvision: tensors, optimizers and the Cityscapes dataset wrapper used in later cells.
!pip install torch torchvision
# segmentation-models-pytorch: provides the `smp.Unet` model built in Step 7.
!pip install segmentation-models-pytorch
# matplotlib: plotting library (not used by the cells visible in this part of the notebook).
!pip install matplotlib



In [None]:
# Step 2: Mount Google Drive
# Exposes Drive under /content/drive; the dataset root and the checkpoint
# folder used by later cells both live below that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# List the project folder on Drive to confirm the mount exposes the CityScapes directory and the project notebooks.
ls '/content/drive/MyDrive/U-Net Segmentation Project/'

 [0m[01;34mCityScapes[0m/                              'ResNet50 - U-Net_Segmentation.ipynb'
'Copy of U-Net_Segmentation.ipynb'         U-Net_Segmentation.ipynb
'Final Project Report.gdoc'                U-Net_Segmentation_UNet++.ipynb
'mobilenetv2 - U-Net_Segmentation.ipynb'


In [None]:
# Step 3: Verify Dataset Structure
import os

# Location of the Cityscapes dataset on Google Drive; adjust it to match your own Drive layout.
root_path = '/content/drive/MyDrive/U-Net Segmentation Project/CityScapes'

# Both the image folder and the fine-annotation folder must sit directly under root_path.
has_required_folders = all(
    os.path.exists(os.path.join(root_path, folder_name))
    for folder_name in ('leftImg8bit', 'gtFine')
)

if has_required_folders:
    print(f"Dataset structure verified: 'leftImg8bit' and 'gtFine' found in {root_path}.")
else:
    raise RuntimeError(f"Ensure 'leftImg8bit' and 'gtFine' folders are in the root directory: {root_path}. Please verify the path.")

Dataset structure verified: 'leftImg8bit' and 'gtFine' found in /content/drive/MyDrive/U-Net Segmentation Project/CityScapes.


In [None]:
# Step 4: Verify GPU Environment
import os

# Set the CUDA debugging variables BEFORE the first CUDA query below: the device
# queries may initialise the CUDA runtime, after which changes to these variables
# are typically not picked up.
# Note: synchronous launches make errors point at the failing op but slow training;
# remove this line for timing-sensitive runs.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # Synchronous CUDA errors
# NOTE(review): device-side assertions appear to be a PyTorch build-time option;
# this runtime setting may have no effect on a stock wheel - confirm.
os.environ["TORCH_USE_CUDA_DSA"] = "1"    # Device-side assertions

import torch

print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA version: {torch.version.cuda}")
    print(f"GPU device: {torch.cuda.get_device_name(0)}")
    device = torch.device("cuda")
else:
    print("No GPU detected. Training will run on CPU, which will be very slow. Ensure GPU runtime is enabled in Colab (Runtime > Change runtime type).")
    device = torch.device("cpu")

PyTorch version: 2.6.0+cu124
CUDA available: True
CUDA version: 12.4
GPU device: Tesla T4


In [None]:
# Step 5: Load Cityscapes Dataset
from torchvision.datasets import Cityscapes
from torchvision import transforms
import numpy as np
from PIL import Image # Needed for target resize resampling

# Working resolution shared by images and label maps. Cityscapes frames are
# 1024x2048 originally; shrinking them keeps memory use and epoch time low.
img_height = 128
img_width = 256

# Image pipeline: resize to (img_height, img_width), then convert to a tensor.
transform = transforms.Compose(
    [
        transforms.Resize((img_height, img_width)),
        transforms.ToTensor(),
    ]
)

# Cityscapes label table: name -> (label id, trainId).
# trainId 0-18 are the 19 classes the model predicts; 255 marks labels that are
# ignored by the loss (see ignore_index on the criterion).
CITYSCAPES_LABELS = {
    'unlabeled': (0, 255), 'ego vehicle': (1, 255), 'rectification border': (2, 255),
    'out of roi': (3, 255), 'static': (4, 255), 'dynamic': (5, 255),
    'ground': (6, 255), 'road': (7, 0), 'sidewalk': (8, 1),
    'parking': (9, 255), 'rail track': (10, 255), 'building': (11, 2),
    'wall': (12, 3), 'fence': (13, 4), 'guard rail': (14, 255),
    'bridge': (15, 255), 'tunnel': (16, 255), 'pole': (17, 5),
    'polegroup': (18, 255), 'traffic light': (19, 6), 'traffic sign': (20, 7),
    'vegetation': (21, 8), 'terrain': (22, 9), 'sky': (23, 10),
    'person': (24, 11), 'rider': (25, 12), 'car': (26, 13),
    'truck': (27, 14), 'bus': (28, 15), 'caravan': (29, 255),
    'trailer': (30, 255), 'train': (31, 16), 'motorcycle': (32, 17),
    'bicycle': (33, 18)
}

# 256-entry lookup table indexed by the raw uint8 label id. Built once at
# definition time instead of on every sample; any id not in the table above
# (including 255 itself) maps to the ignore value 255.
LABEL_ID_TO_TRAIN_ID = np.full(256, 255, dtype=np.uint8)
for _label_id, _train_id in CITYSCAPES_LABELS.values():
    LABEL_ID_TO_TRAIN_ID[_label_id] = _train_id


def target_to_tensor(target):
    """Convert a Cityscapes label image into a training target tensor.

    Args:
        target: label image exposing a PIL-style ``resize`` method, holding raw
            Cityscapes label ids.

    Returns:
        torch.LongTensor of shape (img_height, img_width) whose values are
        trainIds 0-18, or 255 for pixels that must be ignored.
    """
    # Nearest-neighbour resampling keeps label ids intact (no blended values).
    target = target.resize((img_width, img_height), resample=Image.NEAREST)
    target_np = np.array(target, dtype=np.uint8)

    # A single vectorised table lookup replaces one boolean-mask pass per class.
    mapped_target = LABEL_ID_TO_TRAIN_ID[target_np]

    return torch.from_numpy(mapped_target).long()

# Build the train and val splits from the same fine-annotation configuration.
try:
    shared_dataset_args = dict(
        root=root_path,
        mode='fine',
        target_type='semantic',
        transform=transform,
        target_transform=target_to_tensor,
    )
    train_dataset = Cityscapes(split='train', **shared_dataset_args)
    val_dataset = Cityscapes(split='val', **shared_dataset_args)
    print(f"Successfully loaded train dataset with {len(train_dataset)} samples.")
    print(f"Successfully loaded val dataset with {len(val_dataset)} samples.")

except Exception as e:
    # Report a hint for the most likely cause, then let the original error propagate.
    print(f"Error loading dataset: {e}")
    print("Please ensure your `root_path` is correct and the dataset is properly structured.")
    raise

# Spot-check the first training sample: shapes and the set of label values.
if len(train_dataset) == 0:
    print("Train dataset is empty. Cannot verify sample.")
else:
    sample_image, sample_target = train_dataset[0]
    print(f"Sample image shape: {sample_image.shape}")
    print(f"Sample target shape: {sample_target.shape}")
    unique_labels = torch.unique(sample_target)
    print(f"Sample target unique values: {unique_labels}")
    # Every label must be a trainId (0-18) or the ignore value 255.
    is_train_id = (unique_labels >= 0) & (unique_labels <= 18)
    is_ignored = unique_labels == 255
    if not (is_train_id | is_ignored).all():
        print("Warning: Unexpected label values found in sample target.")

Successfully loaded train dataset with 2975 samples.
Successfully loaded val dataset with 500 samples.
Sample image shape: torch.Size([3, 128, 256])
Sample target shape: torch.Size([128, 256])
Sample target unique values: tensor([  0,   1,   2,   5,   7,   8,   9,  10,  11,  13,  18, 255])


In [None]:
# Step 5.1: Verify Dataset Integrity
def check_dataset_integrity(dataset, split_name, num_samples_to_check=5):
    """Spot-check the leading samples of a dataset.

    For each inspected sample the image and target must both be tensors, the
    image must be (3, img_height, img_width), the target must be
    (img_height, img_width), and every target value must lie in 0-18 or be 255.
    `img_height` and `img_width` are read from the notebook's module scope.

    Args:
        dataset: indexable, sized collection yielding (image, target) pairs.
        split_name: label used in the printed messages.
        num_samples_to_check: upper bound on the number of samples inspected.

    Returns:
        True if the dataset is empty or every inspected sample passes;
        False at the first failing check or the first exception raised.
    """
    print(f"Checking {split_name} dataset integrity (first {num_samples_to_check} samples)...")
    sample_count = len(dataset)
    if sample_count == 0:
        print(f"{split_name} dataset is empty. Skipping integrity check.")
        return True

    for sample_idx in range(min(num_samples_to_check, sample_count)):
        try:
            image, target = dataset[sample_idx]

            # Type check first: the shape checks below assume tensors.
            both_are_tensors = isinstance(image, torch.Tensor) and isinstance(target, torch.Tensor)
            if not both_are_tensors:
                print(f"Error at index {sample_idx}: Image or target is not a tensor.")
                return False

            if image.shape != torch.Size([3, img_height, img_width]):
                print(f"Error at index {sample_idx}: Unexpected image shape {image.shape}.")
                return False

            if target.shape != torch.Size([img_height, img_width]):
                print(f"Error at index {sample_idx}: Unexpected target shape {target.shape}.")
                return False

            # Valid labels are trainIds 0-18 or the ignore value 255.
            is_train_id = (target >= 0) & (target <= 18)
            is_ignored = target == 255
            if not (is_train_id | is_ignored).all():
                print(f"Error at index {sample_idx}: Target contains invalid labels {torch.unique(target)}.")
                return False

        except Exception as e:
            print(f"Error accessing sample {sample_idx} in {split_name} dataset: {e}")
            return False

    print(f"{split_name} dataset integrity verified for first {num_samples_to_check} samples.")
    return True

# Spot-check the first few samples of each split; abort the notebook on failure.
train_split_ok = check_dataset_integrity(train_dataset, "train")
if not train_split_ok:
    raise RuntimeError("Train dataset integrity check failed.")

val_split_ok = check_dataset_integrity(val_dataset, "val")
if not val_split_ok:
    raise RuntimeError("Validation dataset integrity check failed.")

Checking train dataset integrity (first 5 samples)...
train dataset integrity verified for first 5 samples.
Checking val dataset integrity (first 5 samples)...
val dataset integrity verified for first 5 samples.


In [None]:
# Step 6: Create Data Loaders
from torch.utils.data import DataLoader

batch_size = 2

# Settings common to both loaders; only shuffling differs between train and val.
shared_loader_args = dict(batch_size=batch_size, num_workers=2, pin_memory=True)

if len(train_dataset) == 0:
    train_loader = None
    print("Train dataset is empty. Train loader not created.")
else:
    train_loader = DataLoader(train_dataset, shuffle=True, **shared_loader_args)

if len(val_dataset) == 0:
    val_loader = None
    print("Validation dataset is empty. Validation loader not created.")
else:
    val_loader = DataLoader(val_dataset, shuffle=False, **shared_loader_args)

# Confirm which loaders exist.
if train_loader:
    print(f"Train loader created with batch size {batch_size}.")
if val_loader:
    print(f"Validation loader created with batch size {batch_size}.")

Train loader created with batch size 2.
Validation loader created with batch size 2.


In [None]:
# Step 7: Define Models
import segmentation_models_pytorch as smp

# One output channel per trainId produced by the label mapping (0-18).
num_classes = 19
encoder_name = "mobilenet_v2"

# U-Net whose decoder blocks use SCSE (Spatial and Channel Squeeze & Excitation)
# attention; the encoder starts from ImageNet weights and the head emits raw logits.
attention_unet_args = dict(
    encoder_name=encoder_name,
    encoder_weights="imagenet",
    in_channels=3,
    classes=num_classes,
    activation=None,
    decoder_attention_type="scse",
)
model_attention = smp.Unet(**attention_unet_args)
print(f"Attention U-Net (SCSE) with {encoder_name} encoder defined.")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Standard U-Net with mobilenet_v2 encoder defined.
Attention U-Net (SCSE) with mobilenet_v2 encoder defined.
U-Net++ with mobilenet_v2 encoder defined.


In [None]:
# Step 8: Define Loss Function
import torch.nn as nn
# CrossEntropyLoss expects raw logits from the model and long type targets
# (the model is built with activation=None and targets are converted with .long()).
# ignore_index=255 means that pixels with label 255 will not contribute to the loss;
# 255 is the value the label mapping assigns to every class outside the 19 trained ones.
criterion = nn.CrossEntropyLoss(ignore_index=255)
print("CrossEntropyLoss defined with ignore_index=255.")

CrossEntropyLoss defined with ignore_index=255.


In [None]:
# Step 8.1: Setup Model Saving and Validation Loss Function
import os

# Checkpoints are written to a 'saved_models' folder inside the dataset root.
save_dir = os.path.join(root_path, 'saved_models')
# exist_ok=True lets this cell be re-run when the folder already exists.
os.makedirs(save_dir, exist_ok=True)
print(f"Models will be saved in: {save_dir}")

def evaluate_validation_loss(model, loader, criterion, device, model_name):
    """Compute the mean validation loss of `model` over `loader`.

    The model is switched to eval mode and gradients are disabled. Batches whose
    targets contain labels outside ``[0, num_classes)`` (other than the ignore
    value 255) are reported and skipped, so they cannot reach the criterion.
    `num_classes` is read from the notebook's module scope.

    Args:
        model: network mapping an image batch to per-class logits.
        loader: sized iterable of (images, targets) batches.
        criterion: callable ``criterion(outputs, targets)`` returning a scalar loss tensor.
        device: torch.device the batches are moved to.
        model_name: label used in printed messages.

    Returns:
        float: mean loss over the batches that were scored, or ``float('inf')``
        when the loader is empty or every batch was skipped.
    """
    model.eval()
    num_batches = len(loader)

    if num_batches == 0:
        print(f"Warning: Validation loader for {model_name} is empty. Cannot compute validation loss.")
        return float('inf')

    running_val_loss = 0.0
    scored_batches = 0  # batches that actually contributed to the running loss

    with torch.no_grad():
        for images, targets in loader:
            images, targets = images.to(device), targets.to(device)

            # Ensure targets are valid before calculating loss
            valid_mask = ((targets >= 0) & (targets < num_classes)) | (targets == 255)
            if not valid_mask.all():
                print(f"Validation: Invalid labels detected in targets for {model_name}! Unique: {torch.unique(targets)}. Skipping batch for loss calculation.")
                # Actually skip, as the message says, instead of scoring the bad batch.
                continue

            outputs = model(images)
            loss = criterion(outputs, targets)
            running_val_loss += loss.item()
            scored_batches += 1

    # Average over scored batches only so skipped batches do not deflate the mean.
    avg_val_loss = running_val_loss / scored_batches if scored_batches > 0 else float('inf')
    if device.type == "cuda":
        torch.cuda.empty_cache()
    return avg_val_loss

Models will be saved in: /content/drive/MyDrive/U-Net Segmentation Project/CityScapes/saved_models


In [None]:
# Step 9: Training Configuration
num_epochs = 15        # number of epochs for the initial training run
learning_rate = 0.001  # Adam learning rate for the initial training run
weight_decay = 1e-5    # Adam weight decay, reused by the fine-tuning optimizer

def train_one_epoch(model, loader, optimizer, criterion, device, model_name):
    """Run one optimisation pass over `loader` and return the mean batch loss.

    Batches whose targets contain labels outside ``[0, num_classes)`` (other than
    the ignore value 255) are reported and skipped. `num_classes` is read from the
    notebook's module scope. Progress is printed roughly five times per epoch.

    Args:
        model: network mapping an image batch to per-class logits.
        loader: sized iterable of (images, targets) batches.
        optimizer: optimizer updating the model parameters.
        criterion: callable ``criterion(outputs, targets)`` returning a scalar loss tensor.
        device: torch.device the batches are moved to.
        model_name: label used in printed messages.

    Returns:
        float: mean loss over the batches that were actually trained on;
        0.0 when the loader is empty or every batch was skipped;
        ``float('inf')`` when a RuntimeError occurred (treated as critical by callers).
    """
    model.train()
    num_batches = len(loader)

    if num_batches == 0:
        print(f"Warning: Training loader for {model_name} is empty. Skipping training epoch.")
        return 0.0

    # Loop-invariant logging interval: about 5 progress lines per epoch.
    log_interval = num_batches // 5 if num_batches >= 5 else 1
    running_loss = 0.0
    trained_batches = 0  # batches that went through an optimizer step

    for i, (images, targets) in enumerate(loader):
        try:
            # Device transfer is inside the try so an out-of-memory error here is
            # handled the same way as one raised during forward/backward.
            images, targets = images.to(device), targets.to(device)

            valid_mask = ((targets >= 0) & (targets < num_classes)) | (targets == 255)
            if not valid_mask.all():
                print(f"Batch {i}: Invalid labels detected in targets for {model_name}! Unique: {torch.unique(targets)}. Skipping batch.")
                # Report exactly the offending values (no sentinel that could hide a real label).
                print(f"Problematic target values: {torch.unique(targets[~valid_mask])}")
                continue

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            trained_batches += 1

            if (i + 1) % log_interval == 0: # Print 5 times per epoch
                print(f"    {model_name} - Batch {i+1}/{num_batches}, Loss: {loss.item():.4f}")

        except RuntimeError as e:
            print(f"RuntimeError during training {model_name} at batch {i}: {e}")
            if "CUDA out of memory" in str(e):
                print("CUDA OOM: Try reducing batch size or image dimensions.")
                if device.type == "cuda": torch.cuda.empty_cache()
            return float('inf') # Indicate critical error

    # Average over trained batches only so skipped batches do not deflate the mean.
    epoch_loss = running_loss / trained_batches if trained_batches > 0 else 0.0
    if device.type == "cuda":
        torch.cuda.empty_cache()
    return epoch_loss

In [None]:
# --- Training Cell 9.2: Attention U-Net ---
# Trains `model_attention` for `num_epochs` epochs. After every epoch the validation
# loss is computed; the weights with the lowest validation loss are written to
# `best_model_attention_path`, and the weights from the last executed epoch are
# written to `final_model_attention_path`.
print(f"\n--- Training Attention U-Net (SCSE, {encoder_name}) ---")
model_name_attention = "attention_unet_scse"
best_model_attention_path = os.path.join(save_dir, f"{model_name_attention}_best_val_mobilenetv2.pth")
final_model_attention_path = os.path.join(save_dir, f"{model_name_attention}_final_epoch_mobilenetv2.pth")
# Start at +inf so the first validated epoch always becomes the best checkpoint.
best_val_loss_attention = float('inf')

if train_loader:
    model_attention.to(device)
    # Initialize optimizer with weight decay
    optimizer_attention = torch.optim.Adam(model_attention.parameters(), lr=learning_rate, weight_decay=weight_decay)
    # Initialize learning rate scheduler: multiplies the LR by 0.1 after 5 epochs without validation-loss improvement
    scheduler_attention = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer_attention, mode='min', factor=0.1, patience=5) # Adjust patience as needed

    if device.type == "cuda":
        torch.cuda.empty_cache()
        print(f"Initial GPU memory allocated for Attention U-Net: {torch.cuda.memory_allocated(device) / 1024**2:.2f} MB")

    for epoch in range(num_epochs):
        print(f"Epoch {epoch+1}/{num_epochs}")
        # Training phase
        epoch_loss_attention = train_one_epoch(model_attention, train_loader, optimizer_attention, criterion, device, "Attention U-Net")
        # train_one_epoch returns inf when it hit a RuntimeError; stop instead of continuing.
        if epoch_loss_attention == float('inf'):
            print(f"Critical error training Attention U-Net in epoch {epoch+1}. Stopping training for this model.")
            break
        print(f"  Attention U-Net - Epoch {epoch+1} Average Training Loss: {epoch_loss_attention:.4f}")

        # Validation phase
        if val_loader:
            current_val_loss = evaluate_validation_loss(model_attention, val_loader, criterion, device, "Attention U-Net (Validation)")
            print(f"  Attention U-Net - Epoch {epoch+1} Average Validation Loss: {current_val_loss:.4f}")

            # Checkpoint only on strict improvement of the validation loss.
            if current_val_loss < best_val_loss_attention:
                best_val_loss_attention = current_val_loss
                torch.save(model_attention.state_dict(), best_model_attention_path)
                print(f"    New best model saved to {best_model_attention_path} (Val Loss: {current_val_loss:.4f})")

            # Step the learning rate scheduler based on validation loss
            scheduler_attention.step(current_val_loss)
        else:
            print("  Skipping validation for checkpointing and LR scheduling as val_loader is not available.")

    # Save the final model state
    # NOTE(review): this also runs after the loop exits via `break` on a critical
    # error, so the "final" checkpoint may hold a partially trained model - confirm intended.
    torch.save(model_attention.state_dict(), final_model_attention_path)
    print(f"--- Attention U-Net Training Finished. Final model saved to {final_model_attention_path} ---")
    # NOTE(review): the existence check also passes for a checkpoint file left by an earlier run.
    if val_loader and os.path.exists(best_model_attention_path):
        print(f"Best validation model saved at {best_model_attention_path} with val_loss: {best_val_loss_attention:.4f}")
else:
    print("Skipping Attention U-Net training as train_loader is not available.")


--- Training Attention U-Net (SCSE, mobilenet_v2) ---
Initial GPU memory allocated for Attention U-Net: 26.56 MB
Epoch 1/15
    Attention U-Net - Batch 297/1488, Loss: 1.0064
    Attention U-Net - Batch 594/1488, Loss: 0.7380
    Attention U-Net - Batch 891/1488, Loss: 0.6810
    Attention U-Net - Batch 1188/1488, Loss: 0.4368
    Attention U-Net - Batch 1485/1488, Loss: 0.4129
  Attention U-Net - Epoch 1 Average Training Loss: 0.6590
  Attention U-Net - Epoch 1 Average Validation Loss: 0.5205
    New best model saved to /content/drive/MyDrive/U-Net Segmentation Project/CityScapes/saved_models/attention_unet_scse_best_val_mobilenetv2.pth (Val Loss: 0.5205)
Epoch 2/15
    Attention U-Net - Batch 297/1488, Loss: 0.3197
    Attention U-Net - Batch 594/1488, Loss: 0.4265
    Attention U-Net - Batch 891/1488, Loss: 0.2249
    Attention U-Net - Batch 1188/1488, Loss: 0.7110
    Attention U-Net - Batch 1485/1488, Loss: 0.4530
  Attention U-Net - Epoch 2 Average Training Loss: 0.4674
  Attent

In [None]:
# --- Training Cell 9.2.1: Fine-tune Attention U-Net ---
# Reloads the best checkpoint from the initial training run and continues training
# at a lower learning rate. A fine-tuned checkpoint is saved only when an epoch
# beats the validation loss of the loaded checkpoint, which is measured here
# rather than hard-coded from a previous run's log.
print(f"\n--- Fine-tuning Attention U-Net (SCSE, {encoder_name}) ---")

# Define parameters for fine-tuning
num_ft_epochs = 5
fine_tune_lr = 5e-5

# Paths for the fine-tuned model
model_name_attention_ft = "attention_unet_scse_ft"
best_model_attention_ft_path = os.path.join(save_dir, f"{model_name_attention_ft}_best_val_mobilenetv2.pth")
final_model_attention_ft_path = os.path.join(save_dir, f"{model_name_attention_ft}_final_epoch_mobilenetv2.pth")

pretrained_model_path = os.path.join(save_dir, "attention_unet_scse_best_val_mobilenetv2.pth")

# Placeholder until the baseline is measured from the loaded checkpoint below.
best_val_loss_attention_ft = float('inf')

if not os.path.exists(pretrained_model_path):
    print(f"Error: Pretrained model not found at {pretrained_model_path}. Cannot fine-tune.")
    print("Please ensure the initial training (Cell 9.2) was run and the best model was saved correctly.")
else:
    if train_loader and val_loader:
        if 'model_attention' not in globals() or model_attention is None:
            print("Re-defining Attention U-Net model architecture for fine-tuning...")
            # encoder_weights=None: every weight is overwritten by the checkpoint loaded below.
            model_attention = smp.Unet(
                encoder_name=encoder_name,
                encoder_weights=None,
                in_channels=3,
                classes=num_classes,
                activation=None,
                decoder_attention_type="scse"
            )
            print("Attention U-Net model re-defined.")
        else:
            print("Using existing 'model_attention' instance.")


        print(f"Loading weights from {pretrained_model_path} for fine-tuning...")
        try:
            model_attention.load_state_dict(torch.load(pretrained_model_path, map_location=device))
            print("Successfully loaded pre-trained weights.")
        except Exception as e:
            print(f"Error loading pre-trained weights: {e}")
            raise

        model_attention.to(device)

        # Measure the starting point: fine-tuned checkpoints must beat the loaded model
        # on the same validation set and criterion.
        best_val_loss_attention_ft = evaluate_validation_loss(model_attention, val_loader, criterion, device, "Attention U-Net (Fine-tuning Baseline)")
        print(f"Baseline validation loss of loaded checkpoint: {best_val_loss_attention_ft:.4f}")
        # Tracks whether THIS run wrote a best checkpoint (a file from an earlier run may already exist).
        saved_best_ft_this_run = False

        optimizer_attention_ft = torch.optim.Adam(model_attention.parameters(), lr=fine_tune_lr, weight_decay=weight_decay)

        scheduler_attention_ft = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer_attention_ft, mode='min', factor=0.1, patience=3)

        if device.type == "cuda":
            torch.cuda.empty_cache()
            print(f"Initial GPU memory allocated for Fine-tuning Attention U-Net: {torch.cuda.memory_allocated(device) / 1024**2:.2f} MB")

        print(f"Starting fine-tuning for {num_ft_epochs} epochs with LR: {fine_tune_lr}")
        for epoch in range(num_ft_epochs):
            current_epoch_ft = epoch + 1
            print(f"Fine-tuning Epoch {current_epoch_ft}/{num_ft_epochs}. Current LR: {optimizer_attention_ft.param_groups[0]['lr']:.2e}")

            epoch_loss_attention_ft = train_one_epoch(model_attention, train_loader, optimizer_attention_ft, criterion, device, "Attention U-Net (Fine-tuning)")
            # train_one_epoch returns inf when it hit a RuntimeError; stop instead of continuing.
            if epoch_loss_attention_ft == float('inf'):
                print(f"Critical error fine-tuning Attention U-Net in epoch {current_epoch_ft}. Stopping fine-tuning.")
                break
            print(f"  Attention U-Net (Fine-tuning) - Epoch {current_epoch_ft} Average Training Loss: {epoch_loss_attention_ft:.4f}")

            current_val_loss_ft = evaluate_validation_loss(model_attention, val_loader, criterion, device, "Attention U-Net (Fine-tuning Validation)")
            print(f"  Attention U-Net (Fine-tuning) - Epoch {current_epoch_ft} Average Validation Loss: {current_val_loss_ft:.4f}")

            if current_val_loss_ft < best_val_loss_attention_ft:
                best_val_loss_attention_ft = current_val_loss_ft
                torch.save(model_attention.state_dict(), best_model_attention_ft_path)
                saved_best_ft_this_run = True
                print(f"    New best fine-tuned model saved to {best_model_attention_ft_path} (Val Loss: {current_val_loss_ft:.4f})")

            scheduler_attention_ft.step(current_val_loss_ft)

        torch.save(model_attention.state_dict(), final_model_attention_ft_path)
        print(f"--- Attention U-Net Fine-tuning Finished. Final model saved to {final_model_attention_ft_path} ---")
        if saved_best_ft_this_run:
            print(f"Best fine-tuned validation model saved at {best_model_attention_ft_path} with val_loss: {best_val_loss_attention_ft:.4f}")
        else:
            print(f"No fine-tuning epoch improved on the baseline validation loss ({best_val_loss_attention_ft:.4f}); no new best fine-tuned model was saved.")
    else:
        if not train_loader: print("Skipping fine-tuning as train_loader is not available.")
        if not val_loader: print("Skipping fine-tuning as val_loader is not available (validation is crucial for fine-tuning).")


--- Fine-tuning Attention U-Net (SCSE, mobilenet_v2) ---
Using existing 'model_attention' instance.
Loading weights from /content/drive/MyDrive/U-Net Segmentation Project/CityScapes/saved_models/attention_unet_scse_best_val_mobilenetv2.pth for fine-tuning...
Successfully loaded pre-trained weights.
Initial GPU memory allocated for Fine-tuning Attention U-Net: 107.02 MB
Starting fine-tuning for 5 epochs with LR: 5e-05
Fine-tuning Epoch 1/5. Current LR: 5.00e-05
    Attention U-Net (Fine-tuning) - Batch 297/1488, Loss: 0.1865
    Attention U-Net (Fine-tuning) - Batch 594/1488, Loss: 0.2939
    Attention U-Net (Fine-tuning) - Batch 891/1488, Loss: 0.3239
    Attention U-Net (Fine-tuning) - Batch 1188/1488, Loss: 0.2118
    Attention U-Net (Fine-tuning) - Batch 1485/1488, Loss: 0.3603
  Attention U-Net (Fine-tuning) - Epoch 1 Average Training Loss: 0.2721
  Attention U-Net (Fine-tuning) - Epoch 1 Average Validation Loss: 0.3415
    New best fine-tuned model saved to /content/drive/MyDrive