In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class UNet(nn.Module):
    """U-Net style encoder/decoder for dense per-pixel classification.

    The encoder applies four conv blocks separated by 2x2 max pooling, followed
    by a bottleneck block. Each decoder stage bilinearly resizes the deeper
    feature map to the spatial size of the matching encoder output,
    concatenates the two along the channel axis and fuses them with
    convolutions. A final 1x1 convolution produces ``num_classes`` channels.

    The output has the same spatial size as the input. The input must be at
    least 16 pixels on each side so that four 2x2 poolings leave a non-empty
    bottleneck.

    Args:
        num_classes: number of output channels (one logit map per class).
        in_channels: number of channels of the input image (default 3).
        base_channels: width of the first encoder block; each deeper level
            doubles it (default 64, giving 64/128/256/512/1024).
    """

    def __init__(self, num_classes, in_channels=3, base_channels=64):
        super(UNet, self).__init__()

        # Channel widths per level: base, 2*base, 4*base, 8*base, 16*base.
        c1, c2, c3, c4, c5 = [base_channels * 2 ** level for level in range(5)]

        # Encoder
        self.enc1 = self.conv_block(in_channels, c1)
        self.enc2 = self.conv_block(c1, c2)
        self.enc3 = self.conv_block(c2, c3)
        self.enc4 = self.conv_block(c3, c4)

        # Bottleneck
        self.bottleneck = self.conv_block(c4, c5)

        # Decoder (input = upsampled deeper features + skip connection)
        self.dec4 = self.up_conv_block(c5 + c4, c4, c4)
        self.dec3 = self.up_conv_block(c4 + c3, c3, c3)
        self.dec2 = self.up_conv_block(c3 + c2, c2, c2)
        self.dec1 = self.up_conv_block(c2 + c1, c1, c1)

        # Final Output Layer
        self.final = nn.Conv2d(c1, num_classes, kernel_size=1)

    def up_conv_block(self, in_channels, mid_channels, out_channels):
        """
        Decoder fusion block applied to the concatenated (upsampled, skip) tensor.

        The spatial upsampling itself happens in ``forward`` via
        ``F.interpolate``, so this block must keep the resolution unchanged.
        A stride-2 transposed convolution here would upsample a second time
        and make the network output twice as large as its input.

        - `in_channels` is the number of input channels after concatenation.
        - `mid_channels` is the number of channels after the first fusion conv.
        - `out_channels` is the final number of output channels for the block.
        """
        return nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            self.conv_block(mid_channels, out_channels)
        )

    def conv_block(self, in_channels, out_channels):
        """Convolutional block: Conv -> ReLU -> Conv -> ReLU (size-preserving)."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(inplace=True)
        )

    @staticmethod
    def _merge(deep, skip):
        """Resize `deep` to the spatial size of `skip` and concatenate on channels."""
        # Resizing to the exact skip size (instead of a fixed x2 factor) keeps
        # the shapes compatible when a pooling step rounded an odd size down.
        resized = F.interpolate(
            deep, size=skip.shape[2:], mode='bilinear', align_corners=True
        )
        return torch.cat([resized, skip], dim=1)

    def forward(self, x):
        """Map an (N, in_channels, H, W) batch to (N, num_classes, H, W) logits."""
        # Encoder path: each level halves the resolution of the previous one.
        e1 = self.enc1(x)
        e2 = self.enc2(F.max_pool2d(e1, kernel_size=2))
        e3 = self.enc3(F.max_pool2d(e2, kernel_size=2))
        e4 = self.enc4(F.max_pool2d(e3, kernel_size=2))

        # Bottleneck
        b = self.bottleneck(F.max_pool2d(e4, kernel_size=2))

        # Decoder path: every stage ends at the resolution of its skip tensor.
        d4 = self.dec4(self._merge(b, e4))
        d3 = self.dec3(self._merge(d4, e3))
        d2 = self.dec2(self._merge(d3, e2))
        d1 = self.dec1(self._merge(d2, e1))

        # Final output: per-pixel class logits at the input resolution.
        return self.final(d1)

# Hyperparameters
num_classes = 19  # Cityscapes has 19 classes

# Model
model = UNet(num_classes=num_classes)
print(model)

# Example input: one full-resolution RGB image (batch size = 1).
x = torch.randn(1, 3, 1024, 2048)

# This is only a shape check, so skip autograd bookkeeping: at this
# resolution the activations kept for a backward pass are very large.
with torch.no_grad():
    y = model(x)
print(y.shape)  # Expected: (1, num_classes, 1024, 2048)

UNet(
  (enc1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
  )
  (enc2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
  )
  (enc3): Sequential(
    (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
  )
  (enc4): Sequential(
    (0): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
  )
  (bottleneck): Sequential(
    (0): Conv2d(512, 1024, k

In [1]:
import os
import torch
from torchvision import transforms
from torchvision.datasets import Cityscapes
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

# Dataset path
data_dir = "data"

# Hyperparameters
num_classes = 19  # Cityscapes has 19 classes
ignore_index = 255  # label given to pixels that must not contribute to the loss

# Transformations for the input images
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize as per ImageNet
])

# Lookup table from the raw label ids stored in the semantic masks to the
# training ids in [0, num_classes). Every id without a valid training id
# (void classes) maps to `ignore_index`.
id_to_train_id = torch.full((256,), ignore_index, dtype=torch.long)
for city_class in Cityscapes.classes:
    if city_class.id >= 0 and 0 <= city_class.train_id < num_classes:
        id_to_train_id[city_class.id] = city_class.train_id


def encode_target(mask):
    """Convert a PIL semantic mask into an (H, W) int64 tensor of training ids.

    Without this the dataset yields PIL images, which the default DataLoader
    collate function cannot batch, and the raw label ids would fall outside
    the range CrossEntropyLoss accepts for `num_classes` outputs.
    """
    label_ids = transforms.PILToTensor()(mask).squeeze(0).long()
    return id_to_train_id[label_ids]


# Dataset and DataLoader
train_dataset = Cityscapes(data_dir, split='train', mode='fine', target_type='semantic', transform=transform, target_transform=encode_target)
val_dataset = Cityscapes(data_dir, split='val', mode='fine', target_type='semantic', transform=transform, target_transform=encode_target)

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, num_workers=4)

# Device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Model, Loss, and Optimizer
model = UNet(num_classes=num_classes).to(device)

# Cross-entropy loss for multi-class; void pixels (ignore_index) are skipped.
criterion = nn.CrossEntropyLoss(ignore_index=ignore_index)
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Training and Validation Loop
def train(model, train_loader, optimizer, criterion, device):
    """Run one optimisation pass over `train_loader`.

    Puts the model in training mode, and for every batch moves the data to
    `device`, computes the loss, back-propagates and steps the optimizer.

    Returns:
        The sum of the per-batch loss values divided by `len(train_loader)`.
    """
    model.train()
    loss_sum = 0.0
    progress = tqdm(train_loader, desc="Training", leave=False)
    for images, targets in progress:
        images = images.to(device)
        targets = targets.to(device)

        # Forward pass
        loss = criterion(model(images), targets)

        # Backward pass: clear stale gradients, then update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()
    return loss_sum / len(train_loader)

def validate(model, val_loader, criterion, device):
    """Evaluate the model on `val_loader` without updating any weights.

    Puts the model in evaluation mode and disables gradient tracking while
    the loss of every batch is computed.

    Returns:
        The sum of the per-batch loss values divided by `len(val_loader)`.
    """
    model.eval()
    loss_sum = 0.0
    progress = tqdm(val_loader, desc="Validation", leave=False)
    with torch.no_grad():
        for images, targets in progress:
            images = images.to(device)
            targets = targets.to(device)

            # Forward pass only; nothing is back-propagated here.
            batch_loss = criterion(model(images), targets)
            loss_sum += batch_loss.item()
    return loss_sum / len(val_loader)

# Training the Model
epochs = 10
model_path = "unet_cityscapes.pth"
best_val_loss = float('inf')

for epoch in range(1, epochs + 1):
    print(f"Epoch {epoch}/{epochs}")

    train_loss = train(model, train_loader, optimizer, criterion, device)
    val_loss = validate(model, val_loader, criterion, device)

    print(f"Train Loss: {train_loss:.4f} | Validation Loss: {val_loss:.4f}")

    # Only a strict improvement of the validation loss triggers a checkpoint.
    if not (val_loss < best_val_loss):
        continue

    # Save the best model
    best_val_loss = val_loss
    torch.save(model.state_dict(), model_path)
    print(f"Model saved at epoch {epoch} with validation loss {val_loss:.4f}")

print("Training complete. Best model saved.")


NameError: name 'UNet' is not defined