In [None]:
import os
import shutil
import sys
import torch
from google.colab import drive

# Mount Drive
drive.mount('/content/drive')

# Restore Dataset: only when the extracted Cityscapes folder is not already on
# the local disk, unpack the first archive that exists on Drive.
if not os.path.exists('/content/dataset/project_data/cityscapes'):
    print("Restoring Cityscapes...")
    zip_path_1 = '/content/drive/MyDrive/semseg/project_data.zip'
    zip_path_2 = '/content/drive/MyDrive/project_data.zip'

    for candidate_zip in (zip_path_1, zip_path_2):
        if os.path.exists(candidate_zip):
            shutil.unpack_archive(candidate_zip, '/content/dataset')
            break
    else:
        # Neither candidate archive exists, so nothing was unpacked.
        print("Dataset zip not found. Please check paths.")

Mounted at /content/drive
Restoring Cityscapes...


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import models
from PIL import Image
import torchvision.transforms as transforms
import numpy as np
import os
import time

# --- CONFIGURATION ---
# Values passed to the SGD optimizer, the scheduler and the DataLoader below.
EPOCHS = 50
BATCH_SIZE = 4
LEARNING_RATE = 1e-2
MOMENTUM = 0.9
WEIGHT_DECAY = 5e-4

# Use the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')

# Dataset location on the local disk and checkpoint location on Drive.
CITYSCAPES_PATH = '/content/dataset/project_data/cityscapes'
CHECKPOINT_NAME = 'deeplabv2_cityscapes.pth'
SAVE_PATH = '/content/drive/MyDrive/semseg/' + CHECKPOINT_NAME

# --- DEEPLAB V2 MODEL DEFINITION ---
class ASPP(nn.Module):
    """Parallel 1x1 and dilated 3x3 convolutions whose outputs are concatenated.

    One 1x1 convolution plus one 3x3 convolution per entry of ``rates`` (with
    ``padding == dilation == rate``) are applied to the same input, and the
    branch outputs are concatenated along the channel dimension.

    Args:
        in_channels: Number of channels of the incoming feature map.
        out_channels: Number of channels produced by each branch.
        rates: Dilation rate of each 3x3 branch. Any iterable of ints is
            accepted; the default is an immutable tuple so it cannot be
            mutated between instances.

    The tensor returned by ``forward`` has
    ``(len(rates) + 1) * out_channels`` channels.
    """

    def __init__(self, in_channels, out_channels=256, rates=(6, 12, 18, 24)):
        super().__init__()
        rates = tuple(rates)

        # Branch 0 is the 1x1 conv; the remaining branches are the atrous convs.
        branches = [nn.Conv2d(in_channels, out_channels, 1, bias=False)]
        branches.extend(
            nn.Conv2d(in_channels, out_channels, 3, padding=rate, dilation=rate, bias=False)
            for rate in rates
        )
        self.convs = nn.ModuleList(branches)

        # NOTE(review): this layer is never called in forward(). Its input
        # width was originally written as `len(rates) + 1 * out_channels`,
        # which operator precedence evaluates to `len(rates) + out_channels`
        # (260 by default), not `(len(rates) + 1) * out_channels` (1280).
        # The weight is nevertheless part of every saved state_dict, so the
        # shape is kept exactly as it was to keep existing checkpoints loadable.
        self.out_conv = nn.Conv2d(len(rates) + out_channels, out_channels, 1, bias=False)

    def forward(self, x):
        """Apply every branch to ``x`` and concatenate the results on dim 1."""
        return torch.cat([conv(x) for conv in self.convs], dim=1)

class DeepLabV2(nn.Module):
    """ResNet backbone followed by the ASPP head and a 1x1 classifier.

    Args:
        num_classes: Number of output channels of the classifier.
        backbone: ``'resnet101'`` selects ResNet-101; any other value
            selects ResNet-50.
    """

    def __init__(self, num_classes=19, backbone='resnet101'):
        super().__init__()

        # Build the torchvision ResNet with pretrained weights.
        build_resnet = models.resnet101 if backbone == 'resnet101' else models.resnet50
        resnet = build_resnet(pretrained=True)

        # Stem (conv / bn / relu / maxpool) followed by the four residual
        # stages, all reused as provided by torchvision: no stride or dilation
        # is modified here.
        stem = [resnet.conv1, resnet.bn1, resnet.relu, resnet.maxpool]
        self.layer0 = nn.Sequential(*stem)
        self.layer1 = resnet.layer1
        self.layer2 = resnet.layer2
        self.layer3 = resnet.layer3
        self.layer4 = resnet.layer4

        # ASPP head on the 2048-channel output of layer4.
        self.aspp = ASPP(2048, 256, rates=[6, 12, 18, 24])

        # The ASPP output is the concatenation of 5 branches of 256 channels,
        # i.e. 1280 channels, which the classifier maps to num_classes.
        self.cls_conv = nn.Conv2d(1280, num_classes, 1)

    def forward(self, x):
        """Return per-pixel class scores at the spatial size of ``x``."""
        input_size = (x.shape[2], x.shape[3])

        features = x
        for stage in (self.layer0, self.layer1, self.layer2, self.layer3, self.layer4):
            features = stage(features)

        logits = self.cls_conv(self.aspp(features))

        # Bring the coarse prediction back to the input resolution.
        return F.interpolate(logits, size=input_size, mode='bilinear', align_corners=True)

# --- DATASET ---
class CityscapesDataset(Dataset):
    """Pairs of Cityscapes images and ``labelTrainIds`` masks for one split.

    Images are discovered under ``<root>/leftImg8bit/<split>/<city>/`` and the
    matching mask path is derived under ``<root>/gtFine/<split>/<city>/`` by
    swapping the file-name suffix. Mask files are not checked for existence
    here; a missing mask surfaces when the item is loaded.

    Args:
        root: Dataset root directory.
        split: Sub-directory of ``leftImg8bit`` / ``gtFine`` to read.
        transform: Optional callable applied to the resized RGB image only.
        size: ``(width, height)`` passed to ``PIL.Image.resize`` for both the
            image and the mask. Defaults to ``(1024, 512)``.

    If the image directory does not exist the dataset is empty.
    """

    def __init__(self, root, split='train', transform=None, size=(1024, 512)):
        self.root = root
        self.transform = transform
        self.size = tuple(size)
        self.images_dir = os.path.join(root, 'leftImg8bit', split)
        self.masks_dir = os.path.join(root, 'gtFine', split)
        self.images = []
        self.masks = []

        if os.path.exists(self.images_dir):
            # Sorted traversal keeps the sample order deterministic.
            for city in sorted(os.listdir(self.images_dir)):
                img_dir = os.path.join(self.images_dir, city)
                mask_dir = os.path.join(self.masks_dir, city)
                if not os.path.isdir(img_dir):
                    continue
                for file_name in sorted(os.listdir(img_dir)):
                    if file_name.endswith('_leftImg8bit.png'):
                        self.images.append(os.path.join(img_dir, file_name))
                        mask_name = file_name.replace('_leftImg8bit.png', '_gtFine_labelTrainIds.png')
                        self.masks.append(os.path.join(mask_dir, mask_name))

    def __len__(self):
        """Number of discovered image files."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, mask)`` where ``mask`` is a ``long`` tensor."""
        # Context managers close the underlying files deterministically; the
        # converted / resized copies do not depend on the open file.
        with Image.open(self.images[idx]) as raw_image:
            image = raw_image.convert('RGB').resize(self.size, Image.BILINEAR)
        with Image.open(self.masks[idx]) as raw_mask:
            # Nearest-neighbour resampling so label ids are never blended.
            mask = raw_mask.resize(self.size, Image.NEAREST)

        if self.transform:
            image = self.transform(image)

        return image, torch.from_numpy(np.array(mask)).long()

# --- TRAINING ---
print("Starting DeepLabV2 (Classic) Training on Cityscapes...")

model = DeepLabV2(num_classes=19, backbone='resnet101').to(DEVICE)
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)

# torch's polynomial-decay scheduler is called `PolynomialLR`; fall back to
# StepLR only on torch versions that do not provide it.
if hasattr(optim.lr_scheduler, 'PolynomialLR'):
    scheduler = optim.lr_scheduler.PolynomialLR(optimizer, total_iters=EPOCHS, power=0.9)
else:
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)
criterion = nn.CrossEntropyLoss(ignore_index=255)

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

dataset = CityscapesDataset(CITYSCAPES_PATH, split='train', transform=transform)
if len(dataset) == 0:
    # Fail with an actionable message instead of the sampler's generic error.
    raise RuntimeError(
        f"No training images found under {CITYSCAPES_PATH}; "
        "check that the dataset archive was restored."
    )
loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)

if os.path.exists(SAVE_PATH):
    print("Resuming checkpoint...")
    checkpoint = torch.load(SAVE_PATH, map_location=DEVICE)
    model.load_state_dict(checkpoint['model_state_dict'])
    # A checkpoint without an epoch record restarts from epoch 0.
    start_epoch = checkpoint.get('epoch', -1) + 1

    # Optimizer / scheduler state is optional so that checkpoints written
    # before these keys existed can still be resumed.
    if 'optimizer_state_dict' in checkpoint:
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    if 'scheduler_state_dict' in checkpoint:
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
    else:
        # Older checkpoint: advance the schedule to the resumed epoch so the
        # learning rate matches the position in training. torch may warn once
        # that the scheduler was stepped before the optimizer.
        for _ in range(start_epoch):
            scheduler.step()
else:
    start_epoch = 0

for epoch in range(start_epoch, EPOCHS):
    model.train()
    for i, (images, labels) in enumerate(loader):
        images, labels = images.to(DEVICE), labels.to(DEVICE)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        if i % 50 == 0:
            print(f"Epoch [{epoch+1}/{EPOCHS}] Step [{i}/{len(loader)}] Loss: {loss.item():.4f}")

    # Step the schedule once per epoch. The previous guard tested the
    # optimizer for a scheduler-only attribute, so it never ran.
    scheduler.step()

    # Saving optimizer and scheduler state makes a resumed run continue with
    # the same momentum buffers and learning rate. The evaluation code only
    # reads 'model_state_dict', so the extra keys are harmless to it.
    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict(),
        'epoch': epoch,
    }, SAVE_PATH)
    print(f"Epoch {epoch+1} Saved.")

# --- METRICS CALCULATION (Latency & Params) ---
print("\nCalculating Metrics...")
model.eval()
dummy_input = torch.randn(1, 3, 512, 1024).to(DEVICE)

# 1. Parameters
num_params = sum(p.numel() for p in model.parameters())
print(f"Number of Parameters: {num_params / 1e6:.2f} M")

# 2. Latency (Avg of 100 runs)
# CUDA kernels are launched asynchronously, so the clock is only read after
# torch.cuda.synchronize(); otherwise the loop would mostly time kernel launches.
# A few untimed warm-up passes keep one-off start-up costs out of the average.
with torch.no_grad():
    for _ in range(10):
        _ = model(dummy_input)
    if DEVICE.type == 'cuda':
        torch.cuda.synchronize()

    start = time.perf_counter()
    for _ in range(100):
        _ = model(dummy_input)
    if DEVICE.type == 'cuda':
        torch.cuda.synchronize()
    end = time.perf_counter()

latency = (end - start) / 100 * 1000 # in ms
print(f"Latency: {latency:.2f} ms")

Starting DeepLabV2 (Classic) Training on Cityscapes...




Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth


100%|██████████| 171M/171M [00:00<00:00, 181MB/s]


Epoch [1/50] Step [0/393] Loss: 2.8585
Epoch [1/50] Step [50/393] Loss: 0.7194
Epoch [1/50] Step [100/393] Loss: 0.4542
Epoch [1/50] Step [150/393] Loss: 0.3840
Epoch [1/50] Step [200/393] Loss: 0.2402
Epoch [1/50] Step [250/393] Loss: 0.3345
Epoch [1/50] Step [300/393] Loss: 0.2666
Epoch [1/50] Step [350/393] Loss: 0.3537
Epoch 1 Saved.
Epoch [2/50] Step [0/393] Loss: 0.2418
Epoch [2/50] Step [50/393] Loss: 0.3757
Epoch [2/50] Step [100/393] Loss: 0.2090
Epoch [2/50] Step [150/393] Loss: 0.2385
Epoch [2/50] Step [200/393] Loss: 0.2225
Epoch [2/50] Step [250/393] Loss: 0.2121
Epoch [2/50] Step [300/393] Loss: 0.2872
Epoch [2/50] Step [350/393] Loss: 0.2315
Epoch 2 Saved.
Epoch [3/50] Step [0/393] Loss: 0.2535
Epoch [3/50] Step [50/393] Loss: 0.2187
Epoch [3/50] Step [100/393] Loss: 0.3407
Epoch [3/50] Step [150/393] Loss: 0.1976
Epoch [3/50] Step [200/393] Loss: 0.1967
Epoch [3/50] Step [250/393] Loss: 0.2312
Epoch [3/50] Step [300/393] Loss: 0.2049
Epoch [3/50] Step [350/393] Loss: 0.

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision import models
from PIL import Image
import torchvision.transforms as transforms
import numpy as np
import os
import sys

# Install thop for FLOPs calculation
try:
    import thop
except ImportError:
    # Install with the interpreter that runs this kernel, and raise if pip
    # fails, instead of a shell `pip` whose exit status was ignored.
    import importlib
    import subprocess

    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'thop'])
    # Make the freshly installed package visible to the import system.
    importlib.invalidate_caches()
    import thop

# Configuration
NUM_CLASSES = 19
BATCH_SIZE = 4
CITYSCAPES_PATH = '/content/dataset/project_data/cityscapes'
CHECKPOINT_PATH = '/content/drive/MyDrive/semseg/deeplabv2_cityscapes.pth'

# Use the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')

# Display names, one per class index (0 .. NUM_CLASSES - 1).
CLASSES = [
    "Road",
    "Sidewalk",
    "Building",
    "Wall",
    "Fence",
    "Pole",
    "Traffic Light",
    "Traffic Sign",
    "Vegetation",
    "Terrain",
    "Sky",
    "Person",
    "Rider",
    "Car",
    "Truck",
    "Bus",
    "Train",
    "Motorcycle",
    "Bicycle",
]

# --- MODEL DEFINITION ---
class ASPP(nn.Module):
    """Multi-rate convolution head that returns the concatenated branch outputs.

    The head holds one 1x1 convolution and, for every entry of ``rates``, one
    3x3 convolution with ``padding == dilation == rate``. All branches see the
    same input and their outputs are concatenated on the channel dimension.

    Args:
        in_channels: Channels of the input feature map.
        out_channels: Channels produced by each individual branch.
        rates: Iterable of dilation rates for the 3x3 branches. The default is
            a tuple rather than a list so it cannot be mutated across calls.

    ``forward`` returns ``(len(rates) + 1) * out_channels`` channels.
    """

    def __init__(self, in_channels, out_channels=256, rates=(6, 12, 18, 24)):
        super().__init__()
        rates = tuple(rates)

        self.convs = nn.ModuleList()
        self.convs.append(nn.Conv2d(in_channels, out_channels, 1, bias=False))
        for rate in rates:
            self.convs.append(
                nn.Conv2d(in_channels, out_channels, 3, padding=rate, dilation=rate, bias=False)
            )

        # NOTE(review): `out_conv` is not used by forward(). Its input width
        # was originally spelled `len(rates) + 1 * out_channels`, which is
        # `len(rates) + out_channels` under operator precedence rather than
        # `(len(rates) + 1) * out_channels`. Because the weight is stored in
        # the checkpoint this cell loads, the shape must stay as it was.
        self.out_conv = nn.Conv2d(len(rates) + out_channels, out_channels, 1, bias=False)

    def forward(self, x):
        """Run each branch on ``x`` and concatenate the results along dim 1."""
        branch_outputs = [conv(x) for conv in self.convs]
        return torch.cat(branch_outputs, dim=1)

class DeepLabV2(nn.Module):
    """ResNet backbone, ASPP head and 1x1 classifier used for evaluation.

    The backbone is built without pretrained weights because every parameter
    is expected to be overwritten by the checkpoint loaded afterwards.

    Args:
        num_classes: Number of output channels of the classifier.
        backbone: ``'resnet101'`` selects ResNet-101; any other value selects
            ResNet-50, the same rule the training definition applies. The
            argument was previously ignored and ResNet-101 was always built.
    """

    def __init__(self, num_classes=19, backbone='resnet101'):
        super().__init__()

        # Weights loaded from checkpoint
        if backbone == 'resnet101':
            resnet = models.resnet101(pretrained=False)
        else:
            resnet = models.resnet50(pretrained=False)

        # Attribute names define the state_dict keys and must match the
        # names used when the checkpoint was written.
        self.layer0 = nn.Sequential(resnet.conv1, resnet.bn1, resnet.relu, resnet.maxpool)
        self.layer1 = resnet.layer1
        self.layer2 = resnet.layer2
        self.layer3 = resnet.layer3
        self.layer4 = resnet.layer4
        self.aspp = ASPP(2048, 256, rates=[6, 12, 18, 24])
        # 5 ASPP branches * 256 channels each = 1280 input channels.
        self.cls_conv = nn.Conv2d(1280, num_classes, 1)

    def forward(self, x):
        """Return per-pixel class scores at the spatial size of ``x``."""
        h, w = x.shape[2], x.shape[3]
        x = self.layer0(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.aspp(x)
        x = self.cls_conv(x)
        # Upsample the coarse prediction back to the input resolution.
        x = F.interpolate(x, size=(h, w), mode='bilinear', align_corners=True)
        return x

class CityscapesDataset(Dataset):
    """Cityscapes images paired with their ``labelTrainIds`` masks.

    Image files are collected from ``<root>/leftImg8bit/<split>/<city>/`` and
    each mask path is derived under ``<root>/gtFine/<split>/<city>/`` by
    swapping the file-name suffix. When the image directory is missing the
    dataset is simply empty.

    Args:
        root: Dataset root directory.
        split: Sub-directory to read (defaults to ``'val'``).
        transform: Optional callable applied to the resized RGB image only.
    """

    def __init__(self, root, split='val', transform=None):
        self.root = root
        self.transform = transform
        self.images_dir = os.path.join(root, 'leftImg8bit', split)
        self.masks_dir = os.path.join(root, 'gtFine', split)
        self.images, self.masks = [], []

        if not os.path.exists(self.images_dir):
            return

        # Sorted traversal keeps the sample order deterministic.
        for city_name in sorted(os.listdir(self.images_dir)):
            city_images = os.path.join(self.images_dir, city_name)
            if not os.path.isdir(city_images):
                continue
            city_masks = os.path.join(self.masks_dir, city_name)
            for entry in sorted(os.listdir(city_images)):
                if not entry.endswith('_leftImg8bit.png'):
                    continue
                self.images.append(os.path.join(city_images, entry))
                mask_entry = entry.replace('_leftImg8bit.png', '_gtFine_labelTrainIds.png')
                self.masks.append(os.path.join(city_masks, mask_entry))

    def __len__(self):
        """Number of discovered image files."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, mask)``; the mask is a ``long`` tensor."""
        target_size = (1024, 512)
        rgb = Image.open(self.images[idx]).convert('RGB')
        image = rgb.resize(target_size, Image.BILINEAR)
        # Nearest-neighbour resampling so label ids are never blended.
        mask = Image.open(self.masks[idx]).resize(target_size, Image.NEAREST)

        if self.transform:
            image = self.transform(image)

        mask_array = np.array(mask)
        return image, torch.from_numpy(mask_array).long()

# --- EVALUATION ---
print(f"Evaluating DeepLabV2: {CHECKPOINT_PATH}")

model = DeepLabV2(num_classes=NUM_CLASSES, backbone='resnet101').to(DEVICE)

if os.path.exists(CHECKPOINT_PATH):
    checkpoint = torch.load(CHECKPOINT_PATH, map_location=DEVICE)
    model.load_state_dict(checkpoint['model_state_dict'])
    print("Model loaded.")
else:
    print("Checkpoint not found.")
    sys.exit()

model.eval()

# 1. Calculate FLOPs
# NOTE(review): thop's documentation names the first returned value MACs;
# confirm whether it should be doubled before being reported as FLOPs.
print("Calculating FLOPs...")
dummy_input = torch.randn(1, 3, 512, 1024).to(DEVICE)
flops, params = thop.profile(model, inputs=(dummy_input, ), verbose=False)
print(f"FLOPs: {flops / 1e9:.2f} G")

# 2. Calculate mIoU
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
dataset = CityscapesDataset(CITYSCAPES_PATH, split='val', transform=transform)
if len(dataset) == 0:
    # Without this check an empty split would silently report "nan%".
    raise RuntimeError(
        f"No validation images found under {CITYSCAPES_PATH}; "
        "check that the dataset archive was restored."
    )
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

# Confusion matrix: rows are ground-truth classes, columns are predictions.
hist = np.zeros((NUM_CLASSES, NUM_CLASSES))
print("Processing validation images...")

with torch.no_grad():
    for i, (images, labels) in enumerate(dataloader):
        images = images.to(DEVICE)
        labels = labels.numpy()
        output = model(images)
        preds = torch.argmax(output, dim=1).cpu().numpy()
        # Keep only pixels whose label is a valid class id; this drops the
        # ignore label (255) used by the training loss.
        mask = (labels >= 0) & (labels < NUM_CLASSES)
        # Encode each (label, prediction) pair as one flat index and count.
        hist += np.bincount(
            NUM_CLASSES * labels[mask].astype(int) + preds[mask],
            minlength=NUM_CLASSES ** 2
        ).reshape(NUM_CLASSES, NUM_CLASSES)

        if i % 20 == 0:
            print(f"Step [{i}/{len(dataloader)}]")

# IoU = TP / (TP + FN + FP). A class with an empty union yields 0/0 = NaN,
# which nanmean skips, so the resulting warning is silenced.
with np.errstate(divide='ignore', invalid='ignore'):
    iou = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist))
miou = np.nanmean(iou)

print(f"\nFinal mIoU (DeepLabV2): {miou * 100:.2f}%")

Evaluating DeepLabV2: /content/drive/MyDrive/semseg/deeplabv2_cityscapes.pth




Model loaded.
Calculating FLOPs...
FLOPs: 92.12 G
Processing validation images...
Step [0/125]
Step [20/125]
Step [40/125]
Step [60/125]
Step [80/125]
Step [100/125]
Step [120/125]

Final mIoU (DeepLabV2): 57.42%
