<a href="https://colab.research.google.com/github/SimoneBorella/semantic-segmentation-domain-adaptation/blob/main/AML_Project4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!pip3 install torch torchvision torchaudio

In [None]:
!pip3 install 'tqdm'
!pip install thop
!pip install albumentations

In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision
import albumentations as A
import numpy as np
import time
from thop import profile
import sys
from tqdm import tqdm

In [None]:
!git clone https://github.com/Gabrysse/MLDL2024_project1.git

sys.path.append('/content/MLDL2024_project1')
print(os.path.exists('/content/MLDL2024_project1'))


In [None]:
# List the cloned DeepLabV2 package directory to confirm the model file is present.
!ls /content/MLDL2024_project1/models/deeplabv2/

deeplabv2.py  __pycache__


In [None]:
# creating the necessary directories if they do not exist
os.makedirs('/content/dataset', exist_ok=True)


# checks if datasets already exist before downloading
if not os.path.exists('/content/dataset/Train.zip'):
    !wget https://zenodo.org/records/5706578/files/Train.zip -P /content/dataset
if not os.path.exists('/content/dataset/Val.zip'):
    !wget https://zenodo.org/records/5706578/files/Val.zip -P /content/dataset

# unzip the downloaded files if they have not been extracted
if not os.path.exists('/content/dataset/Train'):
    !unzip -q /content/dataset/Train.zip -d /content/dataset
if not os.path.exists('/content/dataset/Val'):
    !unzip -q /content/dataset/Val.zip -d /content/dataset


--2024-12-27 17:11:02--  https://zenodo.org/records/5706578/files/Train.zip
Resolving zenodo.org (zenodo.org)... 188.185.48.194, 188.185.43.25, 188.185.45.92, ...
Connecting to zenodo.org (zenodo.org)|188.185.48.194|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4021669263 (3.7G) [application/octet-stream]
Saving to: ‘/content/dataset/Train.zip’


2024-12-27 17:16:13 (12.4 MB/s) - ‘/content/dataset/Train.zip’ saved [4021669263/4021669263]

--2024-12-27 17:16:13--  https://zenodo.org/records/5706578/files/Val.zip
Resolving zenodo.org (zenodo.org)... 188.185.48.194, 188.185.43.25, 188.185.45.92, ...
Connecting to zenodo.org (zenodo.org)|188.185.48.194|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2425958254 (2.3G) [application/octet-stream]
Saving to: ‘/content/dataset/Val.zip’


2024-12-27 17:19:22 (12.3 MB/s) - ‘/content/dataset/Val.zip’ saved [2425958254/2425958254]

--2024-12-27 17:20:22--  https://download.pytorch.org/models/res

In [None]:
# loading the pretrtained models
os.makedirs('pretrained_models', exist_ok=True)

!wget -c --no-check-certificate "https://download.pytorch.org/models/resnet101-5d3b4d8f.pth" -O "pretrained_models/DeepLab_resnet_pretrained_imagenet.pth"

print(os.path.exists('pretrained_models/DeepLab_resnet_pretrained_imagenet.pth'))

In [None]:
# checking the contents: list the top-level folders of the extracted Train and Val splits
!ls /content/dataset/Train
!ls /content/dataset/Val

Rural  Urban
Rural  Urban


In [None]:
from PIL import Image
import torchvision.transforms.functional as F

class LoveDAUrbanDataset(Dataset):
    """Image/mask pairs from the ``Train/Urban`` folder of the dataset root.

    Every ``*.png`` file found in ``Train/Urban/images_png`` is paired with the
    file of the same name in ``Train/Urban/masks_png``.

    Args:
        root_dir: Directory that contains the extracted ``Train`` folder.

    Each item is a tuple ``(image, mask)``:
        image: float tensor of shape (3, 512, 512) scaled to [0, 1].
        mask:  int64 tensor of shape (1, 512, 512) holding the raw label values
               stored in the mask file (no remapping is applied here).
    """

    def __init__(self, root_dir):
        self.images = []
        self.masks = []

        # Paths for urban training data
        image_dir = os.path.join(root_dir, 'Train/Urban/images_png')
        mask_dir = os.path.join(root_dir, 'Train/Urban/masks_png')

        # os.listdir returns entries in arbitrary order; sorting makes the
        # index -> sample mapping reproducible across runs and machines.
        for filename in sorted(os.listdir(image_dir)):
            if filename.endswith('.png'):
                self.images.append(os.path.join(image_dir, filename))
                self.masks.append(os.path.join(mask_dir, filename))

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        # Force three channels so an RGBA or grayscale file cannot change the
        # number of input channels seen by the model.
        image = torchvision.io.read_image(
            self.images[idx], mode=torchvision.io.ImageReadMode.RGB
        ).float() / 255.0  # Normalize to [0, 1]
        mask = torchvision.io.read_image(
            self.masks[idx], mode=torchvision.io.ImageReadMode.GRAY
        )

        # resizing to 512x512 (consistent dimensions for model input).
        # torchvision expects its own InterpolationMode enum here, not the PIL
        # constant; nearest-neighbour keeps mask values as valid label ids.
        image = F.resize(image, size=(512, 512))
        mask = F.resize(
            mask,
            size=(512, 512),
            interpolation=torchvision.transforms.InterpolationMode.NEAREST,
        )

        return image, mask.long()


In [2]:
# data transforms

# def create_transforms():
#    return A.Compose([
#        A.Resize(512, 512),
#        A.Normalize(mean=[0.485, 0.456, 0.406],
#                   std=[0.229, 0.224, 0.225])
#    ])

def create_transforms(mean, std):
    """Build the albumentations pipeline: resize to 512x512, then normalise.

    Args:
        mean: Per-channel mean forwarded to ``A.Normalize``.
        std: Per-channel standard deviation forwarded to ``A.Normalize``.

    Returns:
        The ``A.Compose`` object wrapping both steps, in that order.
    """
    resize_step = A.Resize(512, 512)
    normalize_step = A.Normalize(mean=mean, std=std)
    # ... to be completed later...
    return A.Compose([resize_step, normalize_step])

In [None]:
# calculating Mean IoU
def calculate_miou(model, dataloader, num_classes=7, device='cuda'):
    """Compute the mean Intersection-over-Union of ``model`` over ``dataloader``.

    Args:
        model: Network called as ``model(images)``. It may return the logits
            directly or a tuple whose first element is the logits; logits are
            arg-maxed over dimension 1.
        dataloader: Iterable yielding ``(images, masks)`` batches. ``masks`` must
            contain exactly one label per predicted pixel.
        num_classes: Number of classes; rows/columns of the confusion matrix.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Mean of the per-class IoU values (classes that never occur contribute 0,
        because of the epsilon-guarded division).

    Raises:
        ValueError: If predictions and targets of a batch differ in element count.

    Notes:
        Target labels outside ``[0, num_classes)`` are skipped instead of
        indexing the confusion matrix out of range.
    """
    model.eval()
    num_bins = num_classes * num_classes
    # Flattened confusion matrix: entry t * num_classes + p counts pixels of
    # true class t predicted as class p.
    flat_confusion = torch.zeros(num_bins, dtype=torch.int64)

    with torch.no_grad():
        for images, masks in tqdm(dataloader, desc="Calculating mIoU"):
            images = images.to(device)
            masks = masks.to(device)

            outputs = model(images)
            # Same convention as the training loop: keep the primary output.
            if isinstance(outputs, tuple):
                outputs = outputs[0]
            preds = outputs.argmax(1).reshape(-1).long()
            targets = masks.reshape(-1).long()

            if preds.numel() != targets.numel():
                raise ValueError(
                    f"predictions ({preds.numel()}) and targets ({targets.numel()}) "
                    "differ in number of elements"
                )

            # One bincount per batch replaces the per-pixel Python loop.
            valid = (targets >= 0) & (targets < num_classes)
            pair_index = targets[valid] * num_classes + preds[valid]
            flat_confusion += torch.bincount(pair_index, minlength=num_bins).cpu()

    confusion_matrix = (
        flat_confusion.reshape(num_classes, num_classes).numpy().astype(np.float64)
    )

    # calculating IoU for each class
    true_positive = np.diag(confusion_matrix)
    false_positive = confusion_matrix.sum(axis=0) - true_positive
    false_negative = confusion_matrix.sum(axis=1) - true_positive
    iou_per_class = true_positive / (
        true_positive + false_positive + false_negative + 1e-10
    )

    return np.mean(iou_per_class)


In [None]:
# measuring latency
def measure_latency(model, input_size=(1, 3, 512, 512), device='cuda',
                    num_warmup=10, num_runs=100):
    """Average forward-pass time of ``model`` on a random input, in seconds.

    Args:
        model: Network to time; switched to eval mode.
        input_size: Shape of the random input tensor.
        device: Device the input tensor is moved to.
        num_warmup: Untimed forward passes executed first.
        num_runs: Timed forward passes that are averaged.

    Returns:
        Mean wall-clock seconds per forward pass.

    Raises:
        ValueError: If ``num_runs`` is smaller than 1.
    """
    if num_runs < 1:
        raise ValueError("num_runs must be at least 1")

    model.eval()
    x = torch.randn(input_size).to(device)
    on_cuda = x.device.type == 'cuda'

    # Everything runs without autograd so the warm-up does not build graphs.
    with torch.no_grad():
        # warm-up phase
        for _ in range(num_warmup):
            _ = model(x)

        # CUDA kernels launch asynchronously: wait for pending work before
        # starting the clock and again before stopping it, otherwise only the
        # launch overhead would be measured.
        if on_cuda:
            torch.cuda.synchronize(x.device)
        start_time = time.perf_counter()
        for _ in range(num_runs):
            _ = model(x)
        if on_cuda:
            torch.cuda.synchronize(x.device)
        end_time = time.perf_counter()

    return (end_time - start_time) / num_runs  # Avg latency

In [None]:
# dataset and dataloader

# transforms = create_transforms()
# Urban training images wrapped in a shuffling loader that yields two samples
# per batch, using two worker processes and pinned host memory.
dataset = LoveDAUrbanDataset(root_dir='/content/dataset')
dataloader = DataLoader(
    dataset,
    batch_size=2,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
)

In [None]:
# Importing DeepLabV2 from the provided repository

from models.deeplabv2.deeplabv2 import get_deeplab_v2
print("DeepLabv2 model imported :D")

# Prefer the GPU whenever torch can see one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

# 7-class DeepLabV2 initialised from the downloaded ResNet-101 checkpoint and
# moved to the selected device in one step.
model = get_deeplab_v2(
    num_classes=7,
    pretrain=True,
    pretrain_model_path='pretrained_models/DeepLab_resnet_pretrained_imagenet.pth',
).to(device)

In [None]:
# Loss and optimizer
# NOTE(review): no ignore_index is configured; confirm that every mask label
# lies in [0, 6] for the 7-class head, otherwise the loss will reject the target.
criterion = nn.CrossEntropyLoss()
# Adam over all model parameters with a learning rate of 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# Print GPU status (nvidia-smi) and host memory usage (free -h).
!nvidia-smi
!free -h

Fri Dec 27 17:51:39 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   42C    P0              26W /  70W |    277MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [None]:
# Mixed-precision training loop.
NUM_EPOCHS = 20

# Automatic mixed precision is only useful (and only enabled) on CUDA; on CPU
# both the autocast context and the scaler become no-ops.
use_amp = device.type == 'cuda'
scaler = torch.amp.GradScaler('cuda', enabled=use_amp)

print("Starting training...")
for epoch in range(NUM_EPOCHS):
    model.train()
    total_loss = 0

    for images, masks in tqdm(dataloader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS}"):
        images = images.to(device)
        # Drop only the channel dimension: a bare squeeze() would also remove
        # the batch dimension whenever a batch holds a single sample.
        masks = masks.squeeze(1).to(device)

        optimizer.zero_grad()
        # forward pass
        # torch.amp.autocast takes device_type; torch.cuda.amp.autocast does not.
        with torch.amp.autocast(device_type=device.type, enabled=use_amp):
            outputs = model(images)
            # If the model returns a tuple, extract the primary output
            if isinstance(outputs, tuple):
                outputs = outputs[0]
            loss = criterion(outputs, masks)

        # Backward pass and optimizer step through the gradient scaler.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        total_loss += loss.item()

    avg_loss = total_loss / len(dataloader)
    print(f"Epoch [{epoch+1}/{NUM_EPOCHS}], Loss: {avg_loss:.4f}")


In [None]:
 # calculating metrics
print("\nCalculating metrics...")

# 1. Mean IoU
# NOTE(review): this is evaluated on the same dataloader used for training,
# not on a held-out split.
miou = calculate_miou(model, dataloader, device=device)
print(f"Mean IoU: {miou:.4f}")

# 2. Latency (seconds per forward pass, printed in milliseconds)
latency = measure_latency(model, device=device)
print(f"Latency: {latency*1000:.2f} ms")

# 3. FLOPs (floating point operations) and Parameters, profiled by thop on a
#    single random 512x512 RGB input.
# NOTE(review): thop presumably reports multiply-accumulate counts - confirm
# before labelling the figure as FLOPs.
input_tensor = torch.randn(1, 3, 512, 512).to(device)
flops, params = profile(model, inputs=(input_tensor,))
print(f"FLOPs: {flops/1e9:.2f}G")
print(f"Parameters: {params/1e6:.2f}M")

# saving the results
results = dict(miou=miou, latency=latency, flops=flops, params=params)

In [None]:
 # Save model
# torch.save(model.state_dict(), 'deeplabv2_loveda.pth')
# print("\nTraining completed and model saved!")

# `return` is a SyntaxError outside a function; ending the cell with the bare
# expression displays the metrics dictionary instead.
results