In [1]:
#utility imports
import torch
import torchvision
import numpy as np
import matplotlib.pyplot as plt
from torch import nn
from torchvision import transforms, datasets
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision.transforms import functional as F
from PIL import Image
import time
import random
import sys

#N.B. kaggle directories are used in the ipynb
sys.path.append('/kaggle/input/project/MLDL2024_project1-master')
sys.path.append('/kaggle/input/project/MLDL2024_project1-master/datasets')
sys.path.append('/kaggle/input/project/MLDL2024_project1-master/models/bisenet')

from train import train, val
from utils import poly_lr_scheduler, fast_hist, per_class_iou
# Model imports
from models.deeplabv2 import deeplabv2
from models.bisenet import build_bisenet, build_contextpath
from build_bisenet import BiSeNet
# Dataset imports
from cityscapes import CityScapes
from gta5 import GTA5

In [2]:
# 19 semantic classes
num_classes = 19  

# DEEPLAB v2

In [3]:
# Setup device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [3]:
pretrained_model_path = '/kaggle/input/pretraineddeeplab/deeplab_resnet_pretrained_imagenet.pth'
deeplab_model = deeplabv2.get_deeplab_v2(num_classes=num_classes, pretrain=True, pretrain_model_path=pretrained_model_path)

Deeplab pretraining loading...


In [4]:
# Loss and optimizer
loss_fn = nn.CrossEntropyLoss(ignore_index=255).to(device)
optimizer = torch.optim.Adam(deeplab_model.parameters(), lr=1e-3)
init_lr = 1e-3

In [5]:
# Training and Validation Sets: Cityscapes
root_dir = '/kaggle/input/citiscapes/Cityscapes/Cityspaces'

# Def of transformations for images and labels
transform_img = transforms.Compose([
    transforms.Resize((512, 1024)),
    transforms.ToTensor(),
    transforms.Normalize(mean = [0.485, 0.456, 0.406],
                          std = [0.229, 0.224, 0.225])
])

def transform_label(label):
    label = F.resize(label, (512, 1024), interpolation=Image.NEAREST)
    label = np.array(label, dtype=np.int64)
    return label
transform_label = transform_label

#Datasets
train_dataset = CityScapes(root_dir, split='train', transform_img=transform_img, transform_lab=transform_label)
val_dataset = CityScapes(root_dir, split='val', transform_img=transform_img, transform_lab=transform_label)

batch_size = 2

#DataLoaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader=DataLoader(val_dataset, batch_size=batch_size, num_workers=2)

# sizes
train_size = len(train_dataset)
val_size = len(val_dataset)
print("Training dataset size:", train_size)
print("Validation dataset size:", val_size)

Training dataset size: 1572
Validation dataset size: 500


In [None]:
# Image printing
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

def denormalize(img, mean, std):
    img = img * std + mean
    return np.clip(img, 0, 1)

#Print image and respective label
def show_image(img, label):
    """ Mostra un'immagine con la sua etichetta. """
    img = img.numpy().transpose((1, 2, 0))  #(C, H, W) --> (H, W, C)
    
    #Denormalization for visualization purposes
    img = denormalize(img, mean, std)
    unique_labels = np.unique(label)

    # Image print
    plt.figure(figsize=(12, 6))
    plt.subplot(1, 2, 1)
    plt.imshow(img)
    plt.title('Image')

    label = train_dataset.decode_target(label)
    # Label print
    plt.subplot(1, 2, 2)
    plt.imshow(label)
    plt.title('Label')
    plt.show()

# Printing 5 images
num_images = 5
count = 0

for images, labels in train_loader:
    for i in range(len(images)):
        show_image(images[i], labels[i])
        count += 1
        if count == num_images:
            break
    if count == num_images:
        break

In [7]:
start_epoch = 0

In [None]:
#Upload already trained model
'''
checkpoint = torch.load('/kaggle/input/project/MLDL2024_project1-master/deeplab_final_checkpoint.pth')
deeplab_model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
last_lr = checkpoint['lr']
start_epoch = checkpoint['epoch'] + 1
optimizer = torch.optim.Adam(deeplab_model.parameters(), lr=1e-3)
print("Start epoch:", start_epoch)
'''

In [None]:
deeplab_model.to(device)

In [None]:
# Training
for i in range(start_epoch, 50):
    current_lr = poly_lr_scheduler(optimizer, init_lr, iter = i)
    print(f'Epoch: {i}')
    train(deeplab_model, optimizer, train_loader, loss_fn, device)
    print(f"Current Learning Rate: {current_lr:.6f}")
    torch.save({
            'model_state_dict' : deeplab_model.state_dict(),
            'optimizer_state_dict' : optimizer.state_dict(),
            'epoch' : i,
            'lr': current_lr
        }, '/kaggle/working/deeplab_checkpoint.pth')

In [8]:
# Validation
val_accuracy = val(deeplab_model, val_loader, device)

{'Final mIoU:': 0.5699565805376476}


In [None]:
!pip install -U fvcore

In [10]:
#FLOPs
from fvcore.nn import FlopCountAnalysis, flop_count_table

height, width = 512, 1024  # Sample input size
sample_input = torch.zeros((1, 3, height, width)).to(device)
flops = FlopCountAnalysis(deeplab_model, sample_input)
print(f"FLOPs: {flop_count_table(flops)}")

FLOPs: | module                         | #parameters or shape   | #flops     |
|:-------------------------------|:-----------------------|:-----------|
| model                          | 43.901M                | 0.375T     |
|  conv1                         |  9.408K                |  1.233G    |
|   conv1.weight                 |   (64, 3, 7, 7)        |            |
|  bn1                           |  0.128K                |  16.777M   |
|   bn1.weight                   |   (64,)                |            |
|   bn1.bias                     |   (64,)                |            |
|  layer1                        |  0.216M                |  7.155G    |
|   layer1.0                     |   75.008K              |   2.487G   |
|    layer1.0.conv1              |    4.096K              |    0.136G  |
|    layer1.0.bn1                |    0.128K              |    4.244M  |
|    layer1.0.conv2              |    36.864K             |    1.222G  |
|    layer1.0.bn2                |    0.128K

In [11]:
#Latency
height = 512
width = 1024
image = torch.rand(1, 3, height, width)
iterations = 1000
latency = []
FPS = []

for i in range(iterations):
    start = time.time()
    output = deeplab_model(image.to(device))
    end = time.time()
    l = end - start
    latency.append(l)
    fps = 1/l
    FPS.append(fps)

meanLatency = np.mean(latency)*1000
stdLatency = np.std(latency)*1000
meanFPS = np.mean(FPS)
stdFPS = np.std(FPS)

print(f'Mean Latency {meanLatency}\n STD Latency {stdLatency}\n Mean FPS {meanFPS}\n STD FPS : {stdFPS}')

Mean Latency 246.16708111763
 STD Latency 8.362497980959024
 Mean FPS 4.114738322070233
 STD FPS : 1.7446555297303612


# BISENET

* Train & Validation: Cityscapes

In [5]:
# Setup device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [13]:
bisenet_model = BiSeNet(num_classes=num_classes, context_path='resnet18')

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 133MB/s] 
Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth
100%|██████████| 171M/171M [00:01<00:00, 138MB/s]  


In [14]:
# Loss and optimizer
loss_fn = nn.CrossEntropyLoss(ignore_index=255).to(device)
optimizer = torch.optim.Adam(bisenet_model.parameters(), lr=1e-3)
init_lr = 1e-3

In [15]:
# Training and Validation Sets: Cityscapes
root_dir = '/kaggle/input/citiscapes/Cityscapes/Cityspaces'

# Def of transformations for images and labels
transform_img = transforms.Compose([
    transforms.Resize((512, 1024)),
    transforms.ToTensor(),
    transforms.Normalize(mean = [0.485, 0.456, 0.406],
                          std = [0.229, 0.224, 0.225])
])

def transform_label(label):
    label = F.resize(label, (512, 1024), interpolation=Image.NEAREST)
    label = np.array(label, dtype=np.int64)
    return label
transform_label = transform_label

# Datasets
train_dataset = CityScapes(root_dir, split='train', transform_img=transform_img, transform_lab=transform_label)
val_dataset=CityScapes(root_dir, split='val', transform_img=transform_img, transform_lab=transform_label)

batch_size = 4

# DataLoaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader=DataLoader(val_dataset, batch_size=batch_size, num_workers=2)

# sizes
train_size = len(train_dataset)
val_size = len(val_dataset)
print("Training dataset size:", train_size)
print("Validation dataset size:", val_size)

Training dataset size: 1572
Validation dataset size: 500


In [None]:
start_epoch = 0

In [None]:
# Upload the already trained model
'''
checkpoint = torch.load('/kaggle/input/project/MLDL2024_project1-master/bisenet_final_checkpoint.pth')
bisenet_model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
last_lr = checkpoint['lr']
start_epoch = checkpoint['epoch'] + 1
optimizer = torch.optim.Adam(bisenet_model.parameters(), lr=1e-3)
print(start_epoch)
'''

In [None]:
bisenet_model.to(device)

In [None]:
# Training
for i in range(start_epoch,50):
    current_lr = poly_lr_scheduler(optimizer, init_lr, iter = i)
    print(f'Epoch: {i}')
    train(bisenet_model, optimizer, train_loader, loss_fn,device)
    print(f"Current Learning Rate: {current_lr:.6f}")
    torch.save({
          'model_state_dict' : bisenet_model.state_dict(),
          'optimizer_state_dict' : optimizer.state_dict(),
          'epoch' : i,
          'lr': current_lr
      }, '/kaggle/working/bisenet_checkpoint.pth')

In [19]:
# Validation
val_accuracy = val(bisenet_model, val_loader, device)

{'Final mIoU:': 0.5248672576178455}


In [20]:
# FLOPs
height, width = 512, 1024  # Sample input size
sample_input = torch.zeros((1, 3, height, width)).to(device)
flops = FlopCountAnalysis(bisenet_model, sample_input)
print(f"FLOPs: {flop_count_table(flops)}")

FLOPs: | module                                      | #parameters or shape   | #flops     |
|:--------------------------------------------|:-----------------------|:-----------|
| model                                       | 12.582M                | 25.78G     |
|  saptial_path                               |  0.371M                |  5.088G    |
|   saptial_path.convblock1                   |   1.856K               |   0.243G   |
|    saptial_path.convblock1.conv1            |    1.728K              |    0.226G  |
|    saptial_path.convblock1.bn               |    0.128K              |    16.777M |
|   saptial_path.convblock2                   |   73.984K              |   2.424G   |
|    saptial_path.convblock2.conv1            |    73.728K             |    2.416G  |
|    saptial_path.convblock2.bn               |    0.256K              |    8.389M  |
|   saptial_path.convblock3                   |   0.295M               |   2.42G    |
|    saptial_path.convblock3.conv1            |

In [21]:
#Latency
height = 512
width = 1024
image = torch.rand(1, 3, height, width)
iterations = 1000
latency = []
FPS = []
for i in range(iterations):
    start = time.time()
    output = bisenet_model(image.to(device))
    end = time.time()
    l = end - start
    latency.append(l)
    fps = 1/l
    FPS.append(fps)

meanLatency = np.mean(latency)*1000
stdLatency = np.std(latency)*1000
meanFPS = np.mean(FPS)
stdFPS = np.std(FPS)

print(f'Mean Latency {meanLatency}\n STD Latency {stdLatency}\n Mean FPS {meanFPS}\n STD FPS : {stdFPS}')

Mean Latency 17.143022537231445
 STD Latency 0.8200593395203266
 Mean FPS 58.458662595582865
 STD FPS : 2.877964082072746


* Train: GTA5, Validation: Cityscapes

In [3]:
bisenet_model = BiSeNet(num_classes=num_classes, context_path='resnet18')

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 140MB/s] 
Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth
100%|██████████| 171M/171M [00:01<00:00, 159MB/s]  


In [6]:
# Loss and optimizer
loss_fn = nn.CrossEntropyLoss(ignore_index=255).to(device)
optimizer = torch.optim.Adam(bisenet_model.parameters(), lr=1e-3)
init_lr = 1e-3

In [8]:
# Training Set: GTA5
root_dir = '/kaggle/input/gtadataset/GTA5'

# Def of transformations for images and labels for training set
transform_img = transforms.Compose([
    transforms.Resize((720, 1280)),
    transforms.ToTensor(),
    transforms.Normalize(mean = [0.485, 0.456, 0.406],
                          std = [0.229, 0.224, 0.225])
])

def transform_label(label):
    label = F.resize(label, (720, 1280), interpolation=Image.NEAREST)
    label = np.array(label, dtype=np.int64)
    return label

transform_label = transform_label

# Dataset
train_dataset = GTA5(root_dir, split='train', transform_img=transform_img, transform_lab=transform_label)
batch_size = 4

# DataLoader
train_loader_GTA5 = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# size
train_size = len(train_dataset)
print("Training dataset size:", train_size)

Training dataset size: 2500


In [None]:
start_epoch = 0

In [None]:
# Upload the already trained model
'''
checkpoint = torch.load('/kaggle/input/project/MLDL2024_project1-master/gta_bisenet_final_checkpoint.pth')
bisenet_model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
last_lr = checkpoint['lr']
start_epoch = checkpoint['epoch'] + 1
optimizer = torch.optim.Adam(bisenet_model.parameters(), lr=1e-3)
print(start_epoch)
'''

In [None]:
bisenet_model.to(device)

In [None]:
# Training
for i in range(start_epoch,50):
    current_lr = poly_lr_scheduler(optimizer, init_lr, iter = i)
    print(f'Epoch: {i}')
    train(bisenet_model, optimizer, train_loader_GTA5, loss_fn,device)
    print(f"Current Learning Rate: {current_lr:.6f}")
    torch.save({
          'model_state_dict' : bisenet_model.state_dict(),
          'optimizer_state_dict' : optimizer.state_dict(),
          'epoch' : i,
          'lr': current_lr
      }, '/kaggle/working/bisenet_checkpoint.pth')

In [10]:
# Validation Set: Cityscapes
root_dir_val = '/kaggle/input/citiscapes/Cityscapes/Cityspaces'

# Def of transformations for images and labels for validation set
transform_img = transforms.Compose([
    transforms.Resize((512, 1024)),
    transforms.ToTensor(),
    transforms.Normalize(mean = [0.485, 0.456, 0.406],
                         std = [0.229, 0.224, 0.225])

])

def transform_label(label):
    label = F.resize(label, (512, 1024), interpolation=Image.NEAREST)
    label = np.array(label, dtype=np.int64)
    return label
transform_label = transform_label
batch_size=4
# Dataset
val_dataset = CityScapes(root_dir_val, split='val', transform_img=transform_img,transform_lab=transform_label)
# Dataloader
val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=2)

# size
val_size = len(val_dataset)
print("Validation dataset size:", val_size)

Validation dataset size: 500


In [11]:
# Validation
classes = ['road', 'sidewalk', 'building', 'wall', 'fence', 'pole', 'light', 'sign','vegetation', 'terrain', 'sky','person',
        'rider', 'car','truck','bus','train', 'motocycle','bicycle'] 

val = val(bisenet_model, val_loader, device)
for i in range(len(classes)) : 
    print(f'{classes[i]} : {val[i]*100:.4f}%')

{'Final mIoU:': 0.15122540502980422}
road : 8.3543%
sidewalk : 1.2864%
building : 37.4049%
wall : 1.7961%
fence : 6.4966%
pole : 12.8021%
light : 13.9396%
sign : 2.2340%
vegetation : 73.4990%
terrain : 12.7513%
sky : 57.9718%
person : 23.6138%
rider : 0.0000%
car : 30.0205%
truck : 2.1330%
bus : 3.0157%
train : 0.0093%
motocycle : 0.0000%
bicycle : 0.0000%


# BISENET AUG1 (HORIZONTAL FLIP)

In [12]:
bisenet_model = BiSeNet(num_classes=num_classes, context_path='resnet18')

In [13]:
# Multiple GPU 
device= "cuda" if torch.cuda.is_available() else "cpu"
bisenet_model=bisenet_model.to(device)
if torch.cuda.device_count()>1:
    print(f"Using {torch.cuda.device_count()} GPUs!")
    bisenet_model=nn.DataParallel(bisenet_model)

Using 2 GPUs!


In [14]:
# Loss and optimizer
loss_fn = nn.CrossEntropyLoss(ignore_index=255).to(device)
optimizer = torch.optim.Adam(bisenet_model.parameters(), lr=1e-3)
init_lr = 1e-3

In [6]:
# Training Set: GTA5
root_dir = '/kaggle/input/gtadataset/GTA5'

# Def of transformations for images and labels for training set
transform_img = transforms.Compose([
    transforms.Resize((720, 1280)),
    transforms.ToTensor(),
    transforms.Normalize(mean = [0.485, 0.456, 0.406],
                          std = [0.229, 0.224, 0.225])
])

def transform_label(label):
    label = F.resize(label, (720, 1280), interpolation=Image.NEAREST)
    label = np.array(label, dtype=np.int64)
    return label

# Augumentation
def transform_paired(image, label) :
    random_aug = random.random()
    random_Crop_or_Flip = random.random()
    if random_aug < 0.5: 
        image = F.hflip(image)
        label = F.hflip(label)
    
    return image, label

transform_label = transform_label
transform_paired = transform_paired

# Dataset
train_dataset = GTA5(root_dir, split='train', transform_img=transform_img, transform_lab=transform_label,transform_paired=transform_paired)
batch_size = 8

# DataLoader
train_loader_GTA5 = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# size
train_size = len(train_dataset)
print("Training dataset size:", train_size)

Training dataset size: 2500


In [None]:
start_epoch = 0

In [None]:
# Upload the already trained model
'''
checkpoint = torch.load('/kaggle/input/project/MLDL2024_project1-master/gta_bisenet_aug1_final_checkpoint.pth')
bisenet_model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
last_lr = checkpoint['lr']
start_epoch = checkpoint['epoch'] + 1
optimizer = torch.optim.Adam(bisenet_model.parameters(), lr=1e-3)
print("Start Epoch:", start_epoch)
'''

In [None]:
# Training
for i in range(start_epoch, 50):
    #virtually expanded
    train_dataset = GTA5(root_dir, split='train', transform_img=transform_img, transform_lab=transform_label,transform_paired=transform_paired)
    train_loader_GTA5 = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    current_lr = poly_lr_scheduler(optimizer, init_lr, iter = i)
    print(f'Epoch: {i}')
    train(bisenet_model, optimizer, train_loader_GTA5, loss_fn, device, is_gta5 = True)
    print(f"Current Learning Rate: {current_lr:.6f}")
    torch.save({
      'model_state_dict' : bisenet_model.state_dict(),
      'optimizer_state_dict' : optimizer.state_dict(),
      'epoch' : i,
      'lr': current_lr
    }, '/kaggle/working/gta_bisenet_aug1_nocrop_checkpoint_50.pth')

In [8]:
# Validation Set: Cityscapes
root_dir_val = '/kaggle/input/citiscapes/Cityscapes/Cityspaces'

# Def of transformations for images and labels for validation set
transform_img = transforms.Compose([
    transforms.Resize((512, 1024)),
    transforms.ToTensor(),
    transforms.Normalize(mean = [0.485, 0.456, 0.406],
                         std = [0.229, 0.224, 0.225])

])

def transform_label(label):
    label = F.resize(label, (512, 1024), interpolation=Image.NEAREST)
    label = np.array(label, dtype=np.int64)
    return label
transform_label = transform_label

# Dataset
val_dataset = CityScapes(root_dir_val, split='val', transform_img=transform_img,transform_lab=transform_label)
# Dataloader
val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=2)

# size
val_size = len(val_dataset)
print("Validation dataset size:", val_size)

Validation dataset size: 500


In [9]:
# Validation
classes = ['road', 'sidewalk', 'building', 'wall', 'fence', 'pole', 'light', 'sign','vegetation', 'terrain', 'sky','person',
        'rider', 'car','truck','bus','train', 'motocycle','bicycle'] 

val = val(bisenet_model, val_loader, device)
for i in range(len(classes)) : 
    print(f'{classes[i]} : {val[i]*100:.4f}%')

{'Final mIoU:': 0.1757125084072001}
road : 34.0390%
sidewalk : 4.0214%
building : 44.9786%
wall : 9.6315%
fence : 4.1427%
pole : 13.8076%
light : 10.4628%
sign : 1.9014%
vegetation : 70.6853%
terrain : 6.9042%
sky : 61.6067%
person : 29.7800%
rider : 0.4139%
car : 36.4590%
truck : 3.9542%
bus : 0.4199%
train : 0.0000%
motocycle : 0.6434%
bicycle : 0.0022%


# BISENET AUG2 (VISUAL TRASFORMATIONS)

In [3]:
bisenet_model = BiSeNet(num_classes=num_classes, context_path='resnet18')

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 134MB/s]
Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth
100%|██████████| 171M/171M [00:01<00:00, 166MB/s]  


In [4]:
# Multiple GPU 
device= "cuda" if torch.cuda.is_available() else "cpu"
bisenet_model=bisenet_model.to(device)
if torch.cuda.device_count()>1:
    print(f"Using {torch.cuda.device_count()} GPUs!")
    bisenet_model=nn.DataParallel(bisenet_model)

Using 2 GPUs!


In [5]:
# Loss and optimizer
loss_fn = nn.CrossEntropyLoss(ignore_index=255).to(device)
optimizer = torch.optim.Adam(bisenet_model.parameters(), lr=1e-3)
init_lr = 1e-3

In [6]:
# Training Set: GTA5
root_dir = '/kaggle/input/gtadataset/GTA5'

class ColorJitter(transforms.ColorJitter):
    def __call__(self, image, target):
        return super().__call__(image), target    

# Augmentation
transform_img = transforms.Compose([
    transforms.Resize((720, 1280)),
    transforms.ToTensor(),
    transforms.Normalize(mean = [0.485, 0.456, 0.406],
                          std = [0.229, 0.224, 0.225]),
    transforms.RandomApply([transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05)], p=0.5),
    transforms.RandomApply([transforms.GaussianBlur(kernel_size=(3, 3), sigma=(0.1, 0.1))], p=0.5),
    transforms.RandomApply([transforms.Lambda(lambda img: img + torch.randn_like(img) * 0.02)], p=0.5), 
    #transforms.RandomApply([transforms.Lambda(lambda img: transforms.functional.adjust_gamma(img, gamma=0.8))], p=0.5), #gives problems with gaussian blur
])

# Def of transformations for labels for training set
def transform_label(label):
    label = F.resize(label, (720, 1280), interpolation=Image.NEAREST)
    label = np.array(label, dtype=np.int64)
    return label

transform_label = transform_label

#dataset
train_dataset = GTA5(root_dir, split='train', transform_img=transform_img, transform_lab=transform_label)
batch_size = 8

# DataLoader
train_loader_GTA5 = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# size
train_size = len(train_dataset)
print("Training dataset size:", train_size)

Training dataset size: 2500


In [None]:
start_epoch = 0

In [None]:
#Upload the already trained model
'''
checkpoint = torch.load('/kaggle/input/project/MLDL2024_project1-master/gta_bisenet_aug2_final_checkpoint.pth')
bisenet_model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
last_lr = checkpoint['lr']
start_epoch = checkpoint['epoch'] + 1
optimizer = torch.optim.Adam(bisenet_model.parameters(), lr=1e-3)
print(start_epoch)
'''

In [None]:
# Training
for i in range(start_epoch, 50):
    #virtually expanded
    train_dataset = GTA5(root_dir, split='train', transform_img=transform_img, transform_lab=transform_label)
    train_loader_GTA5 = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    current_lr = poly_lr_scheduler(optimizer, init_lr, iter = i)
    print(f'Epoch: {i}')
    train(bisenet_model, optimizer, train_loader_GTA5, loss_fn, device, is_gta5 = True)
    print(f"Current Learning Rate: {current_lr:.6f}")
    torch.save({
          'model_state_dict' : bisenet_model.state_dict(),
          'optimizer_state_dict' : optimizer.state_dict(),
          'epoch' : i,
          'lr': current_lr
        }, '/kaggle/working/gta_bisenet_aug2_checkpoint.pth')      

In [8]:
# Validation Set: Cityscapes
root_dir_val = '/kaggle/input/citiscapes/Cityscapes/Cityspaces'

# Def of transformations for images and labels for validation set
transform_img = transforms.Compose([
    transforms.Resize((512, 1024)),
    transforms.ToTensor(),
    transforms.Normalize(mean = [0.485, 0.456, 0.406],
                          std = [0.229, 0.224, 0.225])

])

def transform_label(label):
    label = F.resize(label, (512, 1024), interpolation=Image.NEAREST)
    label = np.array(label, dtype=np.int64)
    return label
transform_label = transform_label

#Dataset
val_dataset = CityScapes(root_dir_val, split='val', transform_img=transform_img,transform_lab=transform_label)
#Dataloader
val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=2)
# size
val_size = len(val_dataset)
print("Validation dataset size:", val_size)

Validation dataset size: 500


In [9]:
# Validation
classes = ['road', 'sidewalk', 'building', 'wall', 'fence','pole', 'light', 'sign','vegetation', 'terrain', 'sky','person',
        'rider', 'car','truck','bus','train', 'motocycle','bicycle'] 

val = val(bisenet_model, val_loader, device)
for i in range(len(classes)) : 
    print(f'{classes[i]} : {val[i]*100:.4f}%')

{'Final mIoU:': 0.21005380173728802}
road : 31.0664%
sidewalk : 16.2279%
building : 61.8424%
wall : 9.9058%
fence : 5.2766%
pole : 20.3691%
light : 19.4529%
sign : 4.8772%
vegetation : 72.8250%
terrain : 5.7645%
sky : 80.6103%
person : 30.0358%
rider : 0.1447%
car : 28.8470%
truck : 10.3221%
bus : 0.0067%
train : 0.0000%
motocycle : 1.5280%
bicycle : 0.0000%


# BISENET AUG1 + AUG2

In [4]:
bisenet_model = BiSeNet(num_classes=num_classes, context_path='resnet18')

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 129MB/s] 
Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth
100%|██████████| 171M/171M [00:01<00:00, 134MB/s]  


In [11]:
# Multiple GPU
device= "cuda" if torch.cuda.is_available() else "cpu"
bisenet_model=bisenet_model.to(device)
if torch.cuda.device_count()>1:
    print(f"Using {torch.cuda.device_count()} GPUs!")
    bisenet_model=nn.DataParallel(bisenet_model)

Using 2 GPUs!


In [5]:
# Loss and optimizer
loss_fn = nn.CrossEntropyLoss(ignore_index=255).to(device)
optimizer = torch.optim.Adam(bisenet_model.parameters(), lr=1e-3)
init_lr = 1e-3

In [6]:
# Training Set: GTA5
root_dir = '/kaggle/input/gtadataset/GTA5'

# Def of transformations for images and labels for training set
transform_img = transforms.Compose([
    transforms.Resize((720, 1280)),
    transforms.ToTensor(),
    transforms.Normalize(mean = [0.485, 0.456, 0.406],
                          std = [0.229, 0.224, 0.225])

])

def transform_label(label):
    label = F.resize(label, (720, 1280), interpolation=Image.NEAREST)
    label = np.array(label, dtype=np.int64)
    return label

#Augmentation
def transform_paired(image, label):
    random_aug = random.random()

    if random_aug < 0.5:
        image = F.hflip(image)
        label = F.hflip(label)
        
        image = transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05)(image)
        image = transforms.GaussianBlur(kernel_size=(3, 3), sigma=(0.1, 0.1))(image)
        image = transforms.Lambda(lambda img: img + torch.randn_like(img) * 0.02)(image)
   
    return image, label

transform_label = transform_label
transform_paired = transform_paired

# Dataset
train_dataset = GTA5(root_dir, split='train', transform_img=transform_img, transform_lab=transform_label,transform_paired=transform_paired)
batch_size = 8
# DataLoader
train_loader_GTA5 = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# size
train_size = len(train_dataset)
print("Training dataset size:", train_size)

Training dataset size: 2500


In [None]:
start_epoch = 0

In [None]:
# Upload the already trained model
'''
checkpoint = torch.load('/kaggle/input/project/MLDL2024_project1-master/gta_bisenet_aug12_final_checkpoint.pth')
bisenet_model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
last_lr = checkpoint['lr']
start_epoch = checkpoint['epoch'] + 1
optimizer = torch.optim.Adam(bisenet_model.parameters(), lr=1e-3)
print("Start Epoch:", start_epoch)
'''

In [None]:
# Training
for i in range(start_epoch, 50):
    #virtually expanded
    train_dataset = GTA5(root_dir, split='train', transform_img=transform_img, transform_lab=transform_label,transform_paired=transform_paired)
    train_loader_GTA5 = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    
    current_lr = poly_lr_scheduler(optimizer, init_lr, iter = i)
    print(f'Epoch: {i}')
    train(bisenet_model, optimizer, train_loader_GTA5, loss_fn,device, is_gta5 = True)
    print(f"Current Learning Rate: {current_lr:.6f}")
    torch.save({
      'model_state_dict' : bisenet_model.state_dict(),
      'optimizer_state_dict' : optimizer.state_dict(),
      'epoch' : i,
      'lr': current_lr
    }, '/kaggle/working/gta_bisenet_aug12_checkpoint.pth')

In [8]:
# Validation Set: Cityscapes
root_dir_val = '/kaggle/input/citiscapes/Cityscapes/Cityspaces'

# Def of transformations for images and labels for validation set
transform_img = transforms.Compose([
    transforms.Resize((512, 1024)),
    transforms.ToTensor(),
    transforms.Normalize(mean = [0.485, 0.456, 0.406],
                          std = [0.229, 0.224, 0.225])
])

def transform_label(label):
    label = F.resize(label, (512, 1024), interpolation=Image.NEAREST)
    label = np.array(label, dtype=np.int64)
    return label
transform_label = transform_label

# Dataset
val_dataset = CityScapes(root_dir_val, split='val', transform_img=transform_img,transform_lab=transform_label)
# Dataloader
val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=2)

# size
val_size = len(val_dataset)
print("Validation dataset size:", val_size)

Validation dataset size: 500


In [11]:
# Validation
classes = ['road', 'sidewalk', 'building', 'wall', 'fence', 'pole', 'light', 'sign','vegetation', 'terrain', 'sky','person',
        'rider', 'car','truck','bus','train', 'motocycle','bicycle'] 

val = val(bisenet_model, val_loader, device)
for i in range(len(classes)) : 
    print(f'{classes[i]} : {val[i]*100:.4f}%')

{'Final mIoU:': 0.23138683399844664}
road : 42.5805%
sidewalk : 13.0382%
building : 71.3595%
wall : 16.1009%
fence : 10.0409%
pole : 17.3282%
light : 16.6782%
sign : 4.9166%
vegetation : 77.3120%
terrain : 17.5203%
sky : 80.6919%
person : 33.4542%
rider : 1.2103%
car : 20.4097%
truck : 9.5867%
bus : 2.8110%
train : 0.0000%
motocycle : 4.5907%
bicycle : 0.0050%


# FDA

In [3]:
bisenet_model = BiSeNet(num_classes=num_classes, context_path='resnet18')

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 140MB/s] 
Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth
100%|██████████| 171M/171M [00:01<00:00, 145MB/s]  


In [4]:
#Multiple GPU
device= "cuda" if torch.cuda.is_available() else "cpu"
bisenet_model=bisenet_model.to(device)
if torch.cuda.device_count()>1:
    print(f"Using {torch.cuda.device_count()} GPUs!")
    bisenet_model=nn.DataParallel(bisenet_model) 

Using 2 GPUs!


In [7]:
# Loss and optimizer
loss_fn = nn.CrossEntropyLoss(ignore_index=255).to(device)
optimizer = torch.optim.Adam(bisenet_model.parameters(), lr=1e-3)
init_lr = 1e-3

In [8]:
# Training Set: GTA5
root_dir = '/kaggle/input/gtadataset/GTA5'

transform_img = transforms.Compose([
    transforms.Resize((720, 1280)),
    transforms.ToTensor(),
])

def transform_label(label):
    label = F.resize(label, (720, 1280), interpolation=Image.NEAREST)
    label = np.array(label, dtype=np.int64)
    return label

def transform_paired(image, label):
    random_aug = random.random()

    if random_aug < 0.5:
        image = F.hflip(image)
        label = F.hflip(label)

    return image, label

transform_label = transform_label
transform_paired = transform_paired

train_dataset = GTA5(root_dir, split='train', transform_img=transform_img, transform_lab=transform_label, transform_paired=transform_paired)
batch_size = 4

# Create a DataLoader for the training dataset
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
train_size = len(train_dataset)

print("Training dataset size:", train_size)

Training dataset size: 2500


In [9]:
# Cityscapes Dataset
root_dir_val = '/kaggle/input/citiscapes/Cityscapes/Cityspaces'

City_dataset = CityScapes(root_dir_val, split='train', transform_img=transform_img,transform_lab=transform_label)
City_loader = DataLoader(City_dataset, batch_size=4, num_workers=2)

In [10]:
# FDA Functions
def low_freq_mutate_np( amp_src, amp_trg, L=0.1 ):
    a_src = np.fft.fftshift( amp_src, axes=(-2, -1) )
    a_trg = np.fft.fftshift( amp_trg, axes=(-2, -1) )

    _, h, w = a_src.shape
    b = (  np.floor(np.amin((h,w))*L)  ).astype(int)
    c_h = np.floor(h/2.0).astype(int)
    c_w = np.floor(w/2.0).astype(int)

    h1 = c_h-b
    h2 = c_h+b+1
    w1 = c_w-b
    w2 = c_w+b+1

    a_src[:,h1:h2,w1:w2] = a_trg[:,h1:h2,w1:w2]
    a_src = np.fft.ifftshift( a_src, axes=(-2, -1) )
    return a_src

def FDA_source_to_target_np( src_img, trg_img, L=0.1 ):
    src_img_np = src_img 
    trg_img_np = trg_img 

    fft_src_np = np.fft.fft2( src_img_np, axes=(-2, -1) )
    fft_trg_np = np.fft.fft2( trg_img_np, axes=(-2, -1) )

    amp_src, pha_src = np.abs(fft_src_np), np.angle(fft_src_np)
    amp_trg, pha_trg = np.abs(fft_trg_np), np.angle(fft_trg_np)

    amp_src_ = low_freq_mutate_np( amp_src, amp_trg, L=L )

    fft_src_ = amp_src_ * np.exp( 1j * pha_src )

    src_in_trg = np.fft.ifft2( fft_src_, axes=(-2, -1) )
    src_in_trg = np.real(src_in_trg)

    return src_in_trg

In [None]:
start_epoch = 0

In [11]:
# FDA Transform & DACS Training Functions
def FDA_Transform(src_img_batch, src_lbl_batch, trg_img_batch, trg_lbl_batch) : 
    transform_aug = transforms.Compose([
        transforms.RandomApply([transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05)], p=0.5),
        transforms.RandomApply([transforms.GaussianBlur(kernel_size=(3, 3), sigma=(0.1, 0.1))], p=0.5),
        transforms.RandomApply([transforms.Lambda(lambda img: img + torch.randn_like(img) * 0.02)], p=0.5), 
    ])

    src_img_batch = np.asarray(src_img_batch.cpu(), np.float32)
    trg_img_batch = np.asarray(trg_img_batch.cpu(), np.float32)
    src_in_trg_batch = np.array([FDA_source_to_target_np(src_img, trg_img, L=0.01) for src_img, trg_img in zip(src_img_batch, trg_img_batch)])
    
    src_in_trg_tensor = torch.from_numpy(src_in_trg_batch)
    src_in_trg_tensor = transform_aug(src_in_trg_tensor)
    src_in_trg_tensor = src_in_trg_tensor.numpy()

    src_in_trg_tensor = src_in_trg_tensor.transpose((0, 2, 3, 1))
    src_in_trg_batch = src_in_trg_batch.transpose((0, 2, 3, 1))
    trg_img_batch = trg_img_batch.transpose((0, 2, 3, 1))
    src_img_batch = src_img_batch.transpose((0, 2, 3, 1))
    
    return src_in_trg_tensor, src_in_trg_batch, trg_img_batch, src_img_batch 

def train_FDA(model, optimizer, dataloader_GTA, val_loader, loss_fn, device, is_gta5=False):
    model.train()
    hist = np.zeros((19, 19))

    IMG_MEAN = np.array((104.00698793, 116.66876762, 122.67891434), dtype=np.float32)
    IMG_MEAN = torch.reshape(torch.from_numpy(IMG_MEAN), (1, 3, 1, 1))
    mean_img = torch.zeros(1, 1)

    val_iter = iter(val_loader)

    for batch_idx, (src_img_batch, src_lbl_batch) in enumerate(dataloader_GTA):
        try:
            trg_img_batch, trg_lbl_batch = next(val_iter)
        except StopIteration:
            val_iter = iter(val_loader)
            trg_img_batch, trg_lbl_batch = next(val_iter)
            
        src_img_batch, src_lbl_batch = src_img_batch.to(device), src_lbl_batch.to(device).long()
        trg_img_batch, trg_lbl_batch = trg_img_batch.to(device), trg_lbl_batch.to(device).long()
        
        if is_gta5: 
            src_lbl_batch[src_lbl_batch > 18] = 255
            
        src_in_trg_tensor, src_in_trg_batch, trg_img_batch, src_img_batch  = FDA_Transform(src_img_batch, src_lbl_batch, trg_img_batch, trg_lbl_batch)
        src_in_trg_tensor = src_in_trg_tensor.transpose((0, 3, 1, 2))
        src_in_trg_tensor = torch.tensor(src_in_trg_tensor, dtype=torch.float32)
        
        normalize = transforms.Normalize(mean=mean, std=std)
        src_in_trg_tensor = normalize(src_in_trg_tensor)
        
        outputs = model(src_in_trg_tensor)
        src_lbl_batch = src_lbl_batch.to(outputs[0].device)
        loss = loss_fn(outputs[0], src_lbl_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        _, predicted = outputs[0].max(1)
        
        if batch_idx % 100 == 0 : 
            hist += fast_hist(src_lbl_batch.cpu().flatten().numpy(), predicted.cpu().flatten().numpy(), 19)
            print(per_class_iou(hist))
            miou = np.mean(per_class_iou(hist)) 
            print(f'Partial mIoU at batch {batch_idx} = {miou}') 
            
        hist += fast_hist(src_lbl_batch.cpu().flatten().numpy(), predicted.cpu().flatten().numpy(), 19)
        miou = np.mean(per_class_iou(hist)) 
 

    print(per_class_iou(hist))
    miou = np.mean(per_class_iou(hist))
    print({"Final mIoU:": miou})

In [None]:
# Upload the already trained model
'''
checkpoint = torch.load('/kaggle/input/project/MLDL2024_project1-master/gta_bisenet_FDA_checkpoint_final.pth')
bisenet_model.load_state_dict(checkpoint['model_state_dict'], strict = False)
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
last_lr = checkpoint['lr']
start_epoch = checkpoint['epoch'] + 1
optimizer = torch.optim.Adam(bisenet_model.parameters(), lr=1e-3)
print("Start Epoch:", start_epoch)
'''

In [None]:
# Training
for i in range(start_epoch, 50):
    train_dataset = GTA5(root_dir, split='train', transform_img=transform_img, transform_lab=transform_label,transform_paired=transform_paired)
    train_loader_GTA5 = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    current_lr = poly_lr_scheduler(optimizer, init_lr, iter = i)
    print(f'Epoch: {i}')
    train_FDA(bisenet_model, optimizer, train_loader_GTA5, City_loader, loss_fn, device, is_gta5 = True)
    torch.save({
      'model_state_dict' : bisenet_model.state_dict(),
      'optimizer_state_dict' : optimizer.state_dict(),
      'epoch' : i,
      'lr': current_lr
    }, '/kaggle/working/gta_bisenet_FDA_checkpoint.pth')
    print(f"Current Learning Rate: {current_lr:.6f}")


In [13]:
# Validation
classes = ['road', 'sidewalk', 'building', 'wall', 'fence', 'pole', 'light', 'sign','vegetation', 'terrain', 'sky','person',
        'rider', 'car','truck','bus','train', 'motocycle','bicycle'] 

# Validation Set: Cityscapes
val_dataset = CityScapes(root_dir_val, split='val', transform_img=transform_img,transform_lab=transform_label)
val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=2)

val = val(bisenet_model, val_loader, device)
for i in range(len(classes)) : 
    print(f'{classes[i]} : {val[i]*100:.4f}%')

{'Final mIoU:': 0.26429145032691437}
road : 77.6584%
sidewalk : 21.5107%
building : 73.2378%
wall : 19.9345%
fence : 6.2037%
pole : 21.4600%
light : 12.2506%
sign : 5.0337%
vegetation : 79.6623%
terrain : 20.0299%
sky : 80.5993%
person : 29.2181%
rider : 0.4521%
car : 41.4112%
truck : 7.2434%
bus : 2.8852%
train : 0.0000%
motocycle : 3.3183%
bicycle : 0.0445%


# DACS

In [3]:
bisenet_model = BiSeNet(num_classes=num_classes, context_path='resnet18')

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 143MB/s] 
Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth
100%|██████████| 171M/171M [00:01<00:00, 155MB/s]  


In [4]:
# Multiple GPU
device= "cuda" if torch.cuda.is_available() else "cpu"
bisenet_model=bisenet_model.to(device)
if torch.cuda.device_count()>1:
    print(f"Using {torch.cuda.device_count()} GPUs!")
    bisenet_model=nn.DataParallel(bisenet_model)

Using 2 GPUs!


In [5]:
# Loss and optimizer
loss_fn = nn.CrossEntropyLoss(ignore_index=255).to(device)
optimizer = torch.optim.Adam(bisenet_model.parameters(), lr=1e-3)
init_lr = 1e-3

In [6]:
#Function to generate masks
def generate_cutout_mask(img_size, seed=None):
    np.random.seed(seed)
    cutout_area = img_size[0] * img_size[1] / 2
    w = np.random.randint(img_size[1] / 2, img_size[1] + 1)
    h = np.round(cutout_area / w)
    x_start = np.random.randint(0, img_size[1] - w + 1)
    y_start = np.random.randint(0, img_size[0] - h + 1)
    x_end = int(x_start + w)
    y_end = int(y_start + h)
    mask = np.ones(img_size)
    mask[y_start:y_end, x_start:x_end] = 0
    return mask.astype(float)

# mix Function
def oneMix(mask, data=None, target=None):
    if data is not None:
        stackedMask0, _ = torch.broadcast_tensors(mask, data[0])
        data = (stackedMask0 * data[0] + (1 - stackedMask0) * data[1])
    if target is not None:
        stackedMask0, _ = torch.broadcast_tensors(mask, target[0])
        target = (stackedMask0 * target[0] + (1 - stackedMask0) * target[1])
    return data, target

# Transform definition GTA5
transform_img_gta5 = transforms.Compose([
    transforms.Resize((512, 1024)),
    transforms.ToTensor(),
    transforms.Normalize(mean = [0.485, 0.456, 0.406],
                          std = [0.229, 0.224, 0.225])
])

def transform_label_gta5(label):
    label = F.resize(label, (512, 1024), interpolation=Image.NEAREST)
    label = np.array(label, dtype=np.int64)
    return label

# Transform definition Cityscapes
transform_img_cityscapes = transforms.Compose([
    transforms.Resize((512, 1024)),
    transforms.ToTensor(),
    transforms.Normalize(mean = [0.485, 0.456, 0.406],
                          std = [0.229, 0.224, 0.225])
])

def transform_label_cityscapes(label):
    label = F.resize(label, (512, 1024), interpolation=Image.NEAREST)
    label = np.array(label, dtype=np.int64)
    return label

# Function that selects a class
def select_class_from_label(label, class_id):
    mask = label == class_id
    return mask

# Function to mix selected classes
def mix_classes(city_image, city_label, gta_image, gta_label, class_id):
    gta_mask = select_class_from_label(gta_label, class_id)
    gta_mask = torch.tensor(gta_mask, dtype=torch.float32).clone().detach()
    gta_mask = gta_mask.unsqueeze(0)  # Aggiunge una dimensione per il broadcasting
    gta_image = gta_image * gta_mask
    gta_label = gta_label * gta_mask.squeeze(0)
    city_image_mixed = gta_mask * gta_image + (1 - gta_mask) * city_image
    city_label_mixed = gta_mask * gta_label + (1 - gta_mask) * city_label
    return city_image_mixed, city_label_mixed

def transform_paired(image, label):
    random_aug = random.random()
    if random_aug < 0.5:
        image = F.hflip(image)
        label = F.hflip(label)
    return image, label

In [7]:
# Dataset & Dataloaders
root_dir_gta5 = '/kaggle/input/gtadataset/GTA5'
root_dir_cityscapes = '/kaggle/input/citiscapes/Cityscapes/Cityspaces'

batch_size = 4

# Datasets
train_dataset_gta5 = GTA5(root_dir_gta5, split='train', transform_img=transform_img_gta5, transform_lab=transform_label_gta5)
train_dataset_cityscapes = CityScapes(root_dir_cityscapes, split='train', transform_img=transform_img_cityscapes, transform_lab=transform_label_cityscapes)
# Dataloaders
train_loader_gta5 = DataLoader(train_dataset_gta5, batch_size = batch_size, shuffle=True)
train_loader_cityscapes = DataLoader(train_dataset_cityscapes, batch_size = batch_size, shuffle=True)

# Sizes
train_size_gta5 = len(train_dataset_gta5)
train_size_cityscapes = len(train_dataset_cityscapes)
print("Training dataset size (GTA5):", train_size_gta5)
print("Training dataset size (Cityscapes):", train_size_cityscapes)

Training dataset size (GTA5): 2500
Training dataset size (Cityscapes): 1572


In [16]:
# DACS Training Function
def train_DACS(model, optimizer, dataloader_GTA, val_loader, loss_fn, device, is_gta5=False):
    model.train()
    hist = np.zeros((19, 19))
    train_dataset_gta5 = GTA5(root_dir_gta5, split='train', transform_img=transform_img_gta5, transform_lab=transform_label_gta5)

    val_iter = iter(val_loader)

    for batch_idx, (gta_images, gta_labels) in enumerate(dataloader_GTA):
        #print('Batch ', batch_idx)
        try:
            city_images, city_labels = next(val_iter)
        except StopIteration:
            val_iter = iter(val_loader)
            city_images, city_labels = next(val_iter)

        gta_images, gta_labels = gta_images.to(device), gta_labels.to(device).long()
        city_images, city_labels = city_images.to(device), city_labels.to(device).long()

        batch, channels, height, width = city_images.shape
        city_images_tensor = []
        city_labels_tensor = []

        for city_image, city_label, gta_image, gta_label in zip(city_images, city_labels, gta_images, gta_labels) :
            class_Ids = list(range(0, 20))
            class_id = random.choice(class_Ids)
            class_Ids.remove(class_id)

            mixed_image, mixed_label = mix_classes(city_image, city_label, gta_image, gta_label, class_id)
            for class_id in random.sample(class_Ids, k=len(class_Ids)//2):
                mixed_image, mixed_label = transform_paired(mixed_image, mixed_label)
                mixed_label = mixed_label.long()
                mixed_image, mixed_label = mix_classes(mixed_image, mixed_label, gta_image, gta_label, class_id)

            mixed_image, mixed_label = transform_paired(mixed_image, mixed_label)
            mixed_label = mixed_label.long()
            city_images_tensor.append(mixed_image)
            city_labels_tensor.append(mixed_label)

        city_images_tensor = torch.stack(city_images_tensor)
        city_labels_tensor = torch.stack(city_labels_tensor)
        city_labels_tensor = city_labels_tensor.squeeze(1)

        outputs = model(city_images_tensor)
        city_labels_tensor = city_labels_tensor.to(outputs[0].device)
        loss = loss_fn(outputs[0], city_labels_tensor)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        _, predicted = outputs[0].max(1)
        hist += fast_hist(city_labels_tensor.cpu().flatten().numpy(), predicted.cpu().flatten().numpy(), 19)
    print(per_class_iou(hist))
    miou = np.mean(per_class_iou(hist))
    print({"Final mIoU:": miou})

In [None]:
# Upload the already trained model
'''
checkpoint = torch.load('/kaggle/input/project/MLDL2024_project1-master/gta_bisenet_DACS_checkpoint_final.pth')
bisenet_model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
last_lr = checkpoint['lr']
start_epoch = checkpoint['epoch'] + 1
optimizer = torch.optim.Adam(bisenet_model.parameters(), lr=1e-3)
print("Start Epoch:", start_epoch)
'''

In [None]:
start_epoch = 0

In [None]:
train_dataset_gta5 = GTA5(root_dir_gta5, split='train', transform_img=transform_img_gta5, transform_lab=transform_label_gta5)
train_dataset_cityscapes = CityScapes(root_dir_cityscapes, split='train', transform_img=transform_img_cityscapes, transform_lab=transform_label_cityscapes)

# Training
for i in range(start_epoch, 50) :
    #virtually expanded
    train_loader_gta5 = DataLoader(train_dataset_gta5, batch_size=8, shuffle=True)
    vtrain_loader_cityscapes = DataLoader(train_dataset_cityscapes, batch_size=8, shuffle=True)
    
    print(f'Epoch: {i}')
    current_lr = poly_lr_scheduler(optimizer, init_lr, iter = i)
    train_DACS(bisenet_model, optimizer, train_loader_gta5, train_loader_cityscapes, loss_fn, device, is_gta5 = True)
    torch.save({
          'model_state_dict' : bisenet_model.state_dict(),
          'optimizer_state_dict' : optimizer.state_dict(),
          'epoch' : i,
          'lr': current_lr
        }, '/kaggle/working/gta_bisenet_DACS_checkpoint.pth')

In [None]:
# Validation
classes = ['road', 'sidewalk', 'building', 'wall', 'fence', 'pole', 'light', 'sign','vegetation', 'terrain', 'sky','person',
        'rider', 'car','truck','bus','train', 'motocycle','bicycle'] 

root_dir_val = '/kaggle/input/citiscapes/Cityscapes/Cityspaces'
val_dataset = CityScapes(root_dir_val, split='val', transform_img=transform_img_cityscapes,transform_lab=transform_label_cityscapes)
val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=2)

val = val(bisenet_model, val_loader, device)
for i in range(len(classes)) : 
    print(f'{classes[i]} : {val[i]*100:.4f}%')

{'Final mIoU:': 0.3212515447123231}
road : 88.2905%
sidewalk : 30.8621%
building : 76.9870%
wall : 21.5801%
fence : 13.7093%
pole : 25.7958%
light : 13.7701%
sign : 14.9595%
vegetation : 70.6447%
terrain : 13.2385%
sky : 64.4308%
person : 48.1232%
rider : 0.6920%
car : 80.5979%
truck : 14.5439%
bus : 21.4569%
train : 7.0231%
motocycle : 3.6725%
bicycle : 0.0000%
