In [None]:
# Setup: stage the project scripts, model code, dataset helpers and the
# pretrained checkpoint from the read-only input folder into the writable
# working directory. Safe to re-run: existing files are overwritten in place.
import sys
from shutil import copytree, copyfile

INPUT_ROOT = "/kaggle/input/cityscapes"
WORKING_ROOT = "/kaggle/working"

# Make modules that live directly under the input folder importable
# (without stacking duplicate entries when the cell is executed again).
if INPUT_ROOT not in sys.path:
    sys.path.append(INPUT_ROOT)

# Directory trees go first: `datasets/` must exist in the working directory
# before Cityscapes_v2.py is copied into it below.
# dirs_exist_ok=True lets copytree merge into an already-populated target
# instead of raising FileExistsError on a second run.
for folder in ("datasets", "models"):
    copytree(f"{INPUT_ROOT}/{folder}", f"{WORKING_ROOT}/{folder}", dirs_exist_ok=True)

# Individual files as (path relative to input root, path relative to working root).
FILES_TO_COPY = (
    ("Cityscapes_v2.py", "datasets/Cityscapes_v2.py"),
    ("deeplab_resnet_pretrained_imagenet.pth", "deeplab_resnet_pretrained_imagenet.pth"),
    ("utils.py", "utils.py"),
)
for relative_src, relative_dst in FILES_TO_COPY:
    copyfile(f"{INPUT_ROOT}/{relative_src}", f"{WORKING_ROOT}/{relative_dst}")

In [None]:
# Needed in every session: makes the dataset modules under the input folder
# importable. `sys` is imported here so this cell does not depend on the
# one-time setup cell having been executed in the current kernel.
import sys

DATASETS_MODULE_DIR = '/kaggle/input/cityscapes/datasets/'
if DATASETS_MODULE_DIR not in sys.path:  # avoid duplicate entries on re-run
    sys.path.append(DATASETS_MODULE_DIR)

In [None]:
# TODO: Define here your training and validation loops.

import torch.nn.functional as F
from Cityscapes import CityScapes
from torch.utils.data import DataLoader
import torch
import numpy as np
from torchvision import transforms
from torchvision.transforms import v2
import matplotlib.pyplot as plt
from models.bisenet.build_bisenet import BiSeNet
from utils import poly_lr_scheduler

def mIoU(pred_mask, mask, smooth=1e-10, n_classes=19):
    """Compute the mean intersection-over-union of a prediction against labels.

    Args:
        pred_mask: per-class scores; the class axis is dim 1 (softmax and
            argmax are taken over it).
        mask: ground-truth class ids; must hold as many elements as the
            arg-maxed prediction, since both are flattened before comparison.
        smooth: small constant added to numerator and denominator of each IoU.
        n_classes: class ids ``0 .. n_classes - 1`` are evaluated.

    Returns:
        A tuple ``(mean_iou, iou_per_class)``. ``iou_per_class`` is a list of
        length ``n_classes`` holding ``nan`` for every class that does not
        occur in ``mask``; ``mean_iou`` is the nan-ignoring mean of that list.
    """
    with torch.no_grad():
        class_scores = F.softmax(pred_mask, dim=1)
        predicted = torch.argmax(class_scores, dim=1).contiguous().view(-1)
        expected = mask.contiguous().view(-1)

        scores = []
        for class_id in range(n_classes):
            in_prediction = predicted == class_id
            in_label = expected == class_id

            # A class absent from the ground truth is excluded from the mean.
            if not bool(in_label.any()):
                scores.append(np.nan)
                continue

            overlap = (in_prediction & in_label).sum().float().item()
            combined = (in_prediction | in_label).sum().float().item()
            scores.append((overlap + smooth) / (combined + smooth))
        return np.nanmean(scores), scores
    
def pixel_accuracy(output, mask):
    """Return the fraction of pixels whose predicted class equals the label.

    Args:
        output: per-class scores; the class axis is dim 1.
        mask: ground-truth class ids, either shaped like the arg-maxed
            prediction or carrying one extra singleton axis at dim 1.

    Returns:
        Accuracy as a Python float in ``[0, 1]``.
    """
    with torch.no_grad():
        # argmax over raw scores picks the same class as argmax over their
        # softmax, so the softmax pass is skipped.
        predicted = torch.argmax(output, dim=1)
        # Drop a singleton channel axis on the labels. Without this, comparing
        # (B, H, W) with (B, 1, H, W) silently broadcasts to (B, B, H, W) and
        # scores every prediction against every label map in the batch.
        if mask.dim() == predicted.dim() + 1 and mask.size(1) == 1:
            mask = mask.squeeze(dim=1)
        correct = torch.eq(predicted, mask).int()
        accuracy = float(correct.sum()) / float(correct.numel())
    return accuracy

def convert_tensor_to_image(tensor):
    """Return a view of a 3-D tensor with its first axis moved to the end.

    The axes are reordered from ``(0, 1, 2)`` to ``(1, 2, 0)``; no data is copied.
    """
    return tensor.permute(1, 2, 0)

def train(model, optimizer, train_loader, criterion):
    """Run one training epoch and return its averaged metrics.

    Args:
        model: network to optimise; in training mode it is expected to return
            three outputs, of which only the first is used here.
        optimizer: optimiser stepped once per batch.
        train_loader: iterable yielding ``(inputs, targets)`` batches.
        criterion: loss callable invoked as ``criterion(outputs, targets)``.

    Returns:
        ``(train_loss, iou_score, accuracy)``, each averaged over the number
        of batches in ``train_loader``.

    Relies on the module-level names ``device``, ``scheduler``, ``mIoU`` and
    ``pixel_accuracy``.
    """
    model.train()
    running_loss = 0.0
    iou_score = 0.0
    accuracy = 0.0
    for inputs, targets in train_loader:
        inputs = inputs.to(device).float()
        # Labels arrive with a singleton axis at dim 1; drop it for the loss.
        targets = targets.to(device).squeeze(dim=1)

        # Compute prediction and loss (the two extra model outputs are unused).
        outputs, _, _ = model(inputs)
        # Use the loss passed in by the caller instead of a global one.
        loss = criterion(outputs.to(dtype=torch.float32), targets.to(dtype=torch.int64))

        # mIoU returns (mean, per-class list); only the mean is accumulated.
        batch_iou, _ = mIoU(outputs, targets)
        iou_score += batch_iou
        accuracy += pixel_accuracy(outputs, targets)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # NOTE(review): `scheduler` is a module-level global built from
    # `poly_lr_scheduler`; this assumes the returned object exposes `.step()`
    # - confirm against utils.py.
    scheduler.step()

    n_batches = len(train_loader)
    train_loss = running_loss / n_batches
    iou_score = iou_score / n_batches
    accuracy = accuracy / n_batches
    return train_loss, iou_score, accuracy

# Test loop
def test(model, test_loader, loss_fn):
    """Evaluate the model on a loader and return its averaged metrics.

    Args:
        model: network to evaluate; switched to eval mode here.
        test_loader: iterable yielding ``(inputs, targets)`` batches.
        loss_fn: loss callable invoked as ``loss_fn(outputs, targets)``.

    Returns:
        ``(test_loss, iou_score, accuracy)``, each averaged over the number
        of batches in ``test_loader``.

    Relies on the module-level names ``device``, ``mIoU`` and
    ``pixel_accuracy``.
    """
    model.eval()
    test_loss = 0.0
    iou_score = 0.0
    accuracy = 0.0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs = inputs.to(device).float()
            # Squeeze the singleton label axis once so the loss and both
            # metrics all see labels shaped like the arg-maxed prediction.
            targets = targets.to(device).squeeze(dim=1).to(dtype=torch.int64)

            # Compute prediction and loss
            outputs = model(inputs)
            # Defensive: if the model still returns auxiliary outputs in eval
            # mode, keep only the first one (no-op for a plain tensor).
            if isinstance(outputs, (tuple, list)):
                outputs = outputs[0]
            outputs = outputs.to(dtype=torch.float32)

            loss = loss_fn(outputs, targets)
            # mIoU returns (mean, per-class list); only the mean is accumulated.
            batch_iou, _ = mIoU(outputs, targets)
            iou_score += batch_iou
            accuracy += pixel_accuracy(outputs, targets)
            test_loss += loss.item()

    n_batches = len(test_loader)
    test_loss = test_loss / n_batches
    iou_score = iou_score / n_batches
    accuracy = accuracy / n_batches
    return test_loss, iou_score, accuracy

# Dataset locations. The image / label folders are derived from the two root
# paths defined right here (the previous code referenced root variables that
# were never defined).
gta_path = '/kaggle/input/GTA/GTA5/GTA5/'
image_train = gta_path + 'images/'
sem_map_train = gta_path + 'labels_correct/'
cityscapes_path = '/kaggle/input/cityscapes/Cityscapes/Cityscapes/Cityspaces/'
image_val = cityscapes_path + 'images/val'
sem_map_val = cityscapes_path + 'gtFine/val'

# Resize transforms: 720x1280 for the GTA data, 512x1024 for Cityscapes.
# Both use nearest-exact interpolation - presumably so that label ids are
# never blended when the same transform is applied to annotation maps; confirm
# against the dataset classes.
resize_transform_gta = v2.Resize(
    size=(720, 1280),
    interpolation=transforms.InterpolationMode.NEAREST_EXACT,
)
resize_transform = transforms.Resize(
    size=(512, 1024),
    interpolation=transforms.InterpolationMode.NEAREST_EXACT,
)

# Setup device: prefer the GPU when one is available.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

In [None]:
# apply data augmentation
# applier_crop = v2.RandomApply(transforms=[v2.RandomCrop(size=(512, 1024))], p=0.5)

# Define Datasets and Dataloaders
# NOTE(review): `GTA5` is not imported in any cell visible in this notebook -
# confirm it is imported before this cell runs.
gta_train = GTA5(annotations_dir=sem_map_train, images_dir=image_train, transform=resize_transform_gta)
cityscapes_val = CityScapes(annotations_dir=sem_map_val, images_dir=image_val, transform=resize_transform)

bs = 5
# Reshuffle the training samples every epoch so batches are not always made
# of the same images in the same order; validation order stays fixed.
train_loader = DataLoader(gta_train, batch_size=bs, shuffle=True)
val_loader = DataLoader(cityscapes_val, batch_size=bs, shuffle=False)

In [None]:
# Dataset sizes plus a visual sanity check of one training sample.
# The training set in this notebook is `gta_train`; `cityscapes_train` is
# never defined in the visible cells.
print(f'training dataset contains {len(gta_train)} images')
print(f'validation dataset contains {len(cityscapes_val)} images')

index = 564
image, sem_map = gta_train[index]

# Image on top, its semantic map below; both are permuted from
# channel-first to channel-last before being handed to imshow.
fig, axes = plt.subplots(2, 1)
image_transpose = convert_tensor_to_image(image)
map_transpose = convert_tensor_to_image(sem_map)
axes[0].imshow(image_transpose)
axes[1].imshow(map_transpose, cmap='Blues')
plt.show()

In [None]:
# Build the 19-class BiSeNet with a resnet18 context path and place it on the
# selected device (`.to` hands back the same module).
model = BiSeNet(num_classes=19, context_path='resnet18').to(device)

optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

# NOTE(review): the value 0.01 passed to poly_lr_scheduler differs from the
# optimiser's lr of 0.001, and the training function later calls
# `scheduler.step()` - confirm against utils.py what this helper returns and
# which learning rate is intended.
scheduler = poly_lr_scheduler(optimizer, 0.01, 1, lr_decay_iter=1, max_iter=50, power=0.9)

# Pixels labelled 255 do not contribute to the loss.
loss_fn = torch.nn.CrossEntropyLoss(ignore_index=255)

In [None]:
from torchinfo import summary

# Layer-by-layer parameter counts and trainable flags for one batch of
# `bs` three-channel 720x1280 inputs.
summary_input_size = (bs, 3, 720, 1280)
summary(model, input_size=summary_input_size, col_names=["num_params", "trainable"])

In [None]:
# Per-epoch metric histories, appended to by the training loop.
# Kept in their own cell so re-running the training cell continues the history.
train_iou_list, train_acc_list, train_loss_list = [], [], []
test_iou_list, test_acc_list, test_loss_list = [], [], []

In [None]:
# Train the model: one training pass and one validation pass per epoch,
# with metrics appended to the history lists and to text files on disk.

epoch_beginning=1
epochs = 50+epoch_beginning

for epoch in range(epoch_beginning,epochs):
    train_loss,train_iou,train_acc=train(model, optimizer, train_loader, loss_fn)
    train_iou_list.append(train_iou)
    train_acc_list.append(train_acc)
    train_loss_list.append(train_loss)

    # Save a checkpoint every 5 epochs and after the final epoch.
    # The last value produced by range() is `epochs - 1`, never `epochs`.
    if epoch%5 == 0 or epoch == epochs - 1:
        model_name = f"bisenet_GTA5_bs{bs}_epoch{epoch}_weights.pth"
        torch.save(model.state_dict(), model_name)

    #test
    test_loss,test_iou,test_acc = test(model, val_loader, loss_fn)
    test_iou_list.append(test_iou)
    test_acc_list.append(test_acc)
    test_loss_list.append(test_loss)

    # Append this epoch's mIoU values to the on-disk logs; the context
    # manager closes the file even if the write fails.
    with open("train_iou_list.txt", "a") as f:
        f.write(str(train_iou)+ "\n")
    with open("test_iou_list.txt", "a") as f:
        f.write(str(test_iou)+ "\n")

    print(f"Epoch: {epoch}")
    # The "Train" lines report the training metrics (they previously echoed
    # the test values).
    print(f"- Train Acc: {train_acc:.3f}")
#     print(f"- Train Loss: {train_loss:.3f}")
    print(f"- Train mIoU: {train_iou:.3f}\n")
    print(f"- Test Acc: {test_acc:.3f}")
#     print(f"- Test Loss: {test_loss:.3f}")
    print(f"- Test mIoU: {test_iou:.3f}")
    print("__________________________")

In [None]:
# Plot Graphs: one panel per metric, train and test curves overlaid.
# Rows 0-2 of `metrics` are the train histories, rows 3-5 the matching test ones.
metrics = np.stack(arrays=[train_iou_list ,train_acc_list , train_loss_list, test_iou_list, test_acc_list, test_loss_list], axis=0)

names = ["mIou", "Accuracy", "Loss"]
# x axis derived from the number of recorded epochs, so the plot works for
# any training length instead of a hard-coded 11 epochs.
epoch_axis = range(1, metrics.shape[1] + 1)

plt.figure(figsize=(15,3))
for i, name in enumerate(names):
    plt.subplot(1, len(names), i + 1)
    plt.plot(epoch_axis, metrics[i], label="Train")
    plt.plot(epoch_axis, metrics[i+len(names)], label="Test")
    plt.title(f"{name}")
    plt.legend()
plt.show()

In [None]:
# FLOPS
from fvcore.nn import FlopCountAnalysis, flop_count_table

# One Cityscapes validation sample serves as the probe input.
# `cityscapes_train` is never defined in this notebook; `cityscapes_val` is
# resized to 512x1024, the resolution this cell was written for.
plot_loader = DataLoader(cityscapes_val, batch_size=1, shuffle=True)
# Named `sample_image` so the builtin `input` is not shadowed for later cells.
sample_image, sample_label = next(iter(plot_loader))

flops = FlopCountAnalysis(model, sample_image.to(device,dtype=torch.float32))
print(flop_count_table(flops))

In [None]:
# Latency and FPS: time repeated forward passes on a single validation image.
import time
import numpy as np

net = model
net.eval()
# `cityscapes_train` is never defined in this notebook; a validation sample
# is used as the timing input instead.
fps_loader = DataLoader(cityscapes_val, batch_size=1, shuffle=True)
(inputs, annotations) = next(iter(fps_loader))
inputs=inputs.to(device,dtype=torch.float32)
iterations=1000

# Preallocated instead of growing an array with np.append on every iteration.
latency = np.empty(iterations)
# CUDA kernels are launched asynchronously, so the clock must only be read
# once the device has finished its queued work.
use_cuda = torch.cuda.is_available() and str(device).startswith("cuda")

with torch.no_grad():  # inference only: skip autograd bookkeeping
    for i in range(iterations):
        if use_cuda:
            torch.cuda.synchronize()
        start = time.perf_counter()
        output = net(inputs)
        if use_cuda:
            torch.cuda.synchronize()
        latency[i] = time.perf_counter() - start

FPS = 1.0 / latency
meanLatency=np.mean(latency)
stdLatency=np.std(latency)
meanFPS=np.mean(FPS)
stdFPS=np.std(FPS)

print (f"mean latency: {meanLatency} seconds")
print(f"std latency: {stdLatency} seconds")
print (f"mean FPS: {meanFPS} fps")
print(f"std FPS: {stdFPS} fps")