<a href="https://colab.research.google.com/github/marcomag416/MLDL/blob/main/bisenet_3b_colab_version.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Preliminary code
Feel free to delete/skip this part if run in a persistent environment

In [1]:
#download bisenet model from official repository
import sys
import requests
from zipfile import ZipFile
from io import BytesIO
model_url = "https://github.com/ooooverflow/BiSeNet/archive/refs/heads/master.zip"

# Send a GET request to the URL
response = requests.get(model_url)
# Check if the request was successful
if response.status_code == 200:
    #print(response.content)
    # Open the downloaded bytes and extract them
    with ZipFile(BytesIO(response.content)) as zip_file:
        zip_file.extractall('./')
    print('Download and extraction complete!')

sys.path.insert(0, './BiSeNet-master')

Download and extraction complete!


In [2]:
#download pytorchs1_gta from github

url= "https://github.com/marcomag416/MLDL/archive/refs/heads/main.zip"

# Send a GET request to the URL
response = requests.get(url)
# Check if the request was successful
if response.status_code == 200:
    #print(response.content)
    # Open the downloaded bytes and extract them
    with ZipFile(BytesIO(response.content)) as zip_file:
        zip_file.extractall('./')
    print('Download and extraction complete!')

sys.path.insert(0, './MLDL-main')

Download and extraction complete!


In [3]:
#download cityscapes dataset
from google.colab import drive
drive.mount('/content/drive')

with ZipFile("/content/drive/MyDrive/Colab Notebooks/dataset/Cityscapes.zip", 'r') as zip_ref:
    zip_ref.extractall("./dataset")

cityscape_dataset_path = "./dataset/Cityscapes/Cityspaces"

Mounted at /content/drive


In [4]:
#download and index gta5 dataset
import os
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt

gta_path = "/content/drive/MyDrive/Colab Notebooks/dataset/GTA5.zip"

gta_dataset_path = "./dataset/"

#extract zip file
with ZipFile(gta_path, 'r') as zip_ref:
    zip_ref.extractall(gta_dataset_path)

# Define paths
images_path = gta_dataset_path + 'GTA5/images'
labels_path = gta_dataset_path + 'GTA5/labels'

# Initialize lists to hold data
data = []

# Load images and corresponding masks
for image_filename in os.listdir(images_path):
    if image_filename.endswith('.png'):
        image_path = os.path.join(images_path, image_filename)
        mask_path = os.path.join(labels_path, image_filename)

        # Check if corresponding mask file exists
        if os.path.exists(mask_path):
            # Open image and mask to ensure they can be loaded (optional, for validation)
            try:
                image = Image.open(image_path)
                mask = Image.open(mask_path)

                # Add data to list
                data.append({
                    'image_path': image_path,
                    'mask_path': mask_path
                })
            except Exception as e:
                print(f"Error loading {image_path} or {mask_path}: {e}")

# Create a DataFrame from the data list
df = pd.DataFrame(data)

# Save the DataFrame to a CSV file
df.to_csv('./gta5_segmentation_dataset.csv', index=False)

print("Semantic segmentation dataset created and saved as 'gta5_segmentation_dataset.csv'")

Semantic segmentation dataset created and saved as 'gta5_segmentation_dataset.csv'


In [5]:
#install and import wandb for data collecting
!pip install wandb
import wandb

wandb.login()

Collecting wandb
  Downloading wandb-0.17.1-py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.8/6.8 MB[0m [31m19.8 MB/s[0m eta [36m0:00:00[0m
Collecting docker-pycreds>=0.4.0 (from wandb)
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting gitpython!=3.1.29,>=1.0.0 (from wandb)
  Downloading GitPython-3.1.43-py3-none-any.whl (207 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m26.6 MB/s[0m eta [36m0:00:00[0m
Collecting sentry-sdk>=1.0.0 (from wandb)
  Downloading sentry_sdk-2.5.1-py2.py3-none-any.whl (289 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m289.6/289.6 kB[0m [31m34.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting setproctitle (from wandb)
  Downloading setproctitle-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

# 2b BiseNet training and validation

In [11]:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
import albumentations as A
from albumentations.pytorch import ToTensorV2
from pytorchdl_gta5.labels import GTA5Labels_TaskCV2017

if __name__ != '__main__':
    raise Exception("This script should not be imported; it should be run directly.")

# Setup device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"

# Define the custom dataset class
class GTASegmentationDataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.data_frame = pd.read_csv(csv_file)
        self.transform = transform
        self.label_mapping = self._create_label_mapping()

    def __len__(self):
        return len(self.data_frame)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = self.data_frame.iloc[idx, 0]
        mask_name = self.data_frame.iloc[idx, 1]

        image = Image.open(img_name).convert('RGB')
        mask = Image.open(mask_name).convert('RGB')

        if self.transform:
            augmented = self.transform(image=np.array(image), mask=np.array(mask))
            image, mask = augmented['image'], augmented['mask']

        mask = self._map_mask(np.array(mask))

        if self.transform:
            # Convert mask to tensor without normalization
            mask = torch.from_numpy(mask).permute(2, 0, 1).float()
            mask = mask[0]

        return image, mask

    def _create_label_mapping(self):
        label_mapping = {label.color: label.ID for label in GTA5Labels_TaskCV2017.list_}
        label_mapping[(0, 0, 0)] = 255  # Ensure unmapped colors go to 'unlabeled'
        return label_mapping

    def _map_mask(self, mask):
        new_mask = np.zeros_like(mask)
        for color, label_id in self.label_mapping.items():
            color_mask = np.all(mask == color, axis=-1)
            new_mask[color_mask] = label_id  # Use label_id instead of color
        return new_mask



# Define paths
csv_file = './gta5_segmentation_dataset.csv'

# Define image transformations
transform = A.Compose([
    A.Resize(720, 1280),
    A.GaussianBlur(blur_limit=(23, 23), sigma_limit=(0.1,2.), p=0.5),
    A.HorizontalFlip(p=0.5),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2()
])

# Create the dataset and dataloader
train_dataset = GTASegmentationDataset(csv_file=csv_file, transform=transform)


In [12]:
class Cityscapes(Dataset):
    def __init__(self, root_dir, split, transforms=None, label_type='gtFine_labelTrainIds'):
        self.root_dir = root_dir
        self.split = split
        self.transforms = transforms
        self.label_type = label_type

        self.images_dir = f"{root_dir}/images/{split}"
        self.labels_dir = f"{root_dir}/gtFine/{split}"

        self.image_paths = []
        self.label_paths = []

        # Manually iterate over directories
        cities = [city for city in os.listdir(self.images_dir) if os.path.isdir(f"{self.images_dir}/{city}")]
        for city in cities:
            img_dir_city = f"{self.images_dir}/{city}"
            lbl_dir_city = f"{self.labels_dir}/{city}"

            if not os.path.isdir(img_dir_city) or not os.path.isdir(lbl_dir_city):
                continue

            for img_file in os.listdir(img_dir_city):
                if img_file.endswith('_leftImg8bit.png'):
                    img_path = f"{img_dir_city}/{img_file}"
                    lbl_file = img_file.replace('_leftImg8bit.png', f'_{self.label_type}.png')
                    lbl_path = f"{lbl_dir_city}/{lbl_file}"

                    if os.path.isfile(img_path) and os.path.isfile(lbl_path):
                        self.image_paths.append(img_path)
                        self.label_paths.append(lbl_path)
                    else:
                        print(f"Warning: Image or label file not found for {img_file}")

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        lbl_path = self.label_paths[idx]

        image = Image.open(img_path).convert('RGB')
        label = Image.open(lbl_path)

        image = np.array(image)
        label = np.array(label)

        if self.transforms:
            augmented = self.transforms(image=image, mask=label)
            image, label = augmented['image'], augmented['mask']

        return image, label


# Example usage
image_transforms = A.Compose([
    A.Resize(512, 1024),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2()
])

val_dataset = Cityscapes(root_dir=cityscape_dataset_path, split='val', transforms=image_transforms)


print(f"Val_Dataset size: {len(val_dataset)}")

Val_Dataset size: 500


In [13]:

def train(model, optimizer_train, dataloader, loss_fn_train):
    model.train()  # Set the model to training mode
    train_loss = 0.0
    total = 0

    for idx, (inputs_train, targets_train) in enumerate(dataloader):
        inputs_train = inputs_train.to(device)
        targets_train = targets_train.to(device, dtype=torch.long)  # Move data to the appropriate device

        optimizer_train.zero_grad()  # Zero out gradients from the previous iteration
        outputs_train, _, _ = model(inputs_train)  # Forward pass
        # print( "train")
        loss = loss_fn_train(outputs_train, targets_train)  # Calculate the loss

        loss.backward()  # Backward pass
        optimizer_train.step()  # Update the weights

        wandb.log({"train/Batch loss": loss})

        train_loss += loss.item() * inputs_train.size(0)  # Accumulate the total loss
        _, predicted_train = outputs_train.max(1)
        total += targets_train.size(0)

    # Calculate average loss for the epoch
    avg_loss = train_loss / total

    wandb.log({"train/Epoch loss": avg_loss})

    return avg_loss


def compute_iou(pred, target, num_classes):
    ious = []
    pred = pred.view(-1)
    target = target.view(-1)

    for cls in range(num_classes):
        pred_inds = (pred == cls)
        target_inds = (target == cls)
        intersection = (pred_inds[target_inds]).sum().item()
        union = pred_inds.sum().item() + target_inds.sum().item() - intersection
        if union == 0:
            ious.append(float('nan'))  # If there is no union, set IoU to NaN
        else:
            ious.append(intersection / union)

    return np.array(ious)

def eval(model, dataloader, loss_fn, device, num_classes=19):
    model.eval()  # Set the model to evaluation mode
    test_loss = 0.0
    total = 0
    all_ious = []  # List to store IoUs for each batch

    with torch.no_grad():  # Disable gradient calculation during inference
        for inputs_test, targets_test in dataloader:
            inputs_test, targets_test = inputs_test.to(device), targets_test.to(device, dtype=torch.long)

            outputs_test = model(inputs_test)  # Forward pass
            loss = loss_fn(outputs_test, targets_test)  # Calculate the loss

            test_loss += loss.item() * inputs_test.size(0)  # Accumulate the total loss
            _, predicted_test = outputs_test.max(1)
            total += targets_test.size(0)

            # Compute IoU for this batch
            batch_ious = compute_iou(predicted_test, targets_test, num_classes)
            all_ious.append(batch_ious)

    # Calculate average loss
    avg_loss = test_loss / total

    wandb.log({})

    # Calculate mean IoU
    all_ious = np.array(all_ious)
    mean_iou = np.nanmean(all_ious, axis=0)  # Mean IoU for each class
    miou = np.nanmean(mean_iou)  # Mean IoU across all classes

    wandb.log({"val/Validation loss": avg_loss, "val/mIoU": miou})

    return avg_loss, miou

In [14]:
from model.build_BiSeNet import BiSeNet
from torch import nn
from torch.optim.lr_scheduler import PolynomialLR


# Set CUDA_LAUNCH_BLOCKING environment variable
os.environ['CUDA_LAUNCH_BLOCKING']="1"
os.environ['TORCH_USE_CUDA_DSA'] = "1"

context_path = 'resnet18'

#save hyperparameters
config = {
    "learning_rate": 1e-3,
    "max_epochs": 50,
    "batch_size": 8,
    "weight_decay": "None",
    "dataset": "Cityscapes",
    "scheduler": "None",
    "optimizer": "Adam",
    "transformations": "gaussian blur, horizontal flip"
}


# create dataloaders
train_dataloader = DataLoader(train_dataset, batch_size=config["batch_size"], shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=config["batch_size"], shuffle=False)

# Initialize the model
model = BiSeNet(num_classes=19, context_path=context_path).to(device)

loss_fn = nn.CrossEntropyLoss(ignore_index=255)
optimizer = torch.optim.Adam(model.parameters(), lr=config["learning_rate"])
#scheduler = PolynomialLR(optimizer, total_iters=config["max_epochs"], power=config["polyPower"])
scheduler = None

# Set the manual seeds
torch.manual_seed(42)
torch.cuda.manual_seed(42)

In [None]:
from timeit import default_timer as timer

#run names
project_name = "bisenet_gta"
run_name = "guassian_horizFlip"

#start wandb run
wandb.init(project=project_name, name=run_name, config=config)

start_time = timer()

# Setup training and save the results
for epoch in range(config["max_epochs"]):
    wandb.log({"Epoch": epoch+1})
    train(model, optimizer, train_dataloader, loss_fn)
    avg_loss, miou = eval(model, val_dataloader, loss_fn, device=device)
    if scheduler != None:
      scheduler.step()
    #save model state every 5 epochs
    if((epoch + 1) % 5 == 0):
      torch.save(model.state_dict(), f"./drive/MyDrive/Colab Notebooks/model_weights/{project_name}/{run_name}_epoch{epoch}.pth")
    print(f"Epoch: {epoch+1}, Loss: {avg_loss}, mIoU: {miou*100:.2f}%")

# End the timer and print out how long it took
end_time = timer()
print(f"[INFO] Total training time: {end_time - start_time:.3f} seconds")
wandb.finish()

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.10868612498904373, max=1.…

0,1
Epoch,▁

0,1
Epoch,1


Epoch: 1, Loss: 1.2817029457092286, mIoU: 15.70%
Epoch: 2, Loss: 1.5510154476165772, mIoU: 13.84%
