In [None]:
# !unzip -n -q /content/drive/MyDrive/Colab/DrivableArea/data/bdd100k_drivable_labels_trainval.zip -d /content/drive/MyDrive/Colab/DrivableArea/

In [None]:
# !7za x /content/drive/MyDrive/Colab/DrivableArea/data/bdd100k_drivable_labels_trainval.zip
# !cp --progress -r /content/bdd100k /content/drive/MyDrive/Colab/DrivableArea
# ! du -h /content/bdd100k/

In [1]:
# ! wget https://dl.cv.ethz.ch/bdd100k/drivable/models/fcn_r50-d8_769x769_40k_drivable_bdd100k.pth
# ! pip install mmsegmentation
# # ! pip install mmcv-full
# !pip install -U openmim
# !mim install mmengine
# !mim install mmcv

In [13]:
import gc
import torch
torch.cuda.empty_cache()
gc.collect()

0

: 

## Set params

In [1]:
BATCH_SIZE = 4
NUM_EPOCHS = 5
LR = 0.0001

STORE_MODEL_NAME = "deeplabv3_backbone_refined_benchmark2"
checkpoint_file = "/home/zekun/drivable/outputs/deeplabv3_backbone_refined_benchmark-20230427_225904.pth"
config_file = "/home/zekun/drivable/models/config-deeplabv3.py"


condition = {
    "weather": ["clear", "undefined", "rainy", "snowy", "overcast", "partly cloudy", "foggy"],  
    # "clear", "undefined", "rainy", "snowy", "overcast", "partly cloudy", "foggy"
    "timeofday": ["daytime", "undefined", "night", "dawn/dusk"],   
    # "daytime", "undefined", "night", "dawn/dusk"
    "scene": ["tunnel", "residential", "parking lot", "undefined", "city street", "gas stations", "highway"],  
    # "tunnel", "residential", "parking lot", "undefined", "city street", "gas stations", "highway"
}
# condition = None

sample_limit = 0

# output_size = (769,769)
output_size = (512,1024)

import sys
import os
# sys.path.append('/content/drive/MyDrive/Colab/DrivableArea') # <= change path where you save code
BASE_PATH = "./"
OUTPUT_DIR = "./outputs"
os.makedirs(OUTPUT_DIR, exist_ok=True)

import torch
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(DEVICE)

cuda


## Get Images and load to dataloader

### Dataset definition

In [2]:
import torch
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image
import os
from pathlib import Path
from importlib import reload
import matplotlib.pyplot as plt
import numpy as np

class BDD100kDataset(Dataset):
    def __init__(self, data_fns, msk_fn, split='train', transform=None, transform2=None):
        super(BDD100kDataset, self).__init__()
        # assert split in ['train', 'val', 'test'], "Invalid split provided. Expected 'train', 'val' or 'test'"
        
        self.image_fns = data_fns
        self.msk_fn = msk_fn
        self.split = split
        self.transform = transform
        self.transform2 = transform2
        
        # Check that image file names and label file names match
        # assert len(self.image_file_names) == len(self.label_file_names), "Number of images and labels do not match"
        self.num_samples = len(self.image_fns)
    
    def __getitem__(self, index):
        # Load image and label
        image = Image.open(self.image_fns[index])
        label = Image.open(self.msk_fn(self.image_fns[index]))
        
        # Apply transformations if provided
        if self.transform is not None:
            image = self.transform(image)

        if self.transform2 is not None:
            label = self.transform2(label)
        
        # Convert label to tensor and convert from RGB to single channel (grayscale)
        label = torch.tensor(np.array(label)*255, dtype=torch.int64)
        label = torch.nn.functional.one_hot(label, num_classes=3).permute(2, 0, 1).float()
        
        return image, label
    
    def __len__(self):
        return self.num_samples

### Get images

In [3]:
IMAGE_PATH = os.path.join("data", "bdd100k", "images", "100k")
IMAGE_PATH_TRAIN = os.path.join(IMAGE_PATH, "train")
IMAGE_PATH_VAL = os.path.join(IMAGE_PATH, "val")

LABEL_PATH = os.path.join("data", "bdd100k", "labels", "drivable", "masks")
LABEL_PATH_TRAIN = os.path.join(LABEL_PATH, "train")
LABEL_PATH_VAL = os.path.join(LABEL_PATH, "val")

msk_fn_train = lambda fn : fn.replace(IMAGE_PATH_TRAIN, LABEL_PATH_TRAIN).replace("jpg", "png")
msk_fn_val = lambda fn : fn.replace(IMAGE_PATH_VAL, LABEL_PATH_VAL).replace("jpg", "png")

In [4]:
if condition == None:
    train_fns = [str(f) for f in Path(IMAGE_PATH_TRAIN).rglob("*.jpg")]
    val_fns = [str(f) for f in Path(IMAGE_PATH_VAL).rglob("*.jpg")]
else:
    # Load the JSON file
    import json
    with open(f'{BASE_PATH}/data/bdd100k/labels/drivable/bdd100k_labels_images_attributes_train.json') as f:
        data = json.load(f)

    # Extract the desired fields from the data
    result = []
    for entry in data:
        if entry["attributes"]["weather"] not in condition["weather"]:
            continue
        if entry["attributes"]["timeofday"] not in condition["timeofday"]:
            continue
        if entry["attributes"]["scene"] not in condition["scene"]:
            continue
        result.append(os.path.join(IMAGE_PATH_TRAIN, entry["name"]))

    train_fns = result

    # Load the JSON file
    with open(f'{BASE_PATH}/data/bdd100k/labels/drivable/bdd100k_labels_images_attributes_val.json') as f:
        data = json.load(f)

    # Extract the desired fields from the data
    result = []
    for entry in data:
        if entry["attributes"]["weather"] not in condition["weather"]:
            continue
        if entry["attributes"]["timeofday"] not in condition["timeofday"]:
            continue
        if entry["attributes"]["scene"] not in condition["scene"]:
            continue
        result.append(os.path.join(IMAGE_PATH_VAL, entry["name"]))

    # print(result[1])
    # print(len(result))

    val_fns = result

import random
if sample_limit != 0:
    train_fns = random.sample(train_fns, min(sample_limit, len(train_fns)))
    val_fns = random.sample(val_fns, min(int(sample_limit/10), len(val_fns)))

print(f"train img: {len(train_fns)}")
print(f"val img: {len(val_fns)}")

num_train_samples = len(train_fns)
num_val_samples = len(val_fns)

print(train_fns[1])
print(msk_fn_train(train_fns[1]))

train img: 69863
val img: 10000
data/bdd100k/images/100k/train/0000f77c-62c2a288.jpg
data/bdd100k/labels/drivable/masks/train/0000f77c-62c2a288.png


### Create DataLoaders

In [5]:
from torch.utils.data import DataLoader

# Define transformation to be applied to both images and labels
transform = transforms.Compose([
    transforms.Resize(output_size),
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.squeeze())
    # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

transform2 = transforms.Compose([
    transforms.Resize(output_size),
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.squeeze())
])

# Create training and validation datasets and data loaders
train_dataset = BDD100kDataset(train_fns, msk_fn_train, split='train', transform=transform, transform2=transform2)
val_dataset = BDD100kDataset(val_fns, msk_fn_val, split='val', transform=transform, transform2=transform2)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

print(f"train_loader: {len(train_loader)}")
print(f"val_loader: {len(val_loader)}")

img, lbl = train_dataset[1]
print(img.shape, lbl.shape)
print(np.unique(lbl.numpy()))

train_loader: 17466
val_loader: 2500
torch.Size([3, 512, 1024]) torch.Size([3, 512, 1024])
[0. 1.]


In [6]:
# NPLOT = 6
# idxs = np.argsort(np.random.rand(50))[:NPLOT]
# Y = 2

# fig, axs = plt.subplots(Y, NPLOT, figsize=(NPLOT*5, Y*3))
# fig.subplots_adjust(top=1, bottom=0, left=0, right=1, hspace=0.2, wspace=0.01)

# for i, idx in enumerate(idxs):
#   image, label = val_dataset[idx]
#   image = np.transpose(image.numpy(), (1, 2, 0))  # Add batch dimension

#   # Pass the image through the model to get predicted output
#   # output = model(image)
#   # output = torch.argmax(output, dim=1).squeeze().detach().cpu().numpy()
#   output = torch.argmax(label, dim=0)

#   # Convert the predicted output to a color-coded mask
#   mask = np.zeros((output.shape[0], output.shape[1], 3))
#   mask[output == 0] = [0, 1, 0]  # Direct
#   mask[output == 1] = [0, 0, 1]  # Alternative   
#   mask[output == 2] = [0, 0, 0]  # Background

#   # Overlay the mask on top of the input image
#   alpha = 0.3
#   overlay = (alpha * mask + image)

#   # Plot the input image and overlayed mask
#   axs[0, i].imshow(image)
#   axs[0, i].set_title('Image')
#   axs[0, i].axis('off')

#   axs[1, i].imshow(overlay)
#   axs[1, i].set_title('Label')
#   axs[1, i].axis('off')

# plt.show()

## Model Definition And Train

### Model Definition

In [6]:
# checkpoint_file = '/content/fcn_r50-d8_769x769_40k_drivable_bdd100k.pth'
# checkpoint_file = f'outputs/{checkpoint_file_name}' # defined above
# img_path = '/content/bdd100k/images/100k/train/0000f77c-62c2a288.jpg'

from mmseg.apis import inference_model, init_model, show_result_pyplot
import mmcv
print(mmcv.version.version_info)
from mmengine import runner

backbone = init_model(config_file, device='cpu')
checkpoint = runner.load_checkpoint(backbone, checkpoint_file)

(2, 0, 0)




Loads checkpoint by local backend from path: /home/zekun/drivable/outputs/deeplabv3_backbone_refined_benchmark-20230427_225904.pth


In [None]:
import models.modelInterface
reload(models.modelInterface)
from models.modelInterface import BDD100kModel

model = BDD100kModel(num_classes=3, backbone=backbone, size=output_size)
# model = load_checkpoint(model, "test-20230402_144923.pth", OUTPUT_DIR)
model.to(DEVICE)

### Model Train parameters

In [8]:
import datetime
import torch.optim as optim
from torch import nn
from lib.data.tools import load_checkpoint

optimizer = optim.Adam(model.parameters(), lr=LR)
criterion = nn.CrossEntropyLoss()

now = datetime.datetime.now()
timestamp = now.strftime("%Y%m%d_%H%M%S")
stored_model_name = f"{STORE_MODEL_NAME}-{timestamp}.pth"

### Model Train

In [9]:
import lib.runners
reload(lib.runners)
from lib.runners import train_epoch, valid_epoch
from lib.data.tools import save_model

def train(train_data_loader, val_data_loader, model, optimizer, epoch_i, epoch_total, max_score):
  train_log = train_epoch(
      model=model,
      optimizer=optimizer,
      criterion=criterion,
      dataloader=train_data_loader,
      device=DEVICE
  )
  valid_logs = valid_epoch(
      model=model,
      criterion=criterion,
      dataloader=val_data_loader,
      device=DEVICE,
  )
  epoch_score = valid_logs["Score"]  # Maybe print more information here for analysis
  if max_score < epoch_score:
      max_score = epoch_score
      save_model(
          model=model.backbone,
          epoch=epoch_i,
          best_score=max_score,
          model_name=stored_model_name,
          output_dir=OUTPUT_DIR,
      )
  save_model(
      model=model.backbone,
      epoch=epoch_i,
      best_score=epoch_score,
      model_name="tmp.pth",
      output_dir=OUTPUT_DIR,
  )
  return max_score

In [10]:
import time

start = time.time()
print(f"start at {time.ctime()}")

model.to(DEVICE).train()
# epoch_i = 1
max_score = 0
for epoch_i in range(NUM_EPOCHS):
    # training
    print(f"\nEpoch: {epoch_i} / {NUM_EPOCHS}\n-------------------------------")
    t1 = time.time()
    max_score = train(train_loader, val_loader, model, optimizer, epoch_i, NUM_EPOCHS, max_score)
    t2 = time.time()
    print(f"\nEpoch {epoch_i} / {NUM_EPOCHS}: ", t2-t1, " unit time")

print("Elapsed time: {:.3f} min".format((time.time() - start) / 60.0))

start at Thu May 11 16:28:08 2023

Epoch: 0 / 5
-------------------------------


Train: 100%|██████████| 17466/17466 [3:09:56<00:00,  1.53it/s, Loss=0.0783, Score=0.756]  
Valid: 100%|██████████| 2500/2500 [09:45<00:00,  4.27it/s, Loss=0.0826, Score=0.737]


model saved
model saved

Epoch 0 / 5:  11981.540621042252  unit time

Epoch: 1 / 5
-------------------------------


Train: 100%|██████████| 17466/17466 [3:09:37<00:00,  1.54it/s, Loss=0.074, Score=0.766]   
Valid: 100%|██████████| 2500/2500 [09:46<00:00,  4.26it/s, Loss=0.0812, Score=0.741]


model saved
model saved

Epoch 1 / 5:  11964.018610715866  unit time

Epoch: 2 / 5
-------------------------------


Train:   1%|▏         | 260/17466 [02:50<3:08:01,  1.53it/s, Loss=0.0712, Score=0.77] 


KeyboardInterrupt: 

: 

### Model Test

In [None]:
model.eval()

valid_logs = valid_epoch(
    model=model,
    criterion=criterion,
    dataloader=val_loader,
    device=DEVICE,
)

print(valid_logs)

Valid: 100%|██████████| 2500/2500 [09:28<00:00,  4.40it/s, Loss=0.136, Score=0.654]

{'Loss': 0.13587871412411331, 'Score': 0.6538538311097193}





In [None]:
NPLOT = 6
idxs = np.argsort(np.random.rand(50))[:NPLOT]
Y = 3

fig, axs = plt.subplots(Y, NPLOT, figsize=(NPLOT*3, Y*3))
fig.subplots_adjust(top=1, bottom=0, left=0, right=1, hspace=0.2, wspace=0.01)

# model.to(DEVICE)
model.eval()

for i, idx in enumerate(idxs):
  image, label = val_dataset[idx]

  # Pass the image through the model to get predicted output
  # output = model(image)
  # output = torch.argmax(output, dim=1).squeeze().detach().cpu().numpy()
  with torch.no_grad():
    output = torch.argmax(label, dim=0)
    pred = model(image.unsqueeze(0).to(DEVICE))
    # pred = torch.nn.Upsample(scale_factor=8, mode='bilinear', align_corners=False)
    # print(pred.shape)
    # print(np.transpose(label.squeeze().numpy(), (1,2,0)))
    # print(np.transpose(pred.squeeze().detach().cpu().numpy(), (1, 2, 0)))
    pred = torch.argmax(pred, dim=1).squeeze().detach().cpu().numpy()
    

  image = np.transpose(image.numpy(), (1, 2, 0))  # Add batch dimension

  # Convert the predicted output to a color-coded mask
  mask = np.zeros((output.shape[0], output.shape[1], 3))
  mask[output == 0] = [0, 1, 0]  # Direct
  mask[output == 1] = [0, 0, 1]  # Alternative   
  mask[output == 2] = [0, 0, 0]  # Background

  # Overlay the mask on top of the input image
  alpha = 0.3
  overlay = (alpha * mask + image)

  # Convert the predicted output to a color-coded mask
  mask[pred == 0] = [0, 1, 0]  # Direct
  mask[pred == 1] = [0, 0, 1]  # Alternative   
  mask[pred == 2] = [0, 0, 0]  # Background
  overlay2 = (alpha * mask + image)

  # Plot the input image and overlayed mask
  axs[0, i].imshow(image)
  axs[0, i].set_title('Image')
  axs[0, i].axis('off')

  axs[1, i].imshow(overlay)
  axs[1, i].set_title('Label')
  axs[1, i].axis('off')

  axs[2, i].imshow(overlay2)
  axs[2, i].set_title('Pred')
  axs[2, i].axis('off')

plt.show()

## Predict Masks

In [52]:
TMP_DIR = "tmp"

import shutil
from tqdm import tqdm
shutil.rmtree(TMP_DIR)
os.makedirs(TMP_DIR, exist_ok=True)
os.makedirs(f"{TMP_DIR}/mask", exist_ok=True)
os.makedirs(f"{TMP_DIR}/pred", exist_ok=True)

from torch.utils.data import DataLoader
import torchvision.transforms.functional as TF

# Define dataloader for validation dataset
val_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=False)

# Set model to evaluation mode
model.eval()
# model.to(DEVICE)

# Define transform to resize predicted mask to original image size
resize = transforms.Resize((720, 1280))

# Iterate over validation dataset
iterator = tqdm(val_dataloader, desc="Predicting")
for i, (image, _) in enumerate(iterator):
    # Move data to GPU if available
    image = image.to(DEVICE)
    # print(image.device)

    # Get predicted mask from model
    with torch.no_grad():
        output = model(image)
        mask = output.argmax(dim=1)

    # Resize mask to original image size and convert to PIL image
    mask = resize(mask.unsqueeze(1))
    mask_pil = transforms.ToPILImage()(mask.to(torch.uint8).squeeze().cpu())

    # print(mask_pil.size)
    
    msk_path = msk_fn_val(val_fns[i])
    name = os.path.basename(msk_path)
    shutil.copy(msk_path, f"{TMP_DIR}/mask/")
    # Save resized mask to disk with the same name as the input image
    mask_pil.save(f'{TMP_DIR}/pred/{name}')
    # break

Predicting: 100%|██████████| 660/660 [00:39<00:00, 16.70it/s]


In [53]:
! python3 -m bdd100k.bdd100k.eval.run -t drivable -g ./tmp/mask/ -r ./tmp/pred/ --out-file ./tmp/result.json

import json

with open(f'{TMP_DIR}/result.json') as f:
    data = json.load(f)
    print(f.read())

with open(f'./bdd100k-eval-result.json', "r") as f:
    results = json.load(f)
    results.append({
        "model": STORE_MODEL_NAME, 
        "condition": condition,
        "val samples": num_val_samples, 
        "metrics": data})

with open(f'./bdd100k-eval-result.json', "w") as f:
    json.dump(results, f, indent=4)

[2023-04-28 09:01:06,652 seg.py:142 evaluate_segmentation] Found 660 results
[2023-04-28 09:01:06,653 seg.py:155 evaluate_segmentation] evaluating...
[2023-04-28 09:01:06,654 utils.py:100 reorder_preds] 0 images are missed in the prediction.
100%|████████████████████████████████████| 660/660 [00:00<00:00, 1020361.46it/s]
[2023-04-28 09:01:09,296 seg.py:173 evaluate_segmentation] accumulating...
[2023-04-28 09:01:09,297 seg.py:201 evaluate_segmentation] GT id set [0,1]
[2023-04-28 09:01:09,297 run.py:285 run] 
             IoU  Acc
---------------------
direct      84.4 94.8
alternative 56.4 82.4
---------------------
AVERAGE     70.4 88.6




## Simple Test

In [11]:
import torch

# assuming batch size of 4, 3 classes, and 256x256 output size
batch_size = 4
num_classes = 3
output_size = (4, 4)

# create some random predictions
predictions = torch.randn(batch_size, num_classes, *output_size).to("cuda")
# predictions = predictions_cpu.to(device="cuda")
print(*(predictions.shape), predictions.device)

# convert predictions to one-hot format
one_hot = torch.zeros(batch_size, num_classes, *output_size).to("cuda")
max_idx = torch.argmax(predictions, dim=1)
print(one_hot.device, max_idx.device)
one_hot.scatter_(1, max_idx.unsqueeze(1), 1)
print(one_hot[0])

# print(torch.softmax(one_hot, dim=1)[0])

4 3 4 4 cuda:0
cuda:0 cuda:0
tensor([[[0., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 1.],
         [1., 0., 0., 0.]],

        [[1., 1., 0., 1.],
         [0., 0., 0., 1.],
         [0., 1., 0., 0.],
         [0., 1., 1., 1.]],

        [[0., 0., 1., 0.],
         [0., 1., 1., 0.],
         [0., 0., 1., 0.],
         [0., 0., 0., 0.]]], device='cuda:0')
