# Experiment management for semantic segmentation with W&B, with visualization through image overlays and Tables

In [1]:
import pandas as pd
import params
import utils
from fastai.vision.all import *
from fastai.callback.wandb import WandbCallback
from types import SimpleNamespace
import os, warnings
from sklearn.model_selection import StratifiedGroupKFold

from utils import get_predictions, create_iou_table, MIOU, BackgroundIOU, \
                  RoadIOU, TrafficLightIOU, TrafficSignIOU, PersonIOU, VehicleIOU, BicycleIOU
import wandb
import os
wandb.login()
warnings.filterwarnings('ignore')

[34m[1mwandb[0m: Currently logged in as: [33mkeisuke-kamata[0m ([33mwandb-japan[0m). Use [1m`wandb login --relogin`[0m to force relogin


# 01: Data upload and EDA

In [2]:
DEBUG = False

In [3]:
URL = 'https://storage.googleapis.com/wandb_course/bdd_simple_1k.zip'
path = Path(untar_data(URL, force_download=True))

In [4]:
def label_func(fname):
    """Return the mask path that belongs to the image path `fname`.

    The mask lives in a `labels` directory that is a sibling of the image's
    parent directory, and is named `<image stem>_mask.png`.
    """
    labels_dir = fname.parent.parent / "labels"
    mask_name = f"{fname.stem}_mask.png"
    return labels_dir / mask_name

def get_classes_per_image(mask_data, class_labels):
    """Flag which classes occur in a mask.

    Args:
        mask_data: array of class ids (one id per pixel).
        class_labels: mapping from class id to class name.

    Returns:
        dict mapping every class name in `class_labels` to 1 if its id
        appears anywhere in `mask_data`, otherwise 0.
    """
    ids_in_mask = np.unique(mask_data).tolist()
    return {
        class_name: int(class_id in ids_in_mask)
        for class_id, class_name in class_labels.items()
    }

def _create_table(image_files, class_labels):
    """Build a W&B Table with one row per image for exploratory analysis.

    Each row holds the file name, the image with its segmentation mask
    overlaid, a placeholder "Split" value (the string "None"), and one 0/1
    column per class telling whether that class occurs in the mask.

    Args:
        image_files: sequence of image paths; the mask for each one is
            located with `label_func`.
        class_labels: mapping from class id to class name.

    Returns:
        The populated `wandb.Table`.

    NOTE(review): the mask is read from the label file but is logged under
    the mask key "predictions" — confirm this naming is intended.
    """
    class_names = [str(class_labels[class_id]) for class_id in class_labels]
    columns = ["File_Name", "Images", "Split"] + class_names
    table = wandb.Table(columns=columns)

    for image_file in progress_bar(image_files, total=len(image_files)):
        image = Image.open(image_file)
        mask_data = np.array(Image.open(label_func(image_file)))
        class_in_image = get_classes_per_image(mask_data, class_labels)

        mask_overlay = {
            "predictions": {
                "mask_data": mask_data,
                "class_labels": class_labels,
            }
        }
        presence_flags = [class_in_image[name] for name in class_names]

        table.add_data(
            str(image_file.name),
            wandb.Image(image, masks=mask_overlay),
            "None",
            *presence_flags,
        )

    return table

In [5]:
import params
# Upload run: version the raw dataset as a W&B Artifact and attach an EDA table to it.
with wandb.init(entity=params.WANDB_ENTITY, project=params.WANDB_PROJECT,  job_type="upload") as run:

  # log data with Artifacts
  raw_data_at = wandb.Artifact(params.RAW_DATA_AT,
                               type="raw_data",
                               metadata={
                                  # Reuse the URL the data was actually downloaded from
                                  # so the metadata cannot drift from the download cell.
                                  "url": URL,
                              })
  raw_data_at.add_file(path/'LICENSE.txt', name='LICENSE.txt')
  raw_data_at.add_dir(path/'images', name='images')
  raw_data_at.add_dir(path/'labels', name='labels')

  # Visualize data with Tables
  image_files = get_image_files(path/"images", recurse=False)

  # The DEBUG subsample has to come after `image_files` is assigned;
  # slicing it earlier raises NameError on a fresh kernel.
  if DEBUG: image_files = image_files[:10]

  table = _create_table(image_files, params.BDD_CLASSES)
  raw_data_at.add(table, "eda_table")
  run.log_artifact(raw_data_at)


[34m[1mwandb[0m: Currently logged in as: [33mkeisuke-kamata[0m. Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: Adding directory to artifact (/home/olachinkeigpu/.fastai/data/bdd_simple_1k/images)... Done. 0.2s
[34m[1mwandb[0m: Adding directory to artifact (/home/olachinkeigpu/.fastai/data/bdd_simple_1k/labels)... Done. 0.1s


# 02: Data Preparation

In [6]:
import params
# Data-split run: assign every image to train/valid/test and log the split
# (CSV + joined EDA table) as a new Artifact.
with wandb.init(entity=params.WANDB_ENTITY, project=params.WANDB_PROJECT, job_type="data_split") as run:

  ## Data Preparation
  # data download
  raw_data_at = run.use_artifact(f'{params.WANDB_ENTITY}/{params.WANDB_PROJECT}/{params.RAW_DATA_AT}:latest')
  path = Path(raw_data_at.download())

  # Read file names AND stratification labels from the same table so that
  # position i in `fnames`, `groups` and `y` always refers to the same image.
  # (A directory listing has no guaranteed order and may not match the table rows.)
  orig_eda_table = raw_data_at.get("eda_table")
  fnames = orig_eda_table.get_column('File_Name')
  y = orig_eda_table.get_column('bicycle')

  # Group key = part of the file name before the first '-'; files sharing
  # a key are kept in the same fold by StratifiedGroupKFold.
  groups = [s.split('-')[0] for s in fnames]

  df = pd.DataFrame()
  df['File_Name'] = fnames
  df['fold'] = -1

  # data split
  cv = StratifiedGroupKFold(n_splits=10)
  for i, (train_idxs, test_idxs) in enumerate(cv.split(fnames, y, groups)):
      df.loc[test_idxs, 'fold'] = i

  # fold 0 -> test, fold 1 -> valid, remaining folds -> train
  df['Stage'] = 'train'
  df.loc[df.fold == 0, 'Stage'] = 'test'
  df.loc[df.fold == 1, 'Stage'] = 'valid'
  del df['fold']

  # A bare expression inside the `with` block displays nothing, so print the counts.
  print(df.Stage.value_counts())

  df.to_csv('data_split.csv', index=False)
  processed_data_at = wandb.Artifact(params.PROCESSED_DATA_AT, type="split_data")

  # Data upload
  processed_data_at.add_file('data_split.csv')
  processed_data_at.add_dir(path)

  # Data Visualization
  # Table for eda
  data_split_table = wandb.Table(dataframe=df[['File_Name', 'Stage']])

  # join table
  join_table = wandb.JoinedTable(orig_eda_table, data_split_table, "File_Name")
  processed_data_at.add(join_table, "eda_table_data_split")
  run.log_artifact(processed_data_at) # visualization on wandb artifacts

[34m[1mwandb[0m: Downloading large artifact bdd_simple_1k:latest, 846.57MB. 4007 files... 
[34m[1mwandb[0m:   4007 of 4007 files downloaded.  
Done. 0:0:9.0
[34m[1mwandb[0m: Downloading large artifact bdd_simple_1k:latest, 846.57MB. 4007 files... 
[34m[1mwandb[0m:   4007 of 4007 files downloaded.  
Done. 0:0:11.5
[34m[1mwandb[0m: Adding directory to artifact (/home/olachinkeigpu/Project/adas-handson/artifacts/bdd_simple_1k:v0)... Done. 0.7s


# 03: Baseline

Again, we import some global configuration parameters from the `params.py` file. We have also defined some helper functions in `utils.py` - for example, the metrics we will track during our experiments.

Let's now create a `train_config` that we'll pass to W&B `run` to control training hyperparameters.


In [7]:
# Hyperparameters for the baseline run; passed to `wandb.init(config=...)`
# in the training cell.
# NOTE(review): the training output recorded in this notebook shows train_loss
# spiking to ~18.6 and every non-background IoU dropping to 0 — lr=2e-2 may be
# too high for this setup; confirm.
train_config = SimpleNamespace(**{
    "framework": "fastai",
    "img_size": (180, 320),
    "batch_size": 4,
    "augment": True,      # use data augmentation
    "epochs": 20,
    "lr": 2e-2,
    "pretrained": True,   # whether to use pretrained encoder
    "seed": 42,
})

In [8]:
# Seed the session with the value stored in `train_config.seed`.
# `reproducible=True` is forwarded to fastai's `set_seed`; presumably it also
# switches the backend to deterministic settings — confirm in the fastai docs.
set_seed(train_config.seed, reproducible=True)
def label_func(fname):
    """Map an image path to its mask path.

    The result is `<grandparent of fname>/labels/<stem of fname>_mask.png`.
    """
    dataset_root = fname.parent.parent
    return dataset_root.joinpath("labels", f"{fname.stem}_mask.png")

def get_data(df, bs=4, img_size=(180, 320), augment=True):
    """Create fastai DataLoaders for segmentation from a dataframe.

    Args:
        df: dataframe with an `image_fname` column (inputs) and a
            `label_fname` column (masks). The split is taken by
            `ColSplitter()` with its default column — presumably the
            `is_valid` column set by the caller; confirm.
        bs: batch size.
        img_size: size passed to the per-item `Resize` transform.
        augment: when true, `aug_transforms()` is applied per batch;
            otherwise no batch transforms are used.

    Returns:
        The dataloaders built from the DataBlock.
    """
    batch_tfms = aug_transforms() if augment else None
    segmentation_block = DataBlock(
        blocks=(ImageBlock, MaskBlock(codes=params.BDD_CLASSES)),
        get_x=ColReader("image_fname"),
        get_y=ColReader("label_fname"),
        splitter=ColSplitter(),
        item_tfms=Resize(img_size),
        batch_tfms=batch_tfms,
    )
    return segmentation_block.dataloaders(df, bs=bs)

We will use intersection-over-union metrics: the mean across all classes (MIOU)
and the IOU for each class separately. Our model will be a U-Net with a pretrained ResNet-18 backbone.

In fastai we already have a callback that integrates tightly with W&B, we only need to pass the WandbCallback to the learner and we are ready to go.
The callback will log all the useful variables for us.
For example, whatever metric we pass to the learner will be tracked by the callback.


In the training cell below, `WandbCallback` is passed together with
`SaveModelCallback(monitor='miou')`. After training, the model is validated
once more and the final loss and metrics are written to `wandb.summary`,
and a table of predictions is logged as `pred_table`.

In [9]:
import params
# Baseline training run: download the split dataset, train a U-Net, log a
# prediction table and store the final validation metrics in the run summary.
with wandb.init(entity=params.WANDB_ENTITY, project=params.WANDB_PROJECT, job_type="training", config=train_config) as run:
  config = wandb.config

  # data download
  artifact_name = f'{params.WANDB_ENTITY}/{params.WANDB_PROJECT}/{params.PROCESSED_DATA_AT}:latest'
  processed_data_at = run.use_artifact(artifact_name, type='split_data')
  processed_dataset_dir = Path(processed_data_at.download())
  df = pd.read_csv(processed_dataset_dir / 'data_split.csv')

  # The holdout ('test') rows are dropped here. The 'is_valid' column tells
  # the trainer which of the remaining rows are validation rows.
  df = df.loc[df.Stage.ne('test')].reset_index(drop=True)
  df['is_valid'] = df.Stage.eq('valid')

  # Assign image and mask paths; the fastai DataBlock API reads these
  # columns to feed training and validation data to the model.
  df["image_fname"] = [processed_dataset_dir / 'images' / f for f in df.File_Name]
  df["label_fname"] = [label_func(f) for f in df.image_fname]

  dls = get_data(
      df,
      bs=config.batch_size,
      img_size=config.img_size,
      augment=config.augment,
  )

  metrics = [
      MIOU(), BackgroundIOU(), RoadIOU(), TrafficLightIOU(),
      TrafficSignIOU(), PersonIOU(), VehicleIOU(), BicycleIOU(),
  ]

  learn = unet_learner(dls, arch=resnet18, pretrained=config.pretrained, metrics=metrics)

  callbacks = [
      SaveModelCallback(monitor='miou'),
      WandbCallback(log_preds=False, log_model=True),
  ]
  learn.fit_one_cycle(config.epochs, config.lr, cbs=callbacks)

  # Log a table of predictions for inspection in W&B.
  samples, outputs, predictions = get_predictions(learn)
  table = create_iou_table(samples, outputs, predictions, params.BDD_CLASSES)
  wandb.log({"pred_table": table})

  # We are reloading the model from the best checkpoint at the end and saving it.
  # To make sure we track the final metrics correctly, we validate the model
  # again and save the final loss and metrics to wandb.summary.
  scores = learn.validate()
  metric_names = ['final_loss'] + [f'final_{x.name}' for x in metrics]
  final_results = {metric_names[i]: score for i, score in enumerate(scores)}
  for k, v in final_results.items():
      wandb.summary[k] = v

[34m[1mwandb[0m: Downloading large artifact bdd_simple_1k_split:latest, 846.07MB. 4010 files... 
[34m[1mwandb[0m:   4010 of 4010 files downloaded.  
Done. 0:0:8.2
Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /home/olachinkeigpu/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 44.7M/44.7M [00:02<00:00, 20.6MB/s]


epoch,train_loss,valid_loss,miou,background_iou,road_iou,traffic_light_iou,traffic_sign_iou,person_iou,vehicle_iou,bicycle_iou,time
0,0.489724,0.42089,0.267093,0.827925,0.739418,0.0,0.0,0.0,0.302305,0.0,00:08
1,0.563676,0.440962,0.275228,0.844171,0.671292,0.0,0.0,0.0,0.411136,0.0,00:08
2,0.834528,0.777394,0.103109,0.676777,0.044984,0.0,0.0,0.0,0.0,0.0,00:08
3,18.60668,0.886938,0.096392,0.674747,0.0,0.0,0.0,0.0,0.0,0.0,00:08
4,1.194156,0.874065,0.096392,0.674747,0.0,0.0,0.0,0.0,0.0,0.0,00:08
5,0.881833,0.872343,0.096392,0.674747,0.0,0.0,0.0,0.0,0.0,0.0,00:08
6,0.868688,0.871753,0.096392,0.674747,0.0,0.0,0.0,0.0,0.0,0.0,00:08
7,0.875667,0.872798,0.096392,0.674747,0.0,0.0,0.0,0.0,0.0,0.0,00:08
8,0.869034,0.87132,0.096392,0.674747,0.0,0.0,0.0,0.0,0.0,0.0,00:08
9,0.877879,0.872457,0.096392,0.674747,0.0,0.0,0.0,0.0,0.0,0.0,00:08


Better model found at epoch 0 with miou value: 0.2670926544786666.
Better model found at epoch 1 with miou value: 0.27522844875238933.


0,1
background_iou,▇█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
bicycle_iou,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
eps_0,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eps_1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eps_2,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr_0,▁▂▂▃▄▅▆▇███████▇▇▇▇▆▆▆▅▅▅▄▄▄▃▃▃▂▂▂▁▁▁▁▁▁
lr_1,▁▂▂▃▄▅▆▇███████▇▇▇▇▆▆▆▅▅▅▄▄▄▃▃▃▂▂▂▁▁▁▁▁▁
lr_2,▁▂▂▃▄▅▆▇███████▇▇▇▇▆▆▆▅▅▅▄▄▄▃▃▃▂▂▂▁▁▁▁▁▁
miou,██▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
background_iou,0.67475
bicycle_iou,0.0
epoch,20.0
eps_0,1e-05
eps_1,1e-05
eps_2,1e-05
final_background_iou,0.84417
final_bicycle_iou,0.0
final_loss,0.44096
final_miou,0.27523
