# Semantic Segmentation with Deep Learning: Training and Testing on Colab

Paste the following JavaScript snippet into your browser console so that your Colab runtime won't time out. Open your browser's developer tools with Ctrl+Shift+I (Option+Command+I on a Mac), click the Console tab, and enter the snippet at the console prompt.
```Javascript
// Clicks Colab's connect button so that the runtime does not time out.
// If the button element is not present in the page, the click is skipped
// instead of throwing a TypeError on every tick.
function ClickConnect(){
    const connectButton = document.querySelector("colab-connect-button");
    if (!connectButton) {
        console.log("colab-connect-button not found; skipping click");
        return;
    }
    console.log("Clicked on connect button");
    connectButton.click();
}
// Repeat once every 60000 ms (one minute).
setInterval(ClickConnect, 60000);
```

Zip up your code locally with `python zip_for_colab.py`, and upload the resulting `cv_proj5_colab.zip` file. Hit refresh, then run the following:

In [None]:
!unzip cv_proj5_colab.zip

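Verify that the archive was extracted by listing the working directory. The cells below import the project code as the `vision` package, so it must be importable from here: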

In [None]:
!ls

Download ImageNet-pretrained ResNet-50:


In [None]:
# Fetch the ImageNet-pretrained ResNet-50 checkpoint into the working directory.
!wget -O "resnet50_v2.pth" --no-check-certificate 'https://docs.google.com/uc?export=download&id=1w5pRmLJXvmQQA5PtCbHhZc_uC4o0YbmA'
# `-p` keeps this cell re-runnable: a plain `mkdir` fails once initmodel/ exists,
# which would short-circuit the `&&` and leave the checkpoint unmoved.
!mkdir -p initmodel && mv resnet50_v2.pth initmodel/

In [None]:
# The ImageNet-pretrained ResNet-50 weights should be 99 MB
!ls -ltrh initmodel

Download the Camvid dataset images. It's 700 MB, but it should only take 30 sec.

In [None]:
!chmod +rwx download_dataset.sh
!sed -i -e 's/\r$//' download_dataset.sh
!./download_dataset.sh Camvid

In [None]:
!ls
# Extract the archive into Camvid/. `-d` sets the extraction directory, so no
# `cd` is needed (a `cd` inside a `!` command would not persist anyway).
# `-o` overwrites without prompting, so re-running the cell cannot stall on
# unzip's interactive "replace?" question.
!unzip -o Camvid/camvid_semseg11.zip -d Camvid

We'll now set some default hyperparameters for training. Choose the number of epochs you'd like to train for (for PSPNet, it will take ~30 min for 50 epochs, or ~70 min for 100 epochs).

In [None]:
!python --version
from types import SimpleNamespace

# Experiment configuration object handed to the training and inference calls
# in the cells below.
args = SimpleNamespace(
    # ---- data ----
    names_path="./dataset_lists/camvid-11/camvid-11_names.txt",
    data_root="./Camvid/",
    train_list="./src/dataset_lists/camvid-11/list/train.txt",
    val_list="./src/dataset_lists/camvid-11/list/val.txt",
    classes=11,
    # ---- training ----
    arch="PSPNet",  # alternative: "SimpleSegmentationNet"
    save_path="",  # placeholder; filled in right after construction
    epochs=5,
    zoom_factor=8,
    use_ppm=True,
    aux_weight=0.4,
    aux_loss=True,
    layers=50,
    workers=2,
    batch_size=32,
    batch_size_val=32,
    data_aug=True,
    short_size=240,
    train_h=201,
    train_w=201,
    init_weight="./initmodel/resnet50_v2.pth",
    scale_min=0.5,  # lower bound of the random scale
    scale_max=2.0,  # upper bound of the random scale
    rotate_min=-10,  # lower bound of the random rotation
    rotate_max=10,  # upper bound of the random rotation
    ignore_label=255,
    base_lr=0.01,
    start_epoch=0,
    power=0.9,
    momentum=0.9,
    weight_decay=0.0001,
    manual_seed=0,
    print_freq=10,
    save_freq=1,
    # Evaluate on the validation set; this needs extra GPU memory, so a small
    # batch_size_val is recommended.
    evaluate=True,
    multiprocessing_distributed=False,
    # ---- inference ----
    dataset="camvid-11",
    base_size=240,
    test_h=201,
    test_w=201,
    scales=[1.0],  # multi-scale option: [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
    test_list="./src/dataset_lists/camvid-11/list/val.txt",
    vis_freq=10,
    pretrained=True,
)

# The output directory depends on the chosen architecture, so it is derived
# once `args.arch` is known.
args.save_path = f"exp/camvid/{args.arch}/model"

Python 3.8.15


In [None]:
import os

import torch

# Create the output directory up front; exist_ok=True makes re-running this
# cell harmless when the directory already exists.
os.makedirs(args.save_path, exist_ok=True)
from vision.trainer import main_worker
# Echo the full configuration so the settings are captured in the cell output.
print(args)
# Launch the run, passing along whether a CUDA device is available.
main_worker(args, torch.cuda.is_available())

namespace(arch='PSPNet', aux_loss=True, aux_weight=0.4, base_lr=0.01, base_size=240, batch_size=32, batch_size_val=32, classes=11, data_aug=True, data_root='./Camvid/', dataset='camvid-11', epochs=5, evaluate=True, ignore_label=255, init_weight='./initmodel/resnet50_v2.pth', layers=50, manual_seed=0, momentum=0.9, multiprocessing_distributed=False, names_path='./dataset_lists/camvid-11/camvid-11_names.txt', power=0.9, pretrained=True, print_freq=10, rotate_max=10, rotate_min=-10, save_freq=1, save_path='exp/camvid/PSPNet/model', scale_max=2.0, scale_min=0.5, scales=[1.0], short_size=240, start_epoch=0, test_h=201, test_list='./src/dataset_lists/camvid-11/list/val.txt', test_w=201, train_h=201, train_list='./src/dataset_lists/camvid-11/list/train.txt', train_w=201, use_ppm=True, val_list='./src/dataset_lists/camvid-11/list/val.txt', vis_freq=10, weight_decay=0.0001, workers=2, zoom_factor=8)
namespace(arch='PSPNet', aux_loss=True, aux_weight=0.4, base_lr=0.01, base_size=240, batch_size=

INFO:main-logger:namespace(arch='PSPNet', aux_loss=True, aux_weight=0.4, base_lr=0.01, base_size=240, batch_size=32, batch_size_val=32, classes=11, data_aug=True, data_root='./Camvid/', dataset='camvid-11', epochs=5, evaluate=True, ignore_label=255, init_weight='./initmodel/resnet50_v2.pth', layers=50, manual_seed=0, momentum=0.9, multiprocessing_distributed=False, names_path='./dataset_lists/camvid-11/camvid-11_names.txt', power=0.9, pretrained=True, print_freq=10, rotate_max=10, rotate_min=-10, save_freq=1, save_path='exp/camvid/PSPNet/model', scale_max=2.0, scale_min=0.5, scales=[1.0], short_size=240, start_epoch=0, test_h=201, test_list='./src/dataset_lists/camvid-11/list/val.txt', test_w=201, train_h=201, train_list='./src/dataset_lists/camvid-11/list/train.txt', train_w=201, use_ppm=True, val_list='./src/dataset_lists/camvid-11/list/val.txt', vis_freq=10, weight_decay=0.0001, workers=2, zoom_factor=8)


[2022-12-07 23:05:04,532 INFO trainer.py line 60 411] => creating model ...


INFO:main-logger:=> creating model ...


[2022-12-07 23:05:04,536 INFO trainer.py line 61 411] Classes: 11


INFO:main-logger:Classes: 11


List of (image,label) pairs train list generated!
List of (image,label) pairs val list generated!




We'll now create full-resolution predictions for the full val set, and compute mIoU against the ground truth.

In [None]:
from vision.test import test_model
# Checkpoint file named after the configured epoch count, inside the same
# directory that args.save_path was set to for the Camvid run
# (presumably the checkpoint written after the last training epoch — confirm).
args.model_path = f"exp/camvid/{args.arch}/model/train_epoch_{args.epochs}.pth"
# Run the project's test routine with this configuration.
test_model(args)

**Important**: Record the mIoU listed in the output above, as well as the IoU for each class. You can find the results later in `train_epoch_{args.epochs}/camvid-11/{args.base_size}/results.txt`.

Now, let's take a look at what our results look like. We'll make a 2x3 image grid with the following structure:

|RGB Image | Blended RGB and Ground Truth | Ground Truth 
|:-: | :-: | :-:
| RGB Image | Blended RGB and Prediction | Prediction

In [None]:
import imageio
import matplotlib.pyplot as plt

rgb_predictions_dir = f"train_epoch_{args.epochs}/camvid-11/{args.base_size}/rgb_mask_predictions"

def show_image_grid(rgb_predictions_dir: str, img_fname: str) -> None:
    """Display one saved image from the predictions directory.

    Args:
        rgb_predictions_dir: Directory that contains the image file.
        img_fname: File name of the image inside that directory.
    """
    grid_path = f'{rgb_predictions_dir}/{img_fname}'
    grid_pixels = imageio.imread(grid_path)

    # Use a 15x7 inch figure for the image.
    plt.figure(figsize=(15, 7))
    plt.imshow(grid_pixels)
    plt.show()

show_image_grid(rgb_predictions_dir, "0016E5_07977.jpg")

We'll look at more examples:

In [None]:
# Display the remaining sample frames, one figure per file, in this order.
more_examples = (
    "0016E5_07997.jpg",
    "0016E5_08017.jpg",
    "0016E5_08037.jpg",
    "0016E5_08057.jpg",
    "0016E5_08077.jpg",
    "0016E5_08097.jpg",
    "0016E5_08117.jpg",
    "0016E5_08137.jpg",
    "0016E5_08157.jpg",
)
for example_fname in more_examples:
    show_image_grid(rgb_predictions_dir, example_fname)

Now, zip up your predictions on the test set for your best model, **download them locally to your machine**, and submit these to Gradescope:

In [None]:
grayscale_predictions_dir = f"train_epoch_{args.epochs}/camvid-11/{args.base_size}/gray"
!ls -ltrh $grayscale_predictions_dir
!zip -r grayscale_predictions.zip $grayscale_predictions_dir
!ls -ltrh grayscale_predictions.zip

In this section you will load the model trained on the Camvid-11 dataset and train it on the Kitti Road Segmentation dataset.

In [None]:
# Reconfigure the shared `args` object in place for the KITTI transfer run.
#
# Statement order matters: model_path embeds args.epochs, so it must be built
# BEFORE args.epochs is overwritten at the end of this cell. Re-running this
# cell after it has executed once builds the name from epochs=20 instead.
args.model_path = f"exp/camvid/{args.arch}/model/train_epoch_{args.epochs}.pth"
# NOTE(review): no cell visible in this notebook downloads the KITTI data —
# confirm that ./kitti exists before running.
args.data_root = "./kitti"
args.classes = 2
# Separate output directory from the Camvid run's exp/camvid/... path.
args.save_path = f"exp/kitti/{args.arch}/model"
args.batch_size = 32
args.batch_size_val = 1
args.dataset = "kitti"
args.evaluate = False
# From here on, any expression that reads args.epochs sees 20.
args.epochs = 20

import os

import torch
os.makedirs(args.save_path, exist_ok=True)
print(args)

In [None]:
# Learning-rate, momentum and weight-decay values for the transfer run.
# They equal the values set when `args` was first defined; edit them here to
# change them for the KITTI run only.
args.base_lr = 0.01
args.momentum = 0.9
args.weight_decay = 0.0001

In [None]:
from vision.trainer import transfer_train
transfer_train(args, torch.cuda.is_available())

## Don't forget to download the grayscale_predictions.zip and exp folder!