# Setup

## Mount Drive

Mount Google Drive. This will create a new folder, called `My Drive` under `/content/drive/`. It is a FUSE-like filesystem mounted over the network, so the average IO performance is quite slow.

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Move into the `BiseNetv1` directory, so that we are in the correct directory for python imports

In [2]:
%cd /content/drive/MyDrive/MLDL/BiseNetv1/

/content/drive/.shortcut-targets-by-id/1H1LF-uIDd32OaHXtZjb_qT8ZexZ_Th3G/MLDL/BiseNetv1


In [None]:
%pwd

'/content/drive/MyDrive/Universita/MLDL/BiseNetv1'

Use autoreload extension so that when we change an imported module, it will get autoreloaded.

In [3]:
%load_ext autoreload
%autoreload 2

## Download

Download `CamVid` and `IDDA` datasets from a Google Cloud Storage bucket. Previously we downloaded them from Google Drive but there is (rightly so) a quite small download quota.

Files are downloaded to the local disk which has much faster IO than the mounted drive.

In [None]:
%%time
%cd /content
%pwd
!gsutil cp gs://recsys-2021-bucket/CamVid.zip /content/CamVid.zip
!gsutil cp gs://recsys-2021-bucket/IDDA.zip /content/IDDA.zip
!unzip -q CamVid.zip
!unzip -q IDDA.zip
!cp /content/drive/MyDrive/Universita/MLDL/classes_info.json /content/IDDA/classes_info.json
%cd /content/drive/MyDrive/Universita/MLDL/BiseNetv1/

/content
Copying gs://recsys-2021-bucket/CamVid.zip...
/ [1 files][579.3 MiB/579.3 MiB]                                                
Operation completed over 1 objects/579.3 MiB.                                    
Copying gs://recsys-2021-bucket/IDDA.zip...
- [1 files][  4.9 GiB/  4.9 GiB]   34.1 MiB/s                                   
Operation completed over 1 objects/4.9 GiB.                                      
/content/drive/MyDrive/Universita/MLDL/BiseNetv1
CPU times: user 1.19 s, sys: 243 ms, total: 1.43 s
Wall time: 2min 16s


In [4]:
import torch

import matplotlib.pyplot as plt

import seaborn as sns
sns.set_theme()
%config InlineBackend.figure_format = 'retina'
from utils import reverse_one_hot, colour_code_segmentation
from torchvision import transforms as T

mean = torch.tensor([0.485, 0.456, 0.406], dtype=torch.float32)
std = torch.tensor([0.229, 0.224, 0.225], dtype=torch.float32)
unnormalize = T.Normalize((-mean/std).tolist(), (1.0 / std).tolist())

import torch.nn as nn
from utils import reverse_one_hot, compute_global_accuracy, fast_hist, per_class_iu, cal_miou, colour_code_segmentation
from PIL import Image
import numpy as np

from model.build_BiSeNet import BiSeNet
from torch.utils.data import DataLoader

from dataset.CamVid import CamVid

# MBT (Multi Band Transfer)

## Load 3 separate models

In [5]:
CROP_HEIGHT = 720
CROP_WIDTH = 960

NUM_WORKERS = 0

CAMVID_PATH = ['/content/CamVid/train/', '/content/CamVid/val/']
CAMVID_TEST_PATH = ['/content/CamVid/test/']
CAMVID_LABEL_PATH = ['/content/CamVid/train_labels/', '/content/CamVid/val_labels/']
CAMVID_TEST_LABEL_PATH = ['/content/CamVid/test_labels/']
CSV_CAMVID_PATH = '/content/CamVid/class_dict.csv'

BATCH_SIZE_CAMVID = 2

LOSS = 'dice'

NUM_CLASSES = 12
CONTEXT_PATH = 'resnet101'

In [None]:
model_1 = BiSeNet(NUM_CLASSES, CONTEXT_PATH).cuda().eval()
model_1.load_state_dict(torch.load('/content/drive/MyDrive/Universita/MLDL/BiseNetv1/checkpointBeta01_T1/0.01_50_Generator.pth'))

model_2 = BiSeNet(NUM_CLASSES, CONTEXT_PATH).cuda().eval()
model_2.load_state_dict(torch.load('/content/drive/MyDrive/Universita/MLDL/BiseNetv1/checkpointBeta05_T1/0.05_50_Generator.pth'))

model_3 = BiSeNet(NUM_CLASSES, CONTEXT_PATH).cuda().eval()
model_3.load_state_dict(torch.load('/content/drive/MyDrive/Universita/MLDL/BiseNetv1/checkpointBeta09_T1/0.09_50_Generator.pth'))

## Load CamVid test dataset

In [None]:
target_dataset_test = CamVid(
  image_path=CAMVID_TEST_PATH,
  label_path=CAMVID_TEST_LABEL_PATH,csv_path=CSV_CAMVID_PATH,
  scale=(CROP_HEIGHT, CROP_WIDTH),
  loss=LOSS,
  mode='val'
)
target_dataloader_test = DataLoader(
  target_dataset_test,
  batch_size=1,
  shuffle=True,
  num_workers=NUM_WORKERS,
  drop_last=True,
  pin_memory=True
)
target_dataloader_test_iter = iter(target_dataloader_test)

CPU times: user 11 ms, sys: 0 ns, total: 11 ms
Wall time: 33.9 ms


## Compute the performance of the 3 models average

In [None]:
with torch.no_grad():
  precision_record = []
  hist = np.zeros((NUM_CLASSES, NUM_CLASSES))
  for index, batch in enumerate(target_dataloader_test_iter):
      if index % 10 == 0: print('%d processed' % index)
      image, label = batch
      image = image.cuda()

      output1 = model_1(image)
      output1 = nn.functional.softmax(output1, dim=1)

      output2 = model_2(image)
      output2 = nn.functional.softmax(output2, dim=1)

      output3 = model_3(image)
      output3 = nn.functional.softmax(output3, dim=1)

      output = (output1 + output2 + output3) / 3

      output = output.squeeze()
      output = reverse_one_hot(output)
      output = np.array(output.cpu())

      label = label.squeeze()
      label = reverse_one_hot(label)
      label = np.array(label.cpu())

      precision = compute_global_accuracy(output, label)
      hist += fast_hist(label.flatten(), output.flatten(), NUM_CLASSES)
      precision_record.append(precision)

0 processed
10 processed
20 processed
30 processed
40 processed
50 processed
60 processed
70 processed
80 processed
90 processed
100 processed
110 processed
120 processed
130 processed
140 processed
150 processed
160 processed
170 processed
180 processed
190 processed
200 processed
210 processed
220 processed
230 processed


In [None]:
precision = np.mean(precision_record)
miou = np.mean(per_class_iu(hist))
miou_list = per_class_iu(hist)[:-1]
miou_dict, miou = cal_miou(miou_list, CSV_CAMVID_PATH)
miou = np.mean(miou_list)
print('precision per pixel for test: %.3f' % precision)
print('mIoU for validation: %.3f' % miou)
miou_str = ''
for key in miou_dict:
    miou_str += '{}:{},\n'.format(key, miou_dict[key])
print('mIoU for each class:')
print(miou_str)

precision per pixel for test: 0.699
mIoU for validation: 0.313
mIoU for each class:
Bicyclist:0.005533537967208897,
Building:0.5802598906066335,
Car:0.5961808338506456,
Column_Pole:0.04173963609780111,
Fence:0.01099322715994908,
Pedestrian:0.0004242261546649767,
Road:0.7030851362048675,
Sidewalk:0.3198406713460183,
SignSymbol:0.0011105451690976043,
Sky:0.7578246438015329,
Tree:0.4286469193543627,



# Generating pseudolabels

## Load train target dataset

In [None]:
import os
from tqdm.notebook import tqdm

In [None]:
CAMVID_PATH = ['/content/CamVid/train/', '/content/CamVid/val/']
CAMVID_TEST_PATH = ['/content/CamVid/test/']
CAMVID_LABEL_PATH = ['/content/CamVid/train_labels/', '/content/CamVid/val_labels/']
CAMVID_TEST_LABEL_PATH = ['/content/CamVid/test_labels/']
CSV_CAMVID_PATH = '/content/CamVid/class_dict.csv'

target_dataset_train = CamVid(
  image_path=CAMVID_PATH,
  label_path=CAMVID_LABEL_PATH,csv_path=CSV_CAMVID_PATH,
  scale=(CROP_HEIGHT, CROP_WIDTH),
  loss=LOSS,
  mode='val',
  return_name=True
)
target_dataloader_train = DataLoader(
  target_dataset_train,
  batch_size=1,
  shuffle=True,
  num_workers=NUM_WORKERS,
  drop_last=True,
  pin_memory=True
)
target_dataloader_train_iter = iter(target_dataloader_train)

## Use the 3 models' predictions and apply thresholding to generate the pseudolabels

In [None]:
# Preallocate arrays for:
# The label of every pixel of every image in the dataset
predicted_label = np.zeros((len(target_dataloader_train), 720, 960))
# The label's associated probability
predicted_prob = np.zeros((len(target_dataloader_train), 720, 960))
# The image's name
image_name = []

# For each image in the training dataset
with torch.no_grad():
  target_dataloader_train_iter = iter(target_dataloader_train)
  for index, batch in enumerate(tqdm(target_dataloader_train_iter)):
      if index % 10 == 0: print('%d processed' % index)
      image, _, name = batch
      image = image.cuda()

      # Compute, for each model, its predictions
      output1 = model_1(image)
      output1 = nn.functional.softmax(output1, dim=1)

      output2 = model_2(image)
      output2 = nn.functional.softmax(output2, dim=1)

      output3 = model_3(image)
      output3 = nn.functional.softmax(output3, dim=1)

      # Average the models' predictions
      output = (output1 + output2 + output3) / 3
      output = output.squeeze()

      # Save the label and its probability for each pixel
      prob, label = torch.max(output, dim=0)
      predicted_label[index] = label.cpu().numpy()
      predicted_prob[index] = prob.cpu().numpy()
      image_name.append(name[0])

# Calculate, for each class, the confidence thresholds of the label predictions
thresholds = []
for i in tqdm(range(NUM_CLASSES)):
    x = predicted_prob[predicted_label==i]
    # If there are no pixels in the dataset with this class
    # (e.g. cyclist in our instance)
    if len(x) == 0:
        print(f"no pixels of class {i} found")
        thresholds.append(0)
        continue
    x = np.sort(x)
    
    # 66-th percentile
    thresholds.append(x[np.int(np.round(len(x)*0.66))])

# Clamp the confidence to 0.9
thresholds = np.array(thresholds)
thresholds[thresholds>0.9]=0.9

# For each image in the pseudolabels dataset, if the pixel class probability
# is < class threshold, then consider that pixel as void
for index in tqdm(range(len(target_dataloader_train))):
  name = image_name[index]
  label = predicted_label[index]
  prob = predicted_prob[index]

  for i in range(NUM_CLASSES):
      label[(prob<thresholds[i]) * (label==i)] = 11

  parts = name.split("/")
  filename_and_ex = parts[-1]
  Image.fromarray(colour_code_segmentation(label, target_dataset_train.label_info).astype(np.uint8)).save(
      os.path.join('/content/', 'PSU_test', filename_and_ex)
  )

HBox(children=(FloatProgress(value=0.0, max=468.0), HTML(value='')))

0 processed
10 processed
20 processed
30 processed
40 processed
50 processed
60 processed
70 processed
80 processed
90 processed
100 processed
110 processed
120 processed
130 processed
140 processed
150 processed
160 processed
170 processed
180 processed
190 processed
200 processed
210 processed
220 processed
230 processed
240 processed
250 processed
260 processed
270 processed
280 processed
290 processed
300 processed
310 processed
320 processed
330 processed
340 processed
350 processed
360 processed
370 processed
380 processed
390 processed
400 processed
410 processed
420 processed
430 processed
440 processed
450 processed
460 processed



HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))

no pixels of class 11 found



HBox(children=(FloatProgress(value=0.0, max=468.0), HTML(value='')))


