In [1]:
# part 3 experimentation: Unsupervised domain adaptation using pseudo-ground truth generation
# major library dependencies: jupyter, numpy, matplotlib, pytorch, scikit-image, pillow

# Fine-tuning with Pseudo-ground Truth
import torch
from dataset import camvidLoader
from torch.utils.data import Dataset, DataLoader
import numpy as np
import data_aug as aug
from copy import deepcopy
import torch.nn.functional as F
from unet import UNet
from tempfile import TemporaryDirectory
import os
import matplotlib.pyplot as plt
from skimage.io import imsave

import warnings
warnings.filterwarnings("ignore")

# Label set of the segmentation task; the order defines the class index.
num_classes = 14 # number of classes is always 14 for this project.
labels = [
    'Sky', 'Building', 'Pole', 'Road', 'LaneMarking', 'SideWalk', 'Pavement',
    'Tree', 'SignSymbol', 'Fence', 'Car_Bus', 'Pedestrian', 'Bicyclist', 'Others',
]

# Everything runs on the CPU by default; set to "cuda" if you have a GPU.
device = 'cpu'

# Checkpoint of the already-trained network (a full model object that exposes
# state_dict()), mapped onto the selected device.
unet = torch.load('camvid_sunny_model.pt', map_location=torch.device(device))

# Datasets of the adaptation target, without augmentation, requested at 256x256.
data_root = './CamVid/cloudy'
train_data = camvidLoader(
    root=data_root,
    split='train',
    is_aug=False,
    img_size=[256, 256],
    is_pytorch_transform=True,
)
test_data = camvidLoader(
    root=data_root,
    split='test',
    is_aug=False,
    img_size=[256, 256],
    is_pytorch_transform=True,
)

## Load parameters, model and dataset

In [2]:
# Hyper-parameters of the fine-tuning run.
batch_size = 4
num_workers = 8
lr = 5e-6
epochs = 5

# Training split: shuffled every epoch, incomplete final batch dropped.
train_dataset = camvidLoader(root=data_root, split='train', is_aug=False, img_size = [256, 256],
                             is_pytorch_transform = True, aug = None)
train_loader = DataLoader(train_dataset, num_workers=num_workers, batch_size=batch_size, shuffle=True, drop_last=True)

# Validation split. The loader wraps val_dataset (not the training set) so that
# validation numbers are computed on held-out images; evaluation loaders keep a
# fixed order and every sample (no shuffling, no dropped final batch).
val_dataset = camvidLoader(root=data_root, split='val', is_aug=False, img_size = [256, 256],
                           is_pytorch_transform = True, aug = None)
val_loader = DataLoader(val_dataset, num_workers=num_workers, batch_size=batch_size, shuffle=False, drop_last=False)

# Test split: deterministic order, every sample kept.
test_loader = DataLoader(test_data, num_workers=num_workers, batch_size=batch_size, shuffle=False, drop_last=False)

In [3]:
# Read the pre-trained checkpoint, then copy its weights into a freshly
# constructed UNet that lives on the selected device.
unet = torch.load('camvid_sunny_model.pt', map_location=torch.device(device))

unet_model = UNet(3, num_classes, width=32, bilinear=True)
unet_model.load_state_dict(unet.state_dict())
unet_model = unet_model.to(device)

# Optimiser over all network parameters and the cross-entropy training loss.
optimizer = torch.optim.Adam(unet_model.parameters(), lr=lr)
loss_func = torch.nn.CrossEntropyLoss()

In [4]:
# per-pixel entropy of the unet output, one map per sample of the batch
def compute_entropy_map(model_output):
    """Return the per-pixel entropy map of every sample in a batch of logits.

    Each sample ``model_output[i]`` is turned into probabilities with a
    softmax over its first axis, and the entropy ``-sum(p * log(p + 1e-5))``
    is reduced over that same axis.

    Args:
        model_output: tensor of raw network outputs indexed as
            ``[sample, class, ...]``.

    Returns:
        A list with one NumPy array per sample, holding the entropy of each
        pixel of that sample.
    """
    # small offset inside the log so that a probability of 0 stays finite
    epsilon = 1e-5

    def _sample_entropy(logits):
        # class probabilities of a single sample, then entropy over classes
        probs = F.softmax(logits, dim=0)
        return -torch.sum(probs * torch.log(probs + epsilon), dim=0)

    return [
        _sample_entropy(model_output[sample_idx]).cpu().detach().numpy()
        for sample_idx in range(0, model_output.shape[0])
    ]

In [None]:
# function to select prediction with low uncertainty
def sel_prediction(y, confidence, threshold=-1.96):
    """Zero the logits of every low-confidence pixel, for all channels.

    The tensor is modified in place; the same object is returned for
    convenience.

    Args:
        y: tensor of network outputs indexed as ``[sample, channel, row, col]``.
            Any number of channels is supported.
        confidence: per-sample sequence of 2-D confidence maps (one value per
            pixel), e.g. the negated entropy maps.
        threshold: pixels whose confidence is strictly below this value are
            treated as uncertain. Defaults to -1.96.

    Returns:
        ``y`` itself, with all channels set to 0 at the uncertain pixels.
    """
    for idx in range(y.shape[0]):

        # boolean (row, col) map of the pixels that are too uncertain
        low_conf = np.asarray(confidence[idx]) < threshold
        if not low_conf.any():
            continue

        # one masked assignment covers every channel, whatever their number
        pixel_mask = torch.from_numpy(low_conf).to(y.device)
        y[idx][:, pixel_mask] = 0

    return y

In [None]:
# function to select pseudo labels with low uncertainty
def sel_pseudo(pseudo_labels, confidence, threshold=-1.96, fill_value=0):
    """Overwrite the pseudo label of every low-confidence pixel.

    The label maps are modified in place; the same object is returned for
    convenience.

    Args:
        pseudo_labels: label maps indexed as ``[sample, row, col]``.
        confidence: per-sample sequence of 2-D confidence maps (one value per
            pixel), e.g. the negated entropy maps.
        threshold: pixels whose confidence is strictly below this value are
            treated as uncertain. Defaults to -1.96.
        fill_value: label written at the uncertain pixels. Defaults to 0,
            which is also a valid class id; pass a dedicated value (for
            instance the loss function's ignore index) to keep the two apart.

    Returns:
        ``pseudo_labels`` itself, with ``fill_value`` at the uncertain pixels.
    """
    for idx in range(pseudo_labels.shape[0]):

        # (rows, cols) of the pixels that are too uncertain for this sample
        uncertain_pixels = np.where(confidence[idx] < threshold)

        pseudo_labels[idx][uncertain_pixels] = fill_value

    return pseudo_labels


In [None]:
# Directory that receives one full-model checkpoint per epoch.
# exist_ok avoids the check-then-create race of a separate os.path.exists test.
model_dir = './model_dir/'
os.makedirs(model_dir, exist_ok=True)

# The best weights are kept in a temporary directory: they are reloaded into
# unet_model before the directory is removed at the end of the `with` block.
with TemporaryDirectory() as tempdir:

    # the path to store the best parameters
    best_model_params_path = os.path.join(tempdir, 'best_model_params.pt')
    torch.save(unet_model.state_dict(), best_model_params_path)

    best_loss = float('Inf')

    for epoch in range(epochs):

        # start training loop
        unet_model.train()
        train_loss = 0
        count = 0

        # the ground-truth masks of the loader are deliberately ignored
        for idx_batch, (imagergb, _, filename) in enumerate(train_loader):

            optimizer.zero_grad()

            # send to the device (GPU or CPU) and do a forward pass
            x = imagergb.to(device)

            # current model predicts the logits y
            y = unet_model(x)

            # per-pixel entropy; its negation serves as the confidence score
            entropy = compute_entropy_map(y)
            confidence = [-entropy_map for entropy_map in entropy]

            # predict the pseudo labels at each pixel
            # (must happen before sel_prediction, which edits y in place)
            pseudo_labels = torch.argmax(y, dim=1)

            # keep y and pseudo labels only where the confidence is high:
            # uncertain pixels get zeroed logits and label 0
            y_high_conf = sel_prediction(y, confidence)
            pseudo_labels_high_conf = sel_pseudo(pseudo_labels, confidence)

            # finally calculate the loss and back propagate
            loss = loss_func(y_high_conf, pseudo_labels_high_conf)
            loss.backward()
            optimizer.step()

            if idx_batch % 2 == 0:
                print("train epoch = " + str(epoch) + " | batch = " + str(idx_batch) + " | loss = "+str(loss.item()))

            train_loss += loss.item()
            count += 1

        # an empty loader (e.g. fewer samples than batch_size with
        # drop_last=True) would otherwise end in a bare ZeroDivisionError
        if count == 0:
            raise RuntimeError("train_loader produced no batches; cannot compute the epoch loss")

        train_loss /= count

        # store the best parameters when the loss is minimal
        if train_loss < best_loss:
            best_loss = train_loss
            torch.save(unet_model.state_dict(), best_model_params_path)

        print('epoch ' + str(epoch) + ', training loss = ' + str(train_loss))

        # store the checkpoint (whole model object) for this epoch
        model_location = os.path.join(model_dir, "model_file_epoch_" + str(epoch) + ".pt")
        torch.save(unet_model, model_location)

    # import the best parameters to unet model
    unet_model.load_state_dict(torch.load(best_model_params_path))

train epoch = 0 | batch = 0 | loss = 0.37568891048431396
train epoch = 0 | batch = 2 | loss = 0.4533589482307434
train epoch = 0 | batch = 4 | loss = 0.38332900404930115
train epoch = 0 | batch = 6 | loss = 0.42419129610061646
train epoch = 0 | batch = 8 | loss = 0.4331427216529846
train epoch = 0 | batch = 10 | loss = 0.37523218989372253
train epoch = 0 | batch = 12 | loss = 0.42762961983680725
train epoch = 0 | batch = 14 | loss = 0.4290653467178345
train epoch = 0 | batch = 16 | loss = 0.4671803116798401
epoch 0, training loss = 0.4235711478524738
train epoch = 1 | batch = 0 | loss = 0.3985866606235504
train epoch = 1 | batch = 2 | loss = 0.4785328805446625
train epoch = 1 | batch = 4 | loss = 0.4249192476272583
train epoch = 1 | batch = 6 | loss = 0.4987383186817169
train epoch = 1 | batch = 8 | loss = 0.3791738450527191
train epoch = 1 | batch = 10 | loss = 0.4353066682815552
train epoch = 1 | batch = 12 | loss = 0.3696546256542206
train epoch = 1 | batch = 14 | loss = 0.447272002

## Evaluate cloudy dataset performance

In [8]:
# evaluation metric of accuracy.
def global_accuracy_metric(y_true, y_pred):
    """Fraction of elements of ``y_pred`` that equal ``y_true``.

    Args:
        y_true: NumPy array of reference labels.
        y_pred: NumPy array of predicted labels, compared element-wise.

    Returns:
        Number of matching elements divided by ``y_pred.size``.
    """
    matches = (y_true == y_pred)
    return matches.sum() / y_pred.size

# evaluation metric of iou.
def IoU_metric(y_true, y_pred, n_classes=None):
    """Intersection-over-union of every class for a single label map.

    Args:
        y_true: NumPy array of reference labels.
        y_pred: NumPy array of predicted labels, same shape as ``y_true``.
        n_classes: number of class ids (0 .. n_classes-1) to evaluate. When
            omitted, the module-level ``num_classes`` is used.

    Returns:
        A list with one entry per class: intersection / union, or NaN when the
        class appears in neither array (so that nan-aware averaging skips it).
    """
    if n_classes is None:
        n_classes = num_classes

    iou_per_image = []

    for class_id in range(n_classes):
        pred_mask = (y_pred == class_id)
        true_mask = (y_true == class_id)

        intersection = np.logical_and(pred_mask, true_mask).sum()
        union = np.logical_or(pred_mask, true_mask).sum()

        # if the union is 0 the class is absent and the iou is undefined;
        # np.nan is used because the np.NAN alias no longer exists in NumPy 2
        if union == 0:
            iou = np.nan
        else:
            iou = intersection/union

        iou_per_image.append(iou)

    return iou_per_image

In [9]:
# Per-image results collected over the whole test set.
global_acc, perclass_acc, img_file = [], [], []

# start evaluation
unet_model.eval()

# no gradients are needed anywhere in the evaluation pass
with torch.no_grad():

    for imagergb, labelmask, filename in test_loader:

        img_file.extend(filename)

        x = imagergb.to(device)
        y_ = labelmask.to(device)
        y = unet_model(x)

        # walk through the batch one image at a time
        for logits, target in zip(y, y_):

            # most likely label per pixel, and the reference in the same format
            max_index = torch.argmax(logits, dim=0).cpu().int().numpy()
            gt_correct_format = target.cpu().int().numpy()

            # global accuracy of this image
            global_acc.append(global_accuracy_metric(gt_correct_format, max_index))

            # iou per class of this image
            perclass_acc.append(IoU_metric(gt_correct_format, max_index))

In [10]:
# Mean pixel accuracy over all test images.
print(f'The global accuracy overall image is: {np.mean(global_acc)}')

# IoU of each class averaged over the images (NaN entries are skipped),
# followed by the mean of those per-class values.
overall_class_iou = np.nanmean(perclass_acc, axis=0)
print(f'The average mIoU scores is: {np.nanmean(overall_class_iou)}\n')

# One report line per class, in label order.
per_class_report = [
    f'The overall IOU scores for class {labels[idx]} is {overall_class_iou[idx]}'
    for idx in range(num_classes)
]
for report_line in per_class_report:
    print(report_line)

The global accuracy overall image is: 0.7224432627360026
The average mIoU scores is: 0.3300943880664946

The overall IOU scores for class Sky is 0.863187620143021
The overall IOU scores for class Building is 0.4815343172450504
The overall IOU scores for class Pole is 0.0
The overall IOU scores for class Road is 0.6294435525621992
The overall IOU scores for class LaneMarking is 0.1297391690535898
The overall IOU scores for class SideWalk is 0.7259996840837188
The overall IOU scores for class Pavement is nan
The overall IOU scores for class Tree is 0.6299893687837966
The overall IOU scores for class SignSymbol is 0.0
The overall IOU scores for class Fence is 0.005950273496382147
The overall IOU scores for class Car_Bus is 0.525398801338756
The overall IOU scores for class Pedestrian is 0.2916169040041272
The overall IOU scores for class Bicyclist is 0.0
The overall IOU scores for class Others is 0.008367354153787494
