# PA3 CNN for Semantic Segmentation

Deliverables:
- Average pixel accuracy (boundary pixels are excluded).
- Average IoU (boundary pixels are excluded).

## 1 Get and Inspect Data

In [None]:
# IPython shell escape: runs the project's download.py with the `python`
# found on PATH (presumably fetches the dataset -- confirm in download.py).
!python download.py

In [None]:
import torch
from util import *
from voc import *
from torch.utils.data import DataLoader
from torchvision.transforms import *

# Dataset notes:
#   Image shape : (224, 224, 3)
#   Num samples : train 209, val 213, test 210
#   Num classes : 21
#   Images are in range (0,1)
# NOTE(review): the original note also said "Masks have values {0,1}", but the
# code below uses every distinct mask value as a key into class_dict() --
# confirm which of the two is right.
loader = DataLoader(VOC('train'), batch_size=5, shuffle=False)
images, masks = next(iter(loader))  # (B, 3, H, W), (B, H, W)
images = images.detach().numpy()
masks = masks.detach().numpy()

# Fetch the label table once rather than once per label of every mask.
label_names = class_dict()

# One annotation string per mask: a newline-separated "value: name" entry for
# each distinct value found in that mask.
anns = [
    "\n".join(f'{label}: ' + label_names[label] for label in np.unique(mask))
    for mask in masks
]
plot_images(images)
plot_images(masks, pallet=voc_pallet(), annotations=anns)

## 2 Implement Evaluation Metric: IoU and Pixel acc.

In [None]:
from util import *
from voc import *
from torch.utils.data import DataLoader
from torchvision.transforms import *

# Smoke-test the two metrics on the first training sample, using an all-zero
# (224, 224) array as the "prediction".
dataset = VOC('train')
img, mask = dataset[0]  # subscript instead of calling __getitem__ directly

# Both metrics receive the same zero array, so allocate it only once.
zero_pred = np.zeros((224, 224))
iou = compute_iou(zero_pred, mask)
acc = compute_pixel_acc(zero_pred, mask)
print(f'IoU: {iou}, Acc: {acc}')

## 3 Baseline Model
- Optim: Adam / AdamW
- Use early stopping
- Desired result: 0.65 acc, 0.005 IoU

In [None]:
from util import *
from voc import *
from model import *
from train import *

import torchvision.transforms

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Run settings for the baseline experiment; read below via config[...] and
# also handed to train().
config = {
    'epochs': 20,
    'bz': 16,
    'lr': 5e-4,
    'device': device,
    'early_stop': 5,
    'remark': 'baseFCN',
}

# Transformations: the (mean, std) pair is unpacked into Normalize.
mean_std = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
input_transform = torchvision.transforms.Compose(
    [
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(*mean_std),
    ]
)
target_transform = MaskToTensor()
TF_transform = None  # the baseline run passes no extra transform

# Dataset and Dataloader initialization
_loaders = get_train_val_test_loader(
    input_transform,
    target_transform,
    TF_transform,
    config['device'],
    config['bz'],
)
train_loader, val_loader, test_loader = _loaders

# Prepare model: build the 21-class baseline FCN, run init_weights over it
# with Module.apply, then move it to the configured device (same order as
# before: initialise first, move second).
fcn_model = FCN_baseline(n_class=21).apply(init_weights).to(config['device'])

optimizer = torch.optim.Adam(fcn_model.parameters(), lr=config['lr'])
criterion = torch.nn.CrossEntropyLoss()

# Train model
# NOTE(review): the section-4 cell calls train() with an additional
# `scheduler` argument before `config`; confirm train() accepts both forms.
_train_outputs = train(fcn_model, train_loader, val_loader, criterion, optimizer, config)
(
    best_iou_score,
    best_accuracy,
    min_validation_loss,
    training_loss_history,
    validation_loss_history,
    early_stop_epoch,
) = _train_outputs

# Test model: the first value returned by val() is discarded.
_, iou, acc = val(fcn_model, test_loader, criterion)
print(f'Test IOU: {round(iou, 3)}. Test acc: {round(acc, 3)}')

# Visualize some test samples: the first five ground-truth masks of the first
# test batch, followed by the model's predictions for the same images.
imgs, masks_gt = next(iter(test_loader))
imgs, masks_gt = imgs[:5], masks_gt[:5]

# Inference only: put the model in eval mode and skip autograd bookkeeping.
# Moving the inputs is a no-op when they already live on the model's device.
fcn_model.eval()
with torch.no_grad():
    masks_pred = fcn_model(imgs.to(config['device']))
masks_pred = torch.argmax(masks_pred, dim=1).cpu().numpy()

# The masks already hold class indices, so encoding them one-hot and taking
# the argmax again returns the same values; only the int64 cast is kept.
# (Unlike F.one_hot, this does not raise on values outside [0, 21).)
masks_gt = masks_gt.to(torch.int64).cpu().numpy()
plot_images(np.concatenate([masks_gt, masks_pred]), voc_pallet())

## 4 Improve Baseline Model
- LR schedule: cosine annealing
- Augment dataset: flip, rotate, crop
- Address class imbalance issue with modified loss

In [None]:
from util import *
from voc import *
from model import *
from train import *
import torchvision.transforms.functional as TF

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Run settings for the improved experiment (LR scheduler + augmentation).
config = {
    'epochs'    : 20,
    'bz'        : 16,
    'lr'        : 0.001,
    'device'    : device,
    'early_stop': 5,
    'remark'    : 'modifiedFCN_scheduler'
}

# Transformations
mean_std = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
input_transform = standard_transforms.Compose([
        standard_transforms.ToTensor(),
        standard_transforms.Normalize(*mean_std)
    ])
target_transform = MaskToTensor()


def TF_transform(x):
    """Return the augmented variants of ``x`` as a list.

    In order: ``x`` itself, its horizontal flip, ``x`` rotated by +5 and by
    -5 degrees (fill value 0), and the five 224x224 crops produced by
    ``TF.five_crop``.

    NOTE(review): if ``x`` is already 224x224, the five crops would all equal
    ``x`` -- confirm the input size.
    """
    # The rotations are applied with an extra leading dimension that is
    # squeezed away again afterwards; the unsqueezed tensor is shared.
    batched = x.unsqueeze(0)
    return [
        x,
        TF.hflip(x),
        TF.rotate(batched, angle=5, fill=0).squeeze(0),
        TF.rotate(batched, angle=-5, fill=0).squeeze(0),
        *TF.five_crop(x, (224, 224)),
    ]


# Data loaders
train_loader, val_loader, test_loader = get_train_val_test_loader(
    input_transform, target_transform, TF_transform, config['device'], config['bz'], collate_fn=collate_fn)

# Prepare model
fcn_model = FCN_baseline()
fcn_model.apply(init_weights)
fcn_model = fcn_model.to(device)

optimizer = torch.optim.Adam(fcn_model.parameters(), config['lr'])
# Cosine annealing over config['epochs'] scheduler steps, down to a floor of 1e-4.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, config['epochs'], eta_min=0.0001)

# Weighted cross-entropy to counter class imbalance. Moving the criterion
# moves its weight buffer too, so the weights sit on the same device as the
# model's outputs (a no-op when they are already there).
class_weight = getClassWeights(input_transform, target_transform, TF_transform)
criterion = torch.nn.CrossEntropyLoss(weight=class_weight).to(device)

# Train model
best_iou_score, best_accuracy, min_validation_loss, \
training_loss_history, validation_loss_history, early_stop_epoch \
    = train(fcn_model, train_loader, val_loader, criterion, optimizer, scheduler, config)

In [None]:
# Test model: run modelTest on the test loader and print the three values it
# returns.
_test_outputs = modelTest(fcn_model, test_loader, criterion, early_stop_epoch)
test_loss, iou, acc = _test_outputs
print(f'Test Loss: {test_loss}, Test IOU: {iou}. Test acc: {acc}')

In [None]:
# Plot the training and validation loss histories returned by train().
# The third argument is a file path -- presumably where the figure is saved;
# NOTE(review): confirm that ./Figures exists or is created by plot_loss_acc.
plot_loss_acc(training_loss_history, validation_loss_history, "./Figures/part4d_train_val_loss.png")

In [None]:
# Visualize one test sample: its ground-truth mask next to the prediction.
imgs, masks_gt = next(iter(test_loader))  # a single fetch of the first batch is enough

# Show sample 36 of the batch; when the batch is shorter than that, fall back
# to its last sample so the slice is never empty.
sample_idx = min(36, len(imgs) - 1)
imgs = imgs[sample_idx:sample_idx + 1]
masks_gt = masks_gt[sample_idx:sample_idx + 1]
print(imgs.shape, masks_gt.shape)

# Inference only: put the model in eval mode and skip autograd bookkeeping.
# Moving the inputs is a no-op when they already live on the model's device.
fcn_model.eval()
with torch.no_grad():
    masks_pred = fcn_model(imgs.to(device))
masks_pred = torch.argmax(masks_pred, dim=1).cpu().numpy()

# The mask already holds class indices, so encoding it one-hot and taking the
# argmax again returns the same values; only the int64 cast is kept.
# (Unlike F.one_hot, this does not raise on values outside [0, 21).)
masks_gt = masks_gt.to(torch.int64).cpu().numpy()
plot_images(np.concatenate([masks_gt, masks_pred]), voc_pallet(), cols=2, path='./Figures/part4d_test.png')