In [0]:
# Mounting your Google Drive is optional; you could also simply copy and
# upload the data to your Colab instance. This manual upload is also easy to do,
# but you will have to figure out how to do it.
# from google.colab import drive
# drive.mount('/content/')

In [0]:
import os
# Work from /content/ so that the relative paths used in later cells
# (data.zip, data/...) resolve against that directory.
os.chdir("/content/")
# if not os.path.exists("/content/gdrive/My Drive/CS_543_MP4"):
#     os.makedirs("/content/gdrive/My Drive/CS_543_MP4")

# from google.colab import files
# uploaded = files.upload()

In [0]:
# Extract data.zip into the ./data directory (shell command run through `!`).
!unzip data.zip -d data

In [0]:
import glob
import os
import numpy as np
import seaborn as sns
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, average_precision_score

from PIL import Image
import torch
from torch import nn
from torch.utils import data
from torchvision.transforms import ToTensor

In [0]:
DATASET_PATH = 'data/data/sbd/'

class SegmentationDataset(data.Dataset):
    """
    Dataset of (image, ground-truth label map) pairs for one split.

    Every ``<name>.jpg`` found in ``<data_dir>/<split>/`` is one sample; its
    annotation is read from ``<name>.png`` in the same directory. Class names
    are read from ``<data_dir>/classes.txt``, one name per line.

    If data loading is a bottleneck, you may want to optimize this for faster
    training. Possibilities include pre-loading all images and annotations
    into memory before training, so as to limit delays due to disk reads.

    @param    split       string    one of "train", "val", "test"
    @param    data_dir    string    dataset root directory
    """
    def __init__(self, split="train", data_dir=DATASET_PATH):
        assert split in ["train", "val", "test"]
        self.img_dir = os.path.join(data_dir, split)
        with open(os.path.join(data_dir, 'classes.txt'), 'r') as f:
            self.classes = [line.rstrip() for line in f]
        self.n_classes = len(self.classes)
        self.split = split
        # glob returns files in arbitrary, filesystem-dependent order; sort so
        # that a given index refers to the same sample on every run.
        # Entries are stored without extension so both files can be derived.
        self.data = sorted(os.path.splitext(path)[0]
                           for path in glob.glob(self.img_dir + '/*.jpg'))

    def __len__(self):
        """Number of samples (i.e. .jpg files) in the split."""
        return len(self.data)

    def __getitem__(self, index):
        """
        Load one sample.

        @return   (img, gt): img is the image converted by ToTensor; gt is a
                  LongTensor of the label map with one leading axis added.
        """
        img = Image.open(self.data[index] + '.jpg')
        gt = Image.open(self.data[index] + '.png')

        img = ToTensor()(img)
        # unsqueeze(0) adds a leading singleton axis to the label map
        gt = torch.LongTensor(np.asarray(gt)).unsqueeze(0)
        return img, gt

In [0]:
def segmentation_eval(gts, preds, classes, plot_file_name):
    """
    Score predictions against the ground truth, save the summary plot and
    print a per-class report followed by the class means.

    @param    gts               numpy.ndarray     ground truth labels
    @param    preds             numpy.ndarray     predicted per-class scores
    @param    classes           list of string    class names
    @param    plot_file_name    string            file the plot is saved to
    @return   (aps, ious)       per-class average precision and IoU
    """
    row = '{:>20s}: AP: {:0.2f}, IoU: {:0.2f}'
    ious, counts = compute_confusion_matrix(gts, preds)
    aps = compute_ap(gts, preds)
    plot_results(counts, ious, aps, classes, plot_file_name)
    for idx, name in enumerate(classes):
        print(row.format(name, aps[idx], ious[idx]))
    print(row.format('mean', np.mean(aps), np.mean(ious)))
    return aps, ious

def plot_results(counts, ious, aps, classes, file_name):
    """
    Draw the row-normalised confusion matrix as a heatmap, with the per-class
    AP and IoU appended as two extra columns, and save it to `file_name`.

    @param    counts       numpy.ndarray     confusion matrix counts
    @param    ious         per-class IoU values
    @param    aps          per-class average precision values
    @param    classes      list of string    class names (tick labels)
    @param    file_name    string            output file for the figure
    """
    fig, ax = plt.subplots(1, 1)

    # Every row is divided by its total; all cells are shown as percentages.
    row_totals = np.sum(counts, 1, keepdims=True)
    ap_column = np.array(aps).reshape(-1, 1)
    iou_column = np.array(ious).reshape(-1, 1)
    table = np.concatenate([counts / row_totals, ap_column, iou_column], 1)
    table = table * 100.
    sns.heatmap(table, annot=True, ax=ax, fmt='3.0f')

    # labels, title and ticks
    ax.set_xlabel('Predicted labels')
    ax.set_ylabel('True labels')
    title = 'Confusion Matrix, mAP: {:5.1f}, mIoU: {:5.1f}'.format(
        np.mean(aps)*100., np.mean(ious)*100.)
    ax.set_title(title)
    ax.xaxis.set_ticklabels(classes + ['AP', 'IoU'], rotation=90)
    ax.yaxis.set_ticklabels(classes, rotation=0)
    fig.savefig(file_name, bbox_inches='tight')

def compute_ap(gts, preds):
    """
    Average precision of every class, treated one-vs-rest.

    For class index c the binary target is `gts == c` and the score is the
    c-th slice of `preds` along axis 1.

    @return   list with one AP value per class
    """
    n_classes = preds.shape[1]
    return [calc_pr(gts == cls, preds[:, cls:cls+1, :, :])[0]
            for cls in range(n_classes)]

def calc_pr(gt, out, wt=None):
    """
    Precision/recall curve and average precision for one binary problem.

    @param    gt     numpy.ndarray   binary ground truth (flattened internally)
    @param    out    numpy.ndarray   scores, one per ground-truth element
    @param    wt     accepted but not used
    @return   (ap, rec, prec)
    """
    labels = gt.astype(np.float64).reshape((-1, 1))
    scores = out.astype(np.float64).reshape((-1, 1))

    # column 0 holds the label, column 1 the score
    paired = np.hstack([labels, scores])
    # rank the samples by decreasing score
    order = np.argsort(paired[:, 1], axis=0)[::-1]
    ranked_labels = paired[order, 0]

    hits = np.cumsum(ranked_labels)                        # positives so far
    seen = np.arange(1, hits.size + 1, dtype=np.float64)   # samples so far
    prec = hits / seen
    rec = hits / np.sum(ranked_labels)
    ap = voc_ap(rec, prec)
    return ap, rec, prec

def voc_ap(rec, prec):
    """
    Area under the precision/recall curve.

    The curve is padded with (recall 0, precision 0) at the start and
    (recall 1, precision 0) at the end, precision is replaced by its running
    maximum taken from the right, and the area is summed over the positions
    where recall changes.

    @param    rec     numpy.ndarray   recall values
    @param    prec    numpy.ndarray   precision values, same length as rec
    @return   average precision
    """
    pad0 = np.zeros((1, 1))
    pad1 = np.ones((1, 1))
    recall = np.vstack((pad0, rec.reshape((-1, 1)), pad1))
    precision = np.vstack((pad0, prec.reshape((-1, 1)), pad0))

    # running maximum from the right
    envelope = np.maximum.accumulate(precision[::-1])[::-1]
    # rows at which recall differs from the previous row
    steps = np.flatnonzero(recall[1:, 0] != recall[:-1, 0]) + 1
    widths = recall[steps] - recall[steps - 1]
    return np.sum(widths * envelope[steps])

def compute_confusion_matrix(gts, preds):
    """
    Confusion matrix and per-class IoU of the arg-max predictions.

    @param    gts      numpy.ndarray   ground truth labels; index 0 of axis 1
                                       is taken before flattening
    @param    preds    numpy.ndarray   per-class scores with classes on axis 1
    @return   (ious, conf): per-class intersection-over-union and the
              confusion matrix of counts (rows: true label, columns:
              predicted label)
    """
    n_classes = preds.shape[1]
    preds_cls = np.argmax(preds, 1)
    gts = gts[:,0]
    # Fix the label set explicitly. Without it the matrix only covers labels
    # that actually occur, so a class missing from both gts and predictions
    # would shrink it and misalign it with the class list and the APs.
    conf = confusion_matrix(gts.ravel(), preds_cls.ravel(),
                            labels=np.arange(n_classes))
    inter = np.diag(conf)
    union = np.sum(conf, 0) + np.sum(conf, 1) - inter
    # avoid 0/0 for classes that never occur; their IoU becomes 0
    union = np.maximum(union, 1)
    return inter / union, conf

In [0]:
class MyModel(nn.Module):
    """
    Segmentation network: the first six child modules of a pretrained
    torchvision ResNet-18 as feature extractor, followed by three x2 bilinear
    upsampling steps and two 3x3 convolutions that produce per-class scores.

    @param    n_class      number of output channels (classes)
    @param    n_channel    accepted but currently unused
    @param    bilinear     accepted but currently unused
    """

    def __init__(self, n_class = 9, n_channel = 3, bilinear = True): # feel free to modify input parameters
        super(MyModel, self).__init__()
        # Imported here because the notebook's import cell only brings in
        # torchvision.transforms; without this `models` is undefined.
        from torchvision import models
        self.resnet = models.resnet18(pretrained=True)
        self.base = list(self.resnet.children())
        # feature extractor: first six children of the ResNet
        self.layer0 = nn.Sequential(*self.base[:6])
        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        # conv4 expects 128 input channels from layer0
        self.conv4 = nn.Conv2d(128, 64, 3, stride = 1, padding = 1)
        self.norm4 = nn.BatchNorm2d(64)
        self.conv5 = nn.Conv2d(64, n_class, 3, stride = 1, padding = 1)

    def forward(self, x):
        """Return per-class score maps for the image batch `x`."""
        h = self.layer0(x)
        # three x2 upsampling steps, i.e. x8 overall
        for _ in range(3):
            h = self.upsample(h)
        h = self.conv4(h)
        h = self.norm4(h)
        h = self.conv5(h)
        return h

In [0]:
# Release cached, currently unused GPU memory before the model is created.
torch.cuda.empty_cache()

In [0]:
# Instantiate the nn.Module defined above and move it to the GPU.
net = MyModel().cuda()

In [0]:
# Summarise the network for an input of size (3, 224, 288).
from torchsummary import summary
summary(net, (3,224,288))

In [0]:
# Show the module structure of the network.
print(net)

In [0]:
import torch.optim as optim
from torch.autograd import Variable


# Optimisation settings: cross-entropy loss, Adam with a fixed learning rate.
# Tune the learning rate, and see whether momentum is useful or not.
# Alternatives that were tried:
#   optim.SGD(net.parameters(), lr=0.005, momentum=0.9)
#   optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
#   optim.RMSprop(net.parameters(), lr=0.005, momentum=0, weight_decay=1e-4)
# NOTE(review): `w_decay` is defined but is not passed to the optimizer.
lr = 1e-4
w_decay = 1e-5
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=lr)

# Per-epoch history, appended to by the training loop.
train_loss_over_epochs, aps_over_epochs, ious_over_epochs = [], [], []


In [0]:
# Training loop: optimise `net` on the train split and, after every epoch,
# evaluate it on the validation split with the benchmarking functions.
# NOTE(review): `simple_predict` is defined in a later cell of this notebook;
# that cell has to be executed before this one.
########################################################################
# TODO: You may also want to save models that perform well.

EPOCHS = 20
train_dataset = SegmentationDataset(split='train')
train_dataloader = data.DataLoader(
    train_dataset, batch_size=8, shuffle=True, num_workers=4, drop_last=True)
val_dataset = SegmentationDataset(split='val')
val_dataloader = data.DataLoader(
    val_dataset, batch_size=8, shuffle=True, num_workers=4, drop_last=True)

for epoch in tqdm(range(EPOCHS), total=EPOCHS):
    net.train()
    running_loss = 0.0

    for i, batch in enumerate(tqdm(train_dataloader)):
        img, gt = batch
        img, gt = img.cuda(), gt.cuda()
        optimizer.zero_grad()
        outputs = net(img)
        # the dataset adds a leading axis to each label map; index it away
        # so the target has no channel axis
        loss = criterion(outputs, gt[:, 0])
        loss.backward()
        optimizer.step()
        # accumulate statistics
        running_loss += loss.item()

    # Normalizing the loss by the total number of train batches
    running_loss /= len(train_dataloader)
    print('[%d] loss: %.3f' % (epoch + 1, running_loss))

    # Validation pass: no gradients, eval mode.
    net.eval()
    with torch.no_grad():
        gts, preds, classes = simple_predict('val', net)
        aps, ious = segmentation_eval(gts, preds, classes, 'cs543-simple-val.pdf')

    train_loss_over_epochs.append(running_loss)
    aps_over_epochs.append(np.mean(aps))
    ious_over_epochs.append(np.mean(ious))
    




In [0]:
# Plot train loss over epochs
# Nothing to change here
# -------------
plt.ioff()
fig, ax = plt.subplots()
ax.plot(np.arange(EPOCHS), train_loss_over_epochs, 'k-')
ax.set_xlabel('Epochs')
ax.set_ylabel('Train loss')
ax.set_title('Train loss vs Epochs')
ax.set_xticks(np.arange(EPOCHS, dtype=int))
ax.grid(True)
plt.show()
# plt.savefig("plot.png")
# plt.close(fig)

# plt.subplot(2, 1, 2)
# plt.plot(np.arange(EPOCHS), aps_over_epochs, 'b-')
# plt.ylabel('mAP')
# plt.xlabel('Epochs')
# plt.xticks(np.arange(EPOCHS, dtype=int))
# plt.grid(True)
# plt.savefig("plot1.png")
# plt.close(fig)
# print('Finished Training')

# plt.subplot(2, 1, 2)
# plt.plot(np.arange(EPOCHS), ious_over_epochs, 'g-')
# plt.ylabel('mIOUs')
# plt.xlabel('Epochs')
# plt.xticks(np.arange(EPOCHS, dtype=int))
# plt.grid(True)
# plt.savefig("plot2.png")
# plt.close(fig)
# print('Finished Training')
# # -------------

In [0]:

def simple_predict(split, model):
    """
    Run `model` over every sample of a dataset split, one image at a time,
    and collect ground truth and predictions for the benchmarking functions.

    The model is switched to eval mode and run without gradient tracking for
    the duration of the call; its previous train/eval mode is restored
    afterwards.

    @param    split    string             one of 'train', 'val', 'test'
    @param    model    torch.nn.Module    network to evaluate
    @return   (gts, preds, classes): gts stacks the label tensor of every
              batch-of-one, preds stacks the softmax score maps of every
              image, classes is the list of class names. Assuming all images
              share one size, gts is (N, 1, 1, H, W) and preds is
              (N, C, H, W) -- TODO confirm against the data.
    """
    dataset = SegmentationDataset(split=split, data_dir=DATASET_PATH)
    dataloader = data.DataLoader(dataset, batch_size=1, shuffle=False,
                                 num_workers=0, drop_last=False)

    # Feed inputs on whatever device the model lives on (CPU if the model
    # has no parameters to ask).
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    was_training = model.training
    model.eval()
    gts, preds = [], []
    try:
        with torch.no_grad():
            for img, gt in tqdm(dataloader):
                # use the `model` argument, not a global network
                outputs = model(img.to(device))
                # softmax over the class axis
                outputs = torch.softmax(outputs, dim=1)
                gts.append(gt.numpy())
                preds.append(outputs[0].cpu().numpy())
    finally:
        model.train(was_training)

    gts = np.array(gts)
    preds = np.array(preds)
    return gts, preds, list(dataset.classes)


In [0]:
# Evaluate the trained network on the test split.
# NOTE(review): the plot is written to the same file name that the
# validation runs use, so it replaces the validation plot.
net.eval()
with torch.no_grad():
    gts, preds, classes = simple_predict('test', net)
    aps, ious = segmentation_eval(gts, preds, classes, 'cs543-simple-val.pdf')

In [0]:
########################################################################
# TODO: Evaluate your result, and report Mean average precision on test dataset 
# using provided helper function. Here we show how we can train and evaluate the 
# simple model that we provided on the validation set. You will want to report
# performance on the validation set for the variants you tried, and the 
# performance of the final model on the test set.

# `model` is never defined in this notebook; the trained network is `net`.
# Inference runs in eval mode and without gradient tracking.
net.eval()
with torch.no_grad():
    gts, preds, classes = simple_predict('test', net)
    aps, ious = segmentation_eval(gts, preds, classes, 'cs543-simple-val.pdf')

In [0]:
# Sanity check: shape of one ground-truth entry and of the stacked predictions.
print(gts[0].shape,preds.shape)

In [0]:
# Mean of the per-class IoU values from the most recent evaluation.
np.mean(ious)