In [3]:
#dataloader.py
from torch.utils.data import Dataset
from torchvision import transforms


import random
import os
import numpy as np
import glob
import PIL
from tqdm import tqdm 


def rgb2int(arr):
    """
    Pack the trailing RGB axis of an array into one integer per pixel.

    Convert (N,...M,3)-array of dtype uint8 to a (N,...,M)-array of dtype int32,
    encoding each colour as ``R * 256**2 + G * 256 + B``.

    The channels are widened to int32 *before* the arithmetic. Multiplying a
    uint8 array directly by 65536 depends on NumPy's scalar promotion rules:
    older versions silently widen (to an unsigned type, not the documented
    int32) and NumPy 2 refuses the out-of-range Python integer.
    """
    channels = np.asarray(arr).astype(np.int32)
    return channels[..., 0] * (256 ** 2) + channels[..., 1] * 256 + channels[..., 2]

def rgb2vals(color, color2ind):
    """
    Translate an RGB label image into a 2-D array of class indices.

    color     -- array whose last axis holds the RGB channels; the result is
                 reshaped to ``color.shape[:2]``.
    color2ind -- dict mapping (R, G, B) tuples to integer class indices.

    Returns an int32 array with one class index per pixel.

    The lookup table has exactly one slot per palette key, so pixel colours
    that are absent from ``color2ind`` are not supported.
    """
    pixel_codes = rgb2int(color)
    key_codes = rgb2int(np.array(list(color2ind.keys()), dtype='uint8'))

    # Rank pixels and palette keys together so both share one index space.
    combined = np.concatenate([pixel_codes.ravel(), key_codes])
    _, ranks = np.unique(combined, return_inverse=True)
    pixel_ranks = ranks[:pixel_codes.size]
    key_ranks = ranks[-len(color2ind):]

    # Lookup table: rank of a palette colour -> its class index.
    lookup = np.empty_like(key_codes, dtype='int32')
    lookup[key_ranks] = list(color2ind.values())

    return lookup[pixel_ranks].reshape(color.shape[:2])


class TASDataset(Dataset):
    """
    Image / label-mask pairs for semantic segmentation.

    Images are read from ``<data_folder>/train`` (or ``val`` when ``eval`` is
    true) and the colour-coded label images from the matching ``*_labels``
    folder. Every sample is resized to 768x384 and returned as
    ``(image, mask)`` where ``image`` is a normalised float tensor of shape
    (3, 384, 768) and ``mask`` is an int32 array of shape (384, 768) holding
    class indices 0-9.

    Parameters
    ----------
    data_folder : str
        Root folder containing the ``train``/``val`` and ``*_labels`` folders.
    eval : bool
        Read the ``val`` folders instead of the ``train`` folders.
    mode : {None, 'val', 'test'}
        'val' keeps the first 50 files, 'test' keeps everything after the
        first 50, None keeps all files.
    augment_data : bool
        Apply random rotation and flips. The same random draw is used for the
        image and its mask so the two stay aligned.
    """

    # Augmentation settings: probability of each operation and rotation range.
    ROTATE_P = 0.3
    MAX_ROTATION_DEG = 30
    HFLIP_P = 0.3
    VFLIP_P = 0.3

    def __init__(self, data_folder, eval=False, mode=None, augment_data=False):
        self.data_folder = data_folder
        self.eval = eval
        self.mode = mode
        self.augment_data = augment_data

        # Geometric augmentation is done in __getitem__ (it has to be shared
        # between image and mask), so the tensor pipeline is the same with or
        # without augmentation.
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ])

        # we will use the following width and height to resize
        self.width = 768
        self.height = 384

        self.color2class = {
                #terrain
                (192,192,192): 0, (105,105,105): 0, (160, 82, 45):0, (244,164, 96): 0,
                #vegetation
                ( 60,179,113): 1, (34,139, 34): 1, ( 154,205, 50): 1, ( 0,128,  0): 1, (0,100,  0):1, ( 0,250,154):1, (139, 69, 19): 1,
                #construction
                (1, 51, 73):2, ( 190,153,153): 2, ( 0,132,111): 2,
                #vehicle
                (0,  0,142):3, ( 0, 60,100):3,
                #sky
                (135,206,250):4,
                #object
                ( 128,  0,128): 5, (153,153,153):5, (255,255,  0 ):5,
                #human
                (220, 20, 60):6,
                #animal
                ( 255,182,193):7,
                #void
                (220,220,220):8,
                #undefined
                (0,  0,  0):9
        }

        split = 'val' if self.eval else 'train'
        self.input_folder = os.path.join(self.data_folder, split)
        self.label_folder = os.path.join(self.data_folder, split + '_labels')

        invalid_labels = ['1537962190852671077.png','1539600515553691119.png', '1539600738459369245.png','1539600829359771415.png','1567611260762673589.png']
        image_names = self._list_valid_names(self.input_folder, invalid_labels)

        self.paths = [(os.path.join(self.input_folder, i), os.path.join(self.label_folder, i)) for i in image_names]

        if self.mode == 'val': # use first 50 images for validation
            self.paths = self.paths[:50]

        elif self.mode == 'test': # use every image after the first 50 for test
            self.paths = self.paths[50:]

    @staticmethod
    def _list_valid_names(folder, invalid_names):
        """Return the file names in `folder` minus `invalid_names`, sorted.

        Sorting makes the order (and therefore the val/test slices taken from
        it) reproducible; neither os.listdir nor set iteration guarantees one.
        """
        return sorted(set(os.listdir(folder)).difference(invalid_names))

    @staticmethod
    def _flip_pair(image, mask, horizontal, vertical):
        """Flip an (H, W, C) image array and its (H, W) mask array identically."""
        if horizontal:
            image, mask = image[:, ::-1], mask[:, ::-1]
        if vertical:
            image, mask = image[::-1], mask[::-1]
        # Reversed views have negative strides, which torch.from_numpy rejects,
        # so hand back contiguous arrays.
        return np.ascontiguousarray(image), np.ascontiguousarray(mask)

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):
        image_path, label_path = self.paths[idx]
        size = (self.width, self.height)

        # `with` closes the file handles; convert('RGB') guarantees the three
        # channels that Normalize and the colour lookup expect.
        with PIL.Image.open(image_path) as image_file:
            image = image_file.convert('RGB').resize(size)
        with PIL.Image.open(label_path) as label_file:
            # NEAREST keeps label colours exact (no blended in-between colours)
            label = label_file.convert('RGB').resize(size, PIL.Image.NEAREST)

        if self.augment_data and random.random() < self.ROTATE_P:
            # One angle for both, so image and mask stay pixel-aligned.
            angle = random.uniform(0, self.MAX_ROTATION_DEG)
            image = image.rotate(angle, resample=PIL.Image.NEAREST, fillcolor=(255, 255, 255))
            # Uncovered corners get (0, 0, 0), the colour of the 'undefined' class.
            label = label.rotate(angle, resample=PIL.Image.NEAREST, fillcolor=(0, 0, 0))

        # np.array (not np.asarray) yields writable copies, which avoids the
        # read-only-buffer warning when ToTensor wraps the array.
        image = np.array(image)
        mask = rgb2vals(np.array(label), self.color2class)

        if self.augment_data:
            image, mask = self._flip_pair(
                image,
                mask,
                horizontal=random.random() < self.HFLIP_P,
                vertical=random.random() < self.VFLIP_P,
            )

        image = self.transform(image).float()

        return image, mask

In [6]:
# Mount Google Drive (Colab only); the data set path used later lives under /content/drive.
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [10]:
#utils.py
import numpy as np
import torch


def get_loss_weights(weighting_method=None):
  """
  Build per-class loss weights from the class counts in 'sample_per_class.npy'.

  weighting_method -- 'INS', 'ISNS', 'ENS' or 'basic'; any other value
                      (including the default None) disables weighting.

  Returns the tensor produced by the selected weighting function, or None
  when weighting is disabled.
  """
  if weighting_method not in ('INS', 'ISNS', 'ENS', 'basic'):
    # Nothing to compute: return before touching the counts file so that the
    # unweighted case does not require 'sample_per_class.npy' to exist.
    return None

  sample_per_class = np.load('sample_per_class.npy')

  if weighting_method == 'INS':
    return ins_loss_weights(sample_per_class)
  if weighting_method == 'ISNS':
    # inverse of the square root of the class frequency
    return ins_loss_weights(sample_per_class, power=0.5)
  if weighting_method == 'ENS':
    return ens_loss_weights(sample_per_class)
  return basic_loss_weights(sample_per_class)


def ins_loss_weights(sample_per_class, power=1):
  """
  Inverse-frequency class weights: ``1 / (count / total) ** power``.

  sample_per_class -- NumPy array of per-class sample counts.
  power            -- exponent applied to the class frequency before inverting.

  Returns the weights as a torch tensor (one entry per class).
  """
  class_fraction = sample_per_class / sample_per_class.sum()
  inverse_frequency = 1 / np.power(class_fraction, power)
  return torch.from_numpy(inverse_frequency)


def basic_loss_weights(sample_per_class):
  """
  Complement-of-frequency class weights: ``1 - count / total``.

  sample_per_class -- NumPy array of per-class sample counts.

  Returns the weights as a torch tensor (one entry per class).
  """
  class_fraction = sample_per_class / sample_per_class.sum()
  return torch.from_numpy(1 - class_fraction)


def ens_loss_weights(sample_per_class, beta=0.9):
  """
  Class weights ``(1 - beta) / (1 - beta ** frequency)``.

  sample_per_class -- NumPy array of per-class sample counts; the counts are
                      normalised to frequencies (count / total) before use.
  beta             -- base of the exponent, 0 <= beta < 1 (default 0.9).

  Returns the weights as a torch tensor (one entry per class).
  Raises ValueError when beta is outside [0, 1), where the formula would
  divide by zero or change sign.
  """
  if not 0 <= beta < 1:
    raise ValueError(f"beta must satisfy 0 <= beta < 1, got {beta}")

  normalised_per_class = sample_per_class/sample_per_class.sum()
  effective_num = (1 - np.power(beta, normalised_per_class)) / (1 - beta)
  return torch.from_numpy(1/effective_num)


def iou(pred, target, n_classes = 10):
  """
  Per-class intersection-over-union between predicted and target labels.

  pred, target -- tensors of class indices with the same number of elements.
  n_classes    -- total number of classes; the last one (n_classes - 1) is
                  treated as 'undefined' and is not scored.

  Returns a NumPy array of length n_classes - 1. An entry is NaN when the
  class occurs in neither pred nor target, so callers can average with
  np.nanmean.
  """
  # reshape (unlike view) also accepts non-contiguous tensors
  pred = pred.reshape(-1)
  target = target.reshape(-1)

  ious = []
  # Ignore IoU for undefined class ("9")
  for cls in range(n_classes-1):  # last class is ignored
    pred_inds = pred == cls
    target_inds = target == cls

    intersection = (pred_inds & target_inds).sum().item()
    union = pred_inds.sum().item() + target_inds.sum().item() - intersection

    if union == 0:
      ious.append(float('nan'))  # class absent from both prediction and target: exclude from the mean
    else:
      ious.append(intersection/union)

  return np.array(ious)
  

def pixel_acc(pred, target, n_classes = 10):
  """
  Fraction of correctly classified pixels among the scored classes.

  pred, target -- tensors of class indices with the same number of elements.
  n_classes    -- total number of classes; pixels whose target is the last
                  class (n_classes - 1) are left out of the count.

  Returns a float in [0, 1], or NaN when no target pixel belongs to a scored
  class (instead of dividing by zero).
  """
  correct = 0
  total = 0

  # reshape (unlike view) also accepts non-contiguous tensors
  pred = pred.reshape(-1)
  target = target.reshape(-1)

  for cls in range(n_classes-1):
    pred_inds = pred == cls
    target_inds = target == cls

    # a pixel is correct when prediction and target agree on this class
    correct += (pred_inds & target_inds).sum().item()
    total += target_inds.sum().item()

  if total == 0:
    return float('nan')
  return correct/total



In [12]:
#starter.py
import torch.optim as optim
from torch.optim.lr_scheduler import ExponentialLR
import time
from torch.utils.data import DataLoader
import torch
import gc
import copy


# Checkpoint file: train() saves the best model here and test() reloads it.
FILE = 'fcn_model.pth'

# Mini-batch size used by all three loaders below.
batchsize = 32

# Root folder of the data set on the mounted Drive.
path = '/content/drive/MyDrive/Colab_Notebooks/nn_hw3/tas500v1.1'

# Training images come from the train folders; validation and test are two
# slices of the val folders (selected through `mode`).
train_dataset = TASDataset(path)
val_dataset = TASDataset(path, eval=True, mode='val')
test_dataset = TASDataset(path, eval=True, mode='test')

# Only the training loader shuffles; evaluation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=batchsize, shuffle=True)
val_loader, test_loader = (
    DataLoader(split_dataset, batch_size=batchsize, shuffle=False)
    for split_dataset in (val_dataset, test_dataset)
)


In [13]:
#TESTING RESNET WITH TRANSFER LEARNING
import torch.nn as nn
from torchvision import models

   
class FCN(nn.Module):
    """
    Segmentation network: a pretrained ResNet-34 encoder followed by five
    stride-2 transposed-convolution stages and a 1x1 convolution classifier.

    n_class -- number of output channels (one score map per class).
    """

    def __init__(self, n_class):
        super().__init__()
        self.n_class = n_class

        # Encoder: every child of the pretrained ResNet-34 except the last two
        # (in torchvision's ResNet these are the average-pool and fc layers).
        backbone = models.resnet34(pretrained=True)
        self.features = nn.Sequential(*list(backbone.children())[:-2])
        self.relu = nn.ReLU(inplace=True)

        def upsample(in_channels, out_channels):
            # kernel 3 / stride 2 / padding 1 / output_padding 1 doubles H and W
            return nn.ConvTranspose2d(in_channels, out_channels, kernel_size=3, stride=2,
                                      padding=1, dilation=1, output_padding=1)

        # Decoder; attribute names are the keys of the saved state_dict.
        self.deconv1 = upsample(512, 512)
        self.bn1 = nn.BatchNorm2d(512)
        self.deconv2 = upsample(512, 256)
        self.bn2 = nn.BatchNorm2d(256)
        self.deconv3 = upsample(256, 128)
        self.bn3 = nn.BatchNorm2d(128)
        self.deconv4 = upsample(128, 64)
        self.bn4 = nn.BatchNorm2d(64)
        self.deconv5 = upsample(64, 32)
        self.bn5 = nn.BatchNorm2d(32)
        self.classifier = nn.Conv2d(32, self.n_class, kernel_size=1)

    def forward(self, x):
        """Return per-class score maps for the input batch `x`."""
        out = self.features(x)

        decoder_stages = (
            (self.deconv1, self.bn1),
            (self.deconv2, self.bn2),
            (self.deconv3, self.bn3),
            (self.deconv4, self.bn4),
            (self.deconv5, self.bn5),
        )
        # Each stage: transposed convolution -> ReLU -> batch norm.
        for deconv, bn in decoder_stages:
            out = bn(self.relu(deconv(out)))

        return self.classifier(out)


In [14]:
# def init_weights(m):
#     if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
#         torch.nn.init.xavier_uniform_(m.weight.data)
#         torch.nn.init.normal_(m.bias.data) #xavier not applicable for biases   

def init_weights(m):
    """
    Re-initialise a transposed-convolution layer in place.

    Meant for ``model.apply(init_weights)``: modules that are not
    nn.ConvTranspose2d are left untouched. The weight gets Xavier-uniform
    values and the bias, when the layer has one, standard-normal values.
    """
    if isinstance(m, nn.ConvTranspose2d):
        torch.nn.init.xavier_uniform_(m.weight.data)
        # Layers created with bias=False have m.bias set to None.
        if m.bias is not None:
            torch.nn.init.normal_(m.bias.data) #xavier not applicable for biases



# Maximum number of training epochs.
epochs = 100

# Early stopping: stop after `epoch_stop` epochs without validation IoU improvement.
epoch_no_imp = 0
epoch_stop = 15
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Choose an appropriate loss function from https://pytorch.org/docs/stable/_modules/torch/nn/modules/loss.html
# get_loss_weights returns None when no weighting method is selected, so the
# result is only cast and moved to the device after the None check.
loss_weights = get_loss_weights(weighting_method='basic')
if loss_weights is not None:
    loss_weights = loss_weights.type(torch.FloatTensor).to(device)
    criterion = nn.CrossEntropyLoss(weight=loss_weights) #BB - Chose CEL
    print('class weights used')
else:
    criterion = nn.CrossEntropyLoss() #BB - Chose CEL
n_class = 10
fcn_model = FCN(n_class=n_class)
fcn_model.apply(init_weights)

# Learning rate, passed to the optimizer below so there is a single source of
# truth. 0.01 is the value Adam has been run with.
lr = 0.01

# NOTE(review): the DataLoaders were already built with the earlier value of
# `batchsize`; this reassignment does not change them.
batchsize = 8

#BB - added optimizer
optimizer = optim.Adam(fcn_model.parameters(), lr=lr) # choose an optimizer

#BB - Adding lr scheduler:
#scheduler = ExponentialLR(optimizer, 0.9) ###added schedule

fcn_model = fcn_model.to(device) #transfer the model to the device


class weights used


In [15]:
# Sanity check: the loss criterion's class-weight attribute should be a torch.Tensor.
type(criterion.weight)

torch.Tensor

In [16]:
def train():
    """
    Train the global `fcn_model` for at most `epochs` epochs.

    After every epoch the model is scored with val(). Whenever the validation
    IoU improves, the weights are saved to FILE. Training stops early once
    `epoch_stop` consecutive epochs have passed without an improvement.

    Returns (train_loss, valid_loss): two lists with the mean training loss
    and the validation loss of every completed epoch.
    """
    best_iou_score = 0.0
    best_acc = 0.0
    best_loss = 0.0
    epoch_no_imp = 0  # consecutive epochs without a better validation IoU

    train_loss = []
    valid_loss = []

    # Make sure batchnorm/dropout are in training mode even if the model was
    # left in eval mode by earlier code.
    fcn_model.train()

    for epoch in range(epochs):
        ts = time.time()
        losses = []

        for batch_idx, (inputs, labels) in enumerate(train_loader):

            # reset optimizer gradients
            optimizer.zero_grad()

            # both inputs and labels have to reside in the same device as the model's
            inputs = inputs.type(torch.FloatTensor).to(device)
            labels = labels.type(torch.LongTensor).to(device)

            outputs = fcn_model(inputs)

            loss = criterion(outputs, labels)
            losses.append(loss.item())

            loss.backward()
            optimizer.step()

            if batch_idx % 10 == 0:
                print("epoch{}, iter{}, loss: {}".format(epoch, batch_idx, loss.item()))

        #Turn on if you want to mess with scheduling lr
        #scheduler.step()

        print("Finish epoch {}, time elapsed {}".format(epoch, time.time() - ts))

        current_miou_score, current_acc, current_loss = val(epoch)
        valid_loss.append(current_loss)
        train_loss.append(np.mean(losses))

        if current_miou_score > best_iou_score:
            best_iou_score = current_miou_score
            best_acc = current_acc
            best_loss = current_loss
            # An improvement restarts the patience window.
            epoch_no_imp = 0

            #Save to model
            torch.save(fcn_model.state_dict(), FILE)

        else:
            epoch_no_imp += 1

            if epoch_no_imp >= epoch_stop:
                print(f'No improvement after epoch {epoch-epoch_stop}')
                print(f'iou: {best_iou_score}, pixel acc: {best_acc}, loss: {best_loss}')
                break

    return train_loss, valid_loss
            
    

def val(epoch):
    """
    Score the global `fcn_model` on `val_loader` and print the results.

    epoch -- epoch number, used only in the printed messages.

    Returns (mean IoU, mean pixel accuracy, mean loss), each averaged over the
    validation batches. NaN batch scores are skipped when averaging IoU and
    accuracy.
    """
    fcn_model.eval() # Put in eval mode (disables batchnorm/dropout) !

    losses = []
    mean_iou_scores = []
    accuracy = []

    try:
        with torch.no_grad(): # we don't need to calculate the gradient in the validation/testing
            for inputs, labels in val_loader:

                # both inputs and labels have to reside in the same device as the model's
                inputs = inputs.type(torch.FloatTensor).to(device)
                labels = labels.type(torch.LongTensor).to(device)

                output = fcn_model(inputs)

                loss = criterion(output, labels)
                losses.append(loss.item())

                pred = torch.argmax(output, dim=1) # class with the highest score per pixel

                # n_class is passed to both metrics so they always score the same classes
                mean_iou_scores.append(np.nanmean(iou(pred, labels, n_class)))
                accuracy.append(pixel_acc(pred, labels, n_class))
    finally:
        # Restore train mode even if validation raised, so a later training
        # step never runs with batchnorm/dropout disabled.
        fcn_model.train()

    mean_loss = np.mean(losses)
    mean_iou = np.nanmean(mean_iou_scores)
    mean_acc = np.nanmean(accuracy)

    print(f"\nLoss at epoch: {epoch} is {mean_loss}")
    print(f"IoU at epoch: {epoch} is {mean_iou}")
    print(f"Pixel acc at epoch: {epoch} is {mean_acc}\n")

    return mean_iou, mean_acc, mean_loss

def test():
    """
    Reload the checkpoint saved in FILE and report loss, IoU and pixel
    accuracy on `test_loader`.

    A fresh FCN is built and loaded, so the in-memory training model is not
    touched. Results are printed; nothing is returned.
    """
    #Load model
    model = FCN(n_class=n_class)
    # map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine
    model.load_state_dict(torch.load(FILE, map_location=device))
    model.to(device)
    # Evaluation mode: batchnorm must use its stored running statistics, not
    # the statistics of each test batch.
    model.eval()

    losses = []
    mean_iou_scores = []
    accuracy = []

    with torch.no_grad(): # we don't need to calculate the gradient in the validation/testing

        for inputs, labels in test_loader:

            # both inputs and labels have to reside in the same device as the model's
            inputs = inputs.type(torch.FloatTensor).to(device)
            labels = labels.type(torch.LongTensor).to(device)

            output = model(inputs)

            loss = criterion(output, labels)
            losses.append(loss.item())

            pred = torch.argmax(output, dim=1)
            # n_class is passed to both metrics so they always score the same classes
            mean_iou_scores.append(np.nanmean(iou(pred, labels, n_class)))
            accuracy.append(pixel_acc(pred, labels, n_class))

    print(f"Loss is {np.mean(losses)}")
    print(f"IoU is {np.nanmean(mean_iou_scores)}")
    print(f"Pixel acc is {np.nanmean(accuracy)}")


In [17]:
    # Run Python's garbage collector, then ask PyTorch to release its cached GPU memory.
    gc.collect() 
    torch.cuda.empty_cache()
    # Query how many CUDA devices PyTorch can see (the value is not stored).
    torch.cuda.device_count() 

In [None]:
# Entry point: score the model before training, train it, then evaluate the
# checkpoint saved in FILE on the test loader.
if __name__ == "__main__":
    val(0)  # show the accuracy before training
    train_loss, valid_loss = train()  # per-epoch loss histories, plotted further down
    test()
    
    # housekeeping
    gc.collect() 
    torch.cuda.empty_cache()

  img = torch.from_numpy(pic.transpose((2, 0, 1))).contiguous()



Loss at epoch: 0 is 2.1540205478668213
IoU at epoch: 0 is 0.03069301570987771
Pixel acc at epoch: 0 is 0.1844885632995873

epoch0, iter0, loss: 2.3874359130859375
epoch0, iter10, loss: 1.7612696886062622
Finish epoch 0, time elapsed 59.130473613739014

Loss at epoch: 0 is 22362.88671875
IoU at epoch: 0 is 0.06216060216393744
Pixel acc at epoch: 0 is 0.3145453685749465

epoch1, iter0, loss: 1.5115562677383423
epoch1, iter10, loss: 1.0521812438964844
Finish epoch 1, time elapsed 57.94193911552429

Loss at epoch: 1 is 6.869622707366943
IoU at epoch: 1 is 0.0847652757605042
Pixel acc at epoch: 1 is 0.32784163805267946

epoch2, iter0, loss: 0.9782946109771729
epoch2, iter10, loss: 1.006362795829773
Finish epoch 2, time elapsed 57.543009996414185

Loss at epoch: 2 is 1.9127963781356812
IoU at epoch: 2 is 0.13930783686225964
Pixel acc at epoch: 2 is 0.5566858056945647

epoch3, iter0, loss: 0.845603883266449
epoch3, iter10, loss: 0.7462591528892517
Finish epoch 3, time elapsed 57.923908710479

In [None]:
# Inspect the per-epoch loss histories returned by train().
# Only the last expression of a cell is echoed, so this shows valid_loss.
train_loss
valid_loss

In [None]:
from matplotlib import pyplot as plt

# Draw the per-epoch training and validation loss curves on the current axes.
ax = plt.gca()
ax.plot(train_loss, "-b", label="Training Loss")
ax.plot(valid_loss, "-r", label="Validation Loss")

# Epoch numbers are integers, so restrict the x ticks to integer positions.
ax.locator_params(axis="x", integer=True, tight=True)
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.set_title("Transfer Learning (res34)" + " Training and Validation Loss")
ax.legend(loc="upper right")
# plt.savefig(title + ' Loss.png', bbox_inches='tight')
plt.show()
plt.clf()
