# Introduction

<div class="alert alert-danger">

  Originally copied from https://github.com/Sayan98/pytorch-segnet

</div>

# Imports

In [1]:
import os
import time
import numpy as np
import matplotlib.pyplot as plt
import PIL
import torch
import segnet

# VOC2007 Dataset

In [2]:
# TODO: Change me

VOC_CLASSES = ('background',  # always index 0
               'aeroplane', 'bicycle', 'bird', 'boat',
               'bottle', 'bus', 'car', 'cat', 'chair',
               'cow', 'diningtable', 'dog', 'horse',
               'motorbike', 'person', 'pottedplant',
               'sheep', 'sofa', 'train', 'tvmonitor')

NUM_CLASSES = len(VOC_CLASSES) + 1

In [8]:
VOC_CLASSES = ('background',  # always index 0
               'pipe')

NUM_CLASSES = len(VOC_CLASSES)

In [9]:
# TODO: Change me

class PascalVOCDataset(torch.utils.data.Dataset):
    """Pascal VOC 2007 Dataset"""
    def __init__(self, list_file, img_dir, mask_dir, transform=None):
        self.images = open(list_file, "rt").read().split("\n")[:-1]
        self.transform = transform

        self.img_extension = ".jpg"
        self.mask_extension = ".png"

        self.image_root_dir = img_dir
        self.mask_root_dir = mask_dir

        self.counts = self.__compute_class_probability()

    def __len__(self):
        return len(self.images)

    def __getitem__(self, index):
        name = self.images[index]
        image_path = os.path.join(self.image_root_dir, name + self.img_extension)
        mask_path = os.path.join(self.mask_root_dir, name + self.mask_extension)

        image = self.load_image(path=image_path)
        gt_mask = self.load_mask(path=mask_path)

        data = {
                    'image': torch.FloatTensor(image),
                    'mask' : torch.LongTensor(gt_mask)
                    }

        return data

    def __compute_class_probability(self):
        counts = dict((i, 0) for i in range(NUM_CLASSES))

        for name in self.images:
            mask_path = os.path.join(self.mask_root_dir, name + self.mask_extension)

            raw_image = PIL.Image.open(mask_path).resize((224, 224))
            imx_t = np.array(raw_image).reshape(224*224)
            imx_t[imx_t==255] = len(VOC_CLASSES)

            for i in range(NUM_CLASSES):
                counts[i] += np.sum(imx_t == i)

        return counts

    def get_class_probability(self):
        values = np.array(list(self.counts.values()))
        p_values = values/np.sum(values)

        return torch.Tensor(p_values)

    def load_image(self, path=None):
        raw_image = PIL.Image.open(path)
        raw_image = np.transpose(raw_image.resize((224, 224)), (2,1,0))
        imx_t = np.array(raw_image, dtype=np.float32)/255.0

        return imx_t

    def load_mask(self, path=None):
        raw_image = PIL.Image.open(path)
        raw_image = raw_image.resize((224, 224))
        imx_t = np.array(raw_image)
        # border
        imx_t[imx_t==255] = len(VOC_CLASSES)

        return imx_t

In [10]:
data_root = '/home/marcin/Datasets/VOC2007'
train_txt, val_txt = 'train_mini.txt', 'val_mini.txt'

In [11]:
data_root = '/home/marcin/Datasets/rovco/dataset'
train_txt, val_txt = 'train.txt', 'val.txt'

In [12]:
train_path = os.path.join(data_root, 'ImageSets/Segmentation', train_txt)
val_path = os.path.join(data_root, 'ImageSets/Segmentation/', val_txt)
img_dir = os.path.join(data_root, "JPEGImages")
mask_dir = os.path.join(data_root, "SegmentationClass")

save_dir = './savedir'
checkpoint = None

CUDA = True  # args.gpu is not None
GPU_ID = 0   # args.gpu

BATCH_SIZE = 16

In [13]:
train_dataset = PascalVOCDataset(list_file=train_path,
                                 img_dir=img_dir,
                                 mask_dir=mask_dir)

train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                              batch_size=BATCH_SIZE,
                                              shuffle=True,
                                              num_workers=4)



In [None]:
print(train_dataset.get_class_probability())

sample = train_dataset[11]
image, mask = sample['image'], sample['mask']

image.transpose_(0, 2)

fig = plt.figure()

a = fig.add_subplot(1,2,1)
plt.imshow(image)

a = fig.add_subplot(1,2,2)
plt.imshow(mask)

plt.show()

In [17]:
test_model_on_random_input = False

if test_model_on_random_input:

    # Model
    model = segnet.SegNet(input_channels=3, output_channels=NUM_CLASSES)

    # print(model)

    img = torch.randn([4, 3, 224, 224])
    output, softmaxed_output = model(img)

    print(output.size())
    print(softmaxed_output.size())

    print(output[0,:,0,0])
    print(softmaxed_output[0,:,0,0].sum())

# Train SegNet

In [18]:
# TODO: Change me

# Constants
NUM_INPUT_CHANNELS = 3
NUM_OUTPUT_CHANNELS = NUM_CLASSES

NUM_EPOCHS = 6000

LEARNING_RATE = 1e-3 # 1e-6
MOMENTUM = 0.9

In [19]:
if CUDA:
    model = segnet.SegNet(input_channels=NUM_INPUT_CHANNELS,
                          output_channels=NUM_OUTPUT_CHANNELS).cuda(GPU_ID)

    class_weights = 1.0/train_dataset.get_class_probability().cuda(GPU_ID)
    criterion = torch.nn.CrossEntropyLoss(weight=class_weights).cuda(GPU_ID)
else:
    model = segnet.SegNet(input_channels=NUM_INPUT_CHANNELS,
                          output_channels=NUM_OUTPUT_CHANNELS)

    class_weights = 1.0/train_dataset.get_class_probability()
    criterion = torch.nn.CrossEntropyLoss(weight=class_weights)
    
if checkpoint:
    model.load_state_dict(torch.load(args.checkpoint))

In [20]:
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [21]:
def train():
    is_better = True
    prev_loss = float('inf')

    model.train()

    for epoch in range(NUM_EPOCHS):
        loss_f = 0
        t_start = time.time()

        for batch in train_dataloader:
            input_tensor = torch.autograd.Variable(batch['image'])
            target_tensor = torch.autograd.Variable(batch['mask'])

            if CUDA:
                input_tensor = input_tensor.cuda(GPU_ID)
                target_tensor = target_tensor.cuda(GPU_ID)

            predicted_tensor, softmaxed_tensor = model(input_tensor)


            optimizer.zero_grad()
            loss = criterion(softmaxed_tensor, target_tensor)
            loss.backward()
            optimizer.step()


            loss_f += loss.float()
            prediction_f = softmaxed_tensor.float()

        delta = time.time() - t_start
        is_better = loss_f < prev_loss

        if is_better:
            prev_loss = loss_f
            torch.save(model.state_dict(), os.path.join(save_dir, "model_best.pth"))

        print("Epoch #{}\tLoss: {:.8f}\t Time: {:2f}s".format(epoch+1, loss_f, delta))

In [28]:
train()

Epoch #1	Loss: 0.35000780	 Time: 0.525096s
Epoch #2	Loss: 0.34862185	 Time: 0.524583s
Epoch #3	Loss: 0.34821707	 Time: 0.516271s
Epoch #4	Loss: 0.34586528	 Time: 0.516663s
Epoch #5	Loss: 0.34524012	 Time: 0.519219s
Epoch #6	Loss: 0.34394974	 Time: 0.520299s
Epoch #7	Loss: 0.34403336	 Time: 0.524212s
Epoch #8	Loss: 0.34283078	 Time: 0.511041s
Epoch #9	Loss: 0.33954406	 Time: 0.516970s
Epoch #10	Loss: 0.34080076	 Time: 0.515382s
Epoch #11	Loss: 0.33785093	 Time: 0.505640s
Epoch #12	Loss: 0.34325865	 Time: 0.661593s
Epoch #13	Loss: 0.36328009	 Time: 0.507857s
Epoch #14	Loss: 0.34938964	 Time: 0.514261s
Epoch #15	Loss: 0.34107235	 Time: 0.519991s
Epoch #16	Loss: 0.34347156	 Time: 0.519263s
Epoch #17	Loss: 0.34604922	 Time: 0.514745s
Epoch #18	Loss: 0.34335804	 Time: 0.514310s
Epoch #19	Loss: 0.33804953	 Time: 0.526909s
Epoch #20	Loss: 0.33506152	 Time: 0.511319s
Epoch #21	Loss: 0.33755091	 Time: 0.514496s
Epoch #22	Loss: 0.33654803	 Time: 0.520099s
Epoch #23	Loss: 0.33374000	 Time: 0.51313

KeyboardInterrupt: 

In [23]:
def validate():
    model.eval()

    for batch_idx, batch in enumerate(val_dataloader):
        input_tensor = torch.autograd.Variable(batch['image'])
        target_tensor = torch.autograd.Variable(batch['mask'])

        if CUDA:
            input_tensor = input_tensor.cuda(GPU_ID)
            target_tensor = target_tensor.cuda(GPU_ID)

        predicted_tensor, softmaxed_tensor = model(input_tensor)
        loss = criterion(predicted_tensor, target_tensor)

        for idx, predicted_mask in enumerate(softmaxed_tensor):
            target_mask = target_tensor[idx]
            input_image = input_tensor[idx]

            fig = plt.figure()

            a = fig.add_subplot(1,3,1)
            plt.imshow(input_image.transpose(0, 2).cpu().numpy())
            a.set_title('Input Image')

            a = fig.add_subplot(1,3,2)
            predicted_mx = predicted_mask.detach().cpu().numpy()
            predicted_mx = predicted_mx.argmax(axis=0)
            plt.imshow(predicted_mx)
            a.set_title('Predicted Mask')

            a = fig.add_subplot(1,3,3)
            target_mx = target_mask.detach().cpu().numpy()
            plt.imshow(target_mx)
            a.set_title('Ground Truth')

            #fig.savefig(os.path.join(OUTPUT_DIR, "prediction_{}_{}.png".format(batch_idx, idx)))

            #plt.close(fig)

In [30]:


SAVED_MODEL_PATH = './savedir/model_best.pth'

val_dataset = PascalVOCDataset(list_file=val_path,
                               img_dir=img_dir,
                               mask_dir=mask_dir)

val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                            batch_size=BATCH_SIZE,
                                            shuffle=True,
                                            num_workers=4)

In [16]:
# if CUDA:
#     model = segnet.SegNet(input_channels=NUM_INPUT_CHANNELS,
#                           output_channels=NUM_OUTPUT_CHANNELS).cuda(GPU_ID)

#     class_weights = 1.0/val_dataset.get_class_probability().cuda(GPU_ID)
#     criterion = torch.nn.CrossEntropyLoss(weight=class_weights).cuda(GPU_ID)
# else:
#     model = segnet.SegNet(input_channels=NUM_INPUT_CHANNELS,
#                           output_channels=NUM_OUTPUT_CHANNELS)

#     class_weights = 1.0/val_dataset.get_class_probability()
#     criterion = torch.nn.CrossEntropyLoss(weight=class_weights)

In [None]:
# model.load_state_dict(torch.load(SAVED_MODEL_PATH))

In [None]:
validate()