In [1]:
import torch
import torchvision.models
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F

import numpy as np
import scipy.misc
import matplotlib.pyplot as plt
import skimage.transform
import skimage.io
from skimage.transform import resize

import warnings; warnings.simplefilter('ignore')
import os
%matplotlib inline
%env CUDA_VISIBLE_DEVICES=1

env: CUDA_VISIBLE_DEVICES=1


In [2]:
# Read the pre-processed training / validation arrays from the working directory.
train_X, train_y = np.load("train_X.npy"), np.load("train_y.npy")
valid_X, valid_y = np.load("valid_X.npy"), np.load("valid_y.npy")

# Wrap the arrays as torch tensors: float inputs, long (integer) training labels.
# valid_y is not converted here; it stays a NumPy array.
train_X = torch.from_numpy(train_X).float()
train_y = torch.from_numpy(train_y).long()
valid_X = torch.from_numpy(valid_X).float()

valid_dataset_path = "./hw3-train-validation/validation/"

# An image id is the part of a file name before the first underscore;
# collect the distinct ids found in the validation folder, in sorted order.
valid_image_id_list = sorted({fname.split("_")[0] for fname in os.listdir(valid_dataset_path)})


In [3]:
def mean_iou_score(pred, labels, num_classes=6):
    '''
    Compute the mean intersection-over-union (IoU) over class ids
    0 .. num_classes - 1.

    Args:
        pred: array-like of predicted class ids.
        labels: array-like of ground-truth class ids, compared element-wise
            with ``pred``.
        num_classes: how many class ids (starting at 0) are scored.
            Defaults to 6.

    Returns:
        float: the average of the per-class IoU values, taken over the scored
        classes that occur in ``pred`` or ``labels``. Classes that occur in
        neither are skipped (their IoU would be 0/0). Returns 0.0 when none
        of the scored classes occurs at all.
    '''
    pred = np.asarray(pred)
    labels = np.asarray(labels)

    class_ious = []
    for i in range(num_classes):
        pred_mask = pred == i
        label_mask = labels == i
        tp = np.sum(pred_mask & label_mask)
        # |pred| + |labels| - |intersection| is the size of the union.
        union = np.sum(pred_mask) + np.sum(label_mask) - tp
        if union == 0:
            # Class i appears in neither array: skip it instead of letting
            # 0/0 turn the whole mean into nan.
            continue
        class_ious.append(tp / union)

    if not class_ious:
        return 0.0
    return float(np.mean(class_ious))

In [4]:
# model collections
class fcn32s(nn.Module):
    """FCN-32s style segmentation network built on torchvision's VGG16.

    The VGG16 ``features`` stack is used as the encoder. The stock VGG
    classifier is replaced by a fully convolutional head: two
    conv + ReLU + Dropout2d stages with 4096 channels, a 1x1 scoring
    convolution down to ``num_classes`` channels, and one transposed
    convolution (kernel 64, stride 32) that upsamples the score map.

    Args:
        num_classes: number of output channels (one score map per class).
        pretrained: forwarded to ``torchvision.models.vgg16``; set to False
            to build the backbone without loading pretrained weights.
    """

    def __init__(self, num_classes, pretrained = True):
        super(fcn32s, self).__init__()
        # Honour the caller's choice instead of always loading pretrained weights.
        self.vgg = torchvision.models.vgg16(pretrained=pretrained)
        # nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=1, padding=0,
        # output_padding=0, groups=1, bias=True, dilation=1)
        self.vgg.classifier = nn.Sequential(
            # 2x2 conv shrinks each spatial side of the feature map by one.
            nn.Conv2d(512, 4096, kernel_size=(2, 2), stride=(1, 1)),
            nn.ReLU(inplace=True),
            nn.Dropout2d(),

            nn.Conv2d(4096, 4096, kernel_size=(1, 1), stride=(1, 1)),
            nn.ReLU(inplace=True),
            nn.Dropout2d(),

            # Per-class scores, then a learned upsampling: an s x s score map
            # becomes (s - 1) * 32 + 64 pixels per side.
            nn.Conv2d(4096, num_classes, kernel_size=(1, 1), stride=(1, 1)),
            nn.ConvTranspose2d(num_classes, num_classes, 64, 32, 0, bias=False),
        )

    def forward(self, x):
        """Return the upsampled per-class score maps for the image batch ``x``."""
        x = self.vgg.features(x)
        x = self.vgg.classifier(x)
        return x

class fcn32s_prune(nn.Module):
    """Pruned FCN-32s: VGG16 features followed directly by one upsampling layer.

    The conv6 / conv7 stages of the full FCN head are cut off; the 512-channel
    output of the VGG16 ``features`` stack is mapped to ``num_classes`` score
    maps by a single transposed convolution (kernel 32, stride 32), which
    enlarges each spatial side by a factor of 32.

    Args:
        num_classes: number of output channels (one score map per class).
        pretrained: forwarded to ``torchvision.models.vgg16``; set to False
            to build the backbone without loading pretrained weights.
    """

    def __init__(self, num_classes, pretrained = True):
        super(fcn32s_prune, self).__init__()
        # Honour the caller's choice instead of always loading pretrained weights.
        self.vgg = torchvision.models.vgg16(pretrained=pretrained)
        self.vgg.classifier = nn.Sequential(
            nn.ConvTranspose2d(512, num_classes, 32, 32, 0, bias=False)
        )

    def forward(self, x):
        """Return the upsampled per-class score maps for the image batch ``x``."""
        x = self.vgg.features(x)
        x = self.vgg.classifier(x)
        return x

class fcn16s(nn.Module):
    """FCN-16s style segmentation network built on torchvision's VGG16.

    The VGG16 ``features`` stack is split into two stages: the first 24 layers
    (``to_pool4``) and the remaining layers (``to_pool5``). The fully
    convolutional head runs on the second stage's output and ends with a
    transposed convolution (kernel 4, stride 2) producing 512 channels, so that
    its result can be added element-wise to the ``to_pool4`` output. The sum is
    upsampled 16x to ``num_classes`` score maps.

    Args:
        num_classes: number of output channels (one score map per class).
        pretrained: forwarded to ``torchvision.models.vgg16``; set to False
            to build the backbone without loading pretrained weights.
    """

    def __init__(self, num_classes, pretrained = True):
        super(fcn16s, self).__init__()
        # Honour the caller's choice instead of always loading pretrained weights.
        self.vgg = torchvision.models.vgg16(pretrained=pretrained)
        feature_layers = list(self.vgg.features.children())
        self.to_pool4 = nn.Sequential(*feature_layers[:24])
        self.to_pool5 = nn.Sequential(*feature_layers[24:])
        self.vgg.classifier = nn.Sequential(
            nn.Conv2d(512, 4096, kernel_size=(2, 2), stride=(1, 1)),
            nn.ReLU(inplace=True),
            nn.Dropout2d(),

            nn.Conv2d(4096, 4096, kernel_size=(1, 1), stride=(1, 1)),
            nn.ReLU(inplace=True),
            nn.Dropout2d(),

            nn.Conv2d(4096, num_classes, kernel_size=(1, 1), stride=(1, 1)),
            # Upsample the scores and expand to 512 channels to match to_pool4's output.
            nn.ConvTranspose2d(num_classes, 512, 4, 2, 0, bias=False)
            )
        self.upsample16 = nn.ConvTranspose2d(512, num_classes, 16, 16, 0, bias=False)

    def forward(self, x):
        """Return the upsampled per-class score maps for the image batch ``x``."""
        # Shapes noted below are the ones recorded for batches of 64 images of 256x256.
        pool4_output = self.to_pool4(x)  # pool4 output: [64, 512, 16, 16]
        x = self.to_pool5(pool4_output)
        x = self.vgg.classifier(x)       # 2x-upsampled head output: [64, 512, 16, 16]
        # Fuse the head output with the pool4 features, then upsample 16x.
        x = self.upsample16(x + pool4_output)
        return x

class fcn8s(nn.Module):
    """FCN-8s style segmentation network built on torchvision's VGG16.

    The VGG16 ``features`` stack is split into three stages: layers [0, 17)
    (``to_pool3``), layers [17, 24) (``to_pool4``) and the remaining layers
    (``to_pool5``). Three 256-channel maps are summed before the final
    upsampling:

    * the ``to_pool3`` output,
    * the ``to_pool4`` output upsampled 2x by ``pool4_upsample``,
    * the fully convolutional head's output, whose last layer is a transposed
      convolution (kernel 8, stride 4).

    The sum is upsampled 8x to ``num_classes`` score maps.

    Args:
        num_classes: number of output channels (one score map per class).
        pretrained: forwarded to ``torchvision.models.vgg16``; set to False
            to build the backbone without loading pretrained weights.
    """

    def __init__(self, num_classes, pretrained = True):
        super(fcn8s, self).__init__()
        # Honour the caller's choice instead of always loading pretrained weights.
        self.vgg = torchvision.models.vgg16(pretrained=pretrained)
        feature_layers = list(self.vgg.features.children())
        self.to_pool3 = nn.Sequential(*feature_layers[:17])
        self.to_pool4 = nn.Sequential(*feature_layers[17:24])
        self.to_pool5 = nn.Sequential(*feature_layers[24:])
        self.vgg.classifier = nn.Sequential(
            nn.Conv2d(512, 4096, kernel_size=(2, 2), stride=(1, 1)),
            nn.ReLU(inplace=True),
            nn.Dropout2d(),

            nn.Conv2d(4096, 4096, kernel_size=(1, 1), stride=(1, 1)),
            nn.ReLU(inplace=True),
            nn.Dropout2d(),

            nn.Conv2d(4096, num_classes, kernel_size=(1, 1), stride=(1, 1)),
            nn.ConvTranspose2d(num_classes, 256, 8, 4, 0, bias=False)  # 4x conv7
            )
        self.pool4_upsample = nn.ConvTranspose2d(512, 256, 2, 2, 0, bias=False)
        self.upsample8 = nn.ConvTranspose2d(256, num_classes, 8, 8, 0, bias=False)

    def forward(self, x):
        """Return the upsampled per-class score maps for the image batch ``x``."""
        # Shapes noted below are the ones recorded for batches of 64 images of 256x256.
        pool3_output = self.to_pool3(x)                # [64, 256, 32, 32]
        pool4_output = self.to_pool4(pool3_output)     # [64, 512, 16, 16]
        # pool4_upsample maps 512 -> 256 channels while doubling the resolution.
        pool4_2x = self.pool4_upsample(pool4_output)   # [64, 256, 32, 32]
        x = self.to_pool5(pool4_output)
        x = self.vgg.classifier(x)                     # 4x conv7: [64, 256, 32, 32]
        # Fuse the three 256-channel maps, then upsample 8x.
        x = self.upsample8(x + pool3_output + pool4_2x)
        return x

In [14]:
# select model
# model_name labels the chosen architecture; it appears in the commented-out
# checkpoint path of the training cell.
model_name = "fcn32s"

# Build the network with 7 output channels on the GPU and wrap it in DataParallel.
# Alternative, when the architectures live in an importable `model` module:
#   import model
#   model = model.fcn8s(7).cuda()
model = torch.nn.DataParallel(fcn32s(7).cuda()).cuda()

# 2D negative log-likelihood loss; the training loop feeds it log_softmax outputs.
criterion = nn.NLLLoss2d()
optimizer = optim.Adam(
    model.parameters(),
    lr=2e-4,
    betas=(0.9, 0.999),
)

In [15]:
BATCH_SIZE = 64

# RGB colour written for each predicted class id (index = class id).
CLASS_COLORS = [
    [0, 255, 255],
    [255, 255, 0],
    [255, 0, 255],
    [0, 255, 0],
    [0, 0, 255],
    [255, 255, 255],
    [0, 0, 0],
]

# training
for epoch in range(25):
    print("Epoch:", epoch+1)
    running_loss = 0.0
    num_batches = 0
    total_length = len(train_X)
    # shuffle: draw a fresh permutation of the training set every epoch
    perm_index = torch.randperm(total_length)
    train_X_sfl = train_X[perm_index]
    train_y_sfl = train_y[perm_index]

    # Only full batches are used; a trailing partial batch is dropped.
    for index in range(0, total_length - BATCH_SIZE + 1, BATCH_SIZE):
        # zero the parameter gradients
        optimizer.zero_grad()

        # use GPU
        input_X = Variable(train_X_sfl[index:index+BATCH_SIZE].cuda())
        input_y = Variable(train_y_sfl[index:index+BATCH_SIZE].cuda())

        # forward + backward + optimize
        outputs = model(input_X)
        outputs = F.log_softmax(outputs, dim=1)
        loss = criterion(outputs, input_y)
        loss.backward()
        optimizer.step()

        # view(-1)[0] reads the scalar whether the loss is a 1-element tensor
        # (older PyTorch) or a 0-dim tensor (newer PyTorch, where `loss.data[0]`
        # is no longer valid).
        running_loss += float(loss.data.view(-1)[0])
        num_batches += 1

    # Average over the batches that were actually processed (the dropped
    # partial batch must not count towards the denominator).
    print("Loss:", running_loss / max(num_batches, 1))

    # validation stage
    model.eval()
    pred = []
    for i in range(len(valid_X)):
        input_X_valid = Variable(valid_X[i].view(1,3,256,256).cuda())
        output = model(input_X_valid)
        # Reduce to class ids per image right away instead of accumulating all
        # logits on the GPU with repeated torch.cat (quadratic copying).
        pred.append(np.argmax(output.data.cpu().numpy(), 1))
    pred = np.concatenate(pred, 0)

    # Nearest-neighbour (order=0) upscaling of each predicted label map to 512x512
    # before scoring against valid_y.
    pred_512 = np.array([resize(p,output_shape=(512,512), order=0,preserve_range=True,clip=True) for p in pred])
    mean_iou = mean_iou_score(pred_512, valid_y)
    print("mean iou score",mean_iou)
    if epoch+1 in [1,10,20]: # save pred map
        # decoding stage: turn class ids into RGB masks
        n_masks = len(valid_X)
        masks_RGB = np.zeros((n_masks, 512, 512, 3), dtype=np.uint8)
        for i, p in enumerate(pred_512):
            for class_id, color in enumerate(CLASS_COLORS):
                masks_RGB[i, p == class_id] = color
        # save them
        print("save image")
        output_dir = "./output_folder_"+str(epoch+1)
        # Make sure the target folder exists before writing into it.
        os.makedirs(output_dir, exist_ok=True)
        for i, mask_RGB in enumerate(masks_RGB):
            skimage.io.imsave(os.path.join(output_dir,valid_image_id_list[i]+"_mask.png"), mask_RGB)
#         torch.save(model.state_dict(), "./models/"+model_name+"_"+ str(mean_iou)[:4]+".pkl")
    print("\n")
    model.train()

Epoch: 1
Loss: 1.3540741342824103
mean iou score 0.10268720051688249
save image


Epoch: 2
Loss: 1.0447875854518576
mean iou score 0.29095089698972226


Epoch: 3
Loss: 0.8021965389688758
mean iou score 0.4395690154548463


Epoch: 4
Loss: 0.6894721077780057
mean iou score 0.4313366712356995


Epoch: 5
Loss: 0.6336214071324106
mean iou score 0.5651411732623215


Epoch: 6
Loss: 0.5715759193232397
mean iou score 0.5881107697355468


Epoch: 7
Loss: 0.5197833317263117
mean iou score 0.5976338996202581


Epoch: 8
Loss: 0.49388975134361784
mean iou score 0.6044106072302049


Epoch: 9
Loss: 0.43418928147186925
mean iou score 0.6334926487187762


Epoch: 10
Loss: 0.41060906017285326
mean iou score 0.6427519652676011
save image


Epoch: 11
Loss: 0.3784385801547026
mean iou score 0.6296870999125539


Epoch: 12
Loss: 0.3925565702287449
mean iou score 0.6291898875045074


Epoch: 13
Loss: 0.37028098446540486
mean iou score 0.6180790976200747


Epoch: 14
Loss: 0.30681062517772234
mean iou score 0.63491