In [1]:
import cv2
from matplotlib import pyplot as plt
import numpy as np
import os
import pandas as pd

import torch
import torch.nn as nn
import torch.utils as utils
import torch.nn.init as init
import torch.utils.data as data
import torchvision.utils as v_utils
import torchvision.transforms as transforms
from torch.autograd import Variable
from torch.utils.data import Dataset

In [2]:
%matplotlib inline

# Preprocessing

In [3]:
# Ground-truth table of run-length-encoded masks, keyed by image file name.
# An image id can appear on several rows, and EncodedPixels can be empty
# (see the preview below).
masks = pd.read_csv('./train_ship_segmentations.csv')
masks.head(n=5)

Unnamed: 0,ImageId,EncodedPixels
0,00003e153.jpg,
1,000155de5.jpg,264661 17 265429 33 266197 33 266965 33 267733...
2,00021ddc3.jpg,101361 1 102128 3 102896 4 103663 6 104430 9 1...
3,00021ddc3.jpg,95225 2 95992 5 96760 7 97527 9 98294 9 99062 ...
4,00021ddc3.jpg,74444 4 75212 4 75980 4 76748 4 77517 3 78285 ...


In [4]:
# ref: https://www.kaggle.com/paulorzp/run-length-encode-and-decode
def rle_decode(mask_rle, shape=(768, 768)):
    '''
    Decode a run-length-encoded mask into a binary image.

    mask_rle: run-length as string formatted "start length start length ...".
              Starts are 1-based and count pixels column by column.
              NaN or None (no mask available) decodes to an all-zero image.
    shape: (height,width) of array to return
    Returns numpy uint8 array of exactly that shape, 1 - mask, 0 - background

    '''
    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)

    # NaN is the only value that is not equal to itself, so the comparison
    # doubles as a NaN check that also works for plain strings.
    if mask_rle is not None and mask_rle == mask_rle:
        s = mask_rle.split()
        starts = np.asarray(s[0::2], dtype=int) - 1  # convert to 0-based offsets
        lengths = np.asarray(s[1::2], dtype=int)
        for lo, hi in zip(starts, starts + lengths):
            img[lo:hi] = 1

    # The flat buffer is column-major, so unfold it in Fortran order. This is
    # identical to reshape(shape).T for square shapes, and it also returns the
    # documented (height, width) array when the shape is not square.
    return img.reshape(shape, order='F')

In [5]:
def create_mask(image_path):
    """Build the combined 3-channel mask image for one training image.

    image_path: path to the image; only its file name is used to look up
                rows in the module-level ``masks`` table.
    Returns a (768, 768, 3) uint8 array with 255 where any decoded mask
    covers the pixel and 0 elsewhere; the three channels are identical.
    """
    # os.path.basename understands the platform's path separators, unlike a
    # manual split on '/'.
    image_id = os.path.basename(image_path)
    img_masks = masks.loc[masks['ImageId'] == image_id, 'EncodedPixels'].tolist()

    # Take the individual ship masks and create a single mask array for all ships
    masks_all = np.zeros((768, 768))
    for mask in img_masks:
        masks_all += rle_decode(mask)
    # Overlapping masks can sum above 1; cap before scaling to the 0/255 range.
    masks_all = np.minimum(masks_all, 1) * 255

    # Replicate the single-channel mask along a trailing channel axis.
    return np.dstack((masks_all, masks_all, masks_all)).astype(np.uint8)

In [6]:
def convert_runlength(convert_image):
    """Run-length encode a binary mask.

    convert_image: 2-D array whose mask pixels are exactly 1. It is resized
                   to 768x768 first if it does not already have that size.
    Returns a string "start length start length ..." listing the runs of
    mask pixels in column-major order, with 1-based starts (the convention
    rle_decode expects). Returns "" when no pixel equals 1.
    """
    if convert_image.shape[:2] != (768, 768):
        # Nearest-neighbour keeps the mask strictly binary; an interpolating
        # resize would create fractional edge values that fail the == 1 test.
        convert_image = cv2.resize(convert_image, (768, 768),
                                   interpolation=cv2.INTER_NEAREST)

    # Column-major flat indices (0-based) of every mask pixel, ascending.
    positions = np.flatnonzero(convert_image.T.flatten() == 1)
    if positions.size == 0:
        return ""

    # A new run begins wherever consecutive indices are not adjacent.
    breaks = np.flatnonzero(np.diff(positions) != 1) + 1
    first = np.concatenate(([0], breaks))
    last = np.concatenate((breaks - 1, [positions.size - 1]))
    run_starts = positions[first]
    run_lengths = positions[last] - run_starts + 1

    # Interleave as start, length, start, length, ... with 1-based starts.
    pairs = np.column_stack((run_starts + 1, run_lengths))
    return " ".join(str(value) for value in pairs.ravel())

# Make Dataset

In [7]:
class AirbusDataset(Dataset):
    """Training dataset yielding (image, mask image) pairs.

    root_dir: directory whose entries are all treated as training images.
    transform: optional callable applied separately to the image and to
               the mask image.
    """

    def __init__(self, root_dir, transform=None):
        # os.listdir returns entries in arbitrary order; sorting makes the
        # index -> file mapping reproducible between runs.
        self.image_paths = sorted(os.listdir(root_dir))
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return (image, masked_img) for the idx-th file in root_dir.

        Raises IOError when the file cannot be decoded as an image.
        """
        image_path = os.path.join(self.root_dir, self.image_paths[idx])
        # NOTE: cv2.imread yields BGR channel order and no RGB conversion is
        # applied here.
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError("Could not read image: {}".format(image_path))
        masked_img = create_mask(image_path)
        if self.transform:
            image = self.transform(image)
            masked_img = self.transform(masked_img)
        return image, masked_img

In [8]:
class AirbusTestDataset(Dataset):
    """Inference dataset yielding (image, file name) pairs.

    root_dir: directory whose entries are all treated as test images.
    transform: optional callable applied to each loaded image.
    """

    def __init__(self, root_dir, transform=None):
        # os.listdir returns entries in arbitrary order; sorting makes the
        # index -> file mapping reproducible between runs.
        self.image_paths = sorted(os.listdir(root_dir))
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return (image, file_name) for the idx-th file in root_dir.

        Raises IOError when the file cannot be decoded as an image.
        """
        image_path = os.path.join(self.root_dir, self.image_paths[idx])
        # NOTE: cv2.imread yields BGR channel order and no RGB conversion is
        # applied here.
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError("Could not read image: {}".format(image_path))
        if self.transform:
            image = self.transform(image)
        return image, self.image_paths[idx]

In [9]:
# Loader batch size and the size passed to transforms.Resize further down.
batch_size, img_size = 4, 256

In [10]:
# Images (and their mask images) go ndarray -> PIL -> resized -> float tensor.
train_dataset = AirbusDataset(
    root_dir='./train/',
    transform=transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(size=img_size),
        transforms.ToTensor(),
    ]),
)
train_batch = data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)

# Make Network

In [11]:
def conv_block(in_dim,out_dim,act_fn):
    """Return Conv2d(3x3, stride 1, padding 1) -> BatchNorm2d -> act_fn.

    in_dim / out_dim: input and output channel counts.
    act_fn: activation module instance placed last in the sequence.
    """
    layers = [
        nn.Conv2d(in_dim, out_dim, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(out_dim),
        act_fn,
    ]
    return nn.Sequential(*layers)


def conv_trans_block(in_dim,out_dim,act_fn):
    """Return ConvTranspose2d -> BatchNorm2d -> act_fn.

    The transposed convolution uses a 3x3 kernel with stride 2, padding 1
    and output_padding 1.

    in_dim / out_dim: input and output channel counts.
    act_fn: activation module instance placed last in the sequence.
    """
    layers = [
        nn.ConvTranspose2d(in_dim, out_dim, kernel_size=3, stride=2,
                           padding=1, output_padding=1),
        nn.BatchNorm2d(out_dim),
        act_fn,
    ]
    return nn.Sequential(*layers)


def maxpool():
    """Return a 2x2 max-pooling layer with stride 2 and no padding."""
    return nn.MaxPool2d(kernel_size=2, stride=2, padding=0)


def conv_block_2(in_dim,out_dim,act_fn):
    """Return one conv_block followed by a 3x3 Conv2d and BatchNorm2d.

    The trailing convolution/batch-norm pair has no activation after it.
    """
    stages = [
        conv_block(in_dim, out_dim, act_fn),
        nn.Conv2d(out_dim, out_dim, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(out_dim),
    ]
    return nn.Sequential(*stages)


def conv_block_3(in_dim,out_dim,act_fn):
    """Return two conv_blocks followed by a 3x3 Conv2d and BatchNorm2d.

    The trailing convolution/batch-norm pair has no activation after it.
    """
    stages = [
        conv_block(in_dim, out_dim, act_fn),
        conv_block(out_dim, out_dim, act_fn),
        nn.Conv2d(out_dim, out_dim, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(out_dim),
    ]
    return nn.Sequential(*stages)

In [12]:
class Conv_residual_conv(nn.Module):
    """conv_block -> (conv_block_3 added to its own input) -> conv_block.

    in_dim / out_dim: channel counts of the input and of every later stage.
    act_fn: activation module instance handed to each sub-block.
    """

    def __init__(self,in_dim,out_dim,act_fn):
        super(Conv_residual_conv,self).__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim

        self.conv_1 = conv_block(self.in_dim,self.out_dim,act_fn)
        self.conv_2 = conv_block_3(self.out_dim,self.out_dim,act_fn)
        self.conv_3 = conv_block(self.out_dim,self.out_dim,act_fn)

    def forward(self,input):
        # conv_1's output serves both as conv_2's input and as the shortcut
        # that is added back onto conv_2's output.
        shortcut = self.conv_1(input)
        merged = shortcut + self.conv_2(shortcut)
        return self.conv_3(merged)


class FusionGenerator(nn.Module):
    """Encoder / bridge / decoder network built from Conv_residual_conv stages.

    input_nc: number of input channels.
    output_nc: number of output channels.
    ngf: channel count of the first encoder stage; it doubles at each of the
         four encoder stages and at the bridge, then halves back in the decoder.

    Each decoder stage upsamples with a transposed convolution and averages
    the result with the same-level encoder output before its residual block.
    The final 3x3 convolution is followed by tanh.
    """

    def __init__(self,input_nc, output_nc, ngf):
        super(FusionGenerator,self).__init__()
        self.in_dim = input_nc
        self.out_dim = ngf
        self.final_out_dim = output_nc

        # One activation instance is shared by all encoder/bridge blocks and
        # another by all decoder blocks.
        encoder_act = nn.LeakyReLU(0.2, inplace=True)
        decoder_act = nn.ReLU()

        print("\n------Initiating FusionNet------\n")

        base = self.out_dim

        # encoder

        self.down_1 = Conv_residual_conv(self.in_dim, base, encoder_act)
        self.pool_1 = maxpool()
        self.down_2 = Conv_residual_conv(base, base * 2, encoder_act)
        self.pool_2 = maxpool()
        self.down_3 = Conv_residual_conv(base * 2, base * 4, encoder_act)
        self.pool_3 = maxpool()
        self.down_4 = Conv_residual_conv(base * 4, base * 8, encoder_act)
        self.pool_4 = maxpool()

        # bridge

        self.bridge = Conv_residual_conv(base * 8, base * 16, encoder_act)

        # decoder

        self.deconv_1 = conv_trans_block(base * 16, base * 8, decoder_act)
        self.up_1 = Conv_residual_conv(base * 8, base * 8, decoder_act)
        self.deconv_2 = conv_trans_block(base * 8, base * 4, decoder_act)
        self.up_2 = Conv_residual_conv(base * 4, base * 4, decoder_act)
        self.deconv_3 = conv_trans_block(base * 4, base * 2, decoder_act)
        self.up_3 = Conv_residual_conv(base * 2, base * 2, decoder_act)
        self.deconv_4 = conv_trans_block(base * 2, base, decoder_act)
        self.up_4 = Conv_residual_conv(base, base, decoder_act)

        # output

        self.out = nn.Conv2d(base, self.final_out_dim, kernel_size=3, stride=1, padding=1)
        self.out_2 = nn.Tanh()

        # initialization
        # Only nn.Conv2d and nn.BatchNorm2d instances are re-initialised here;
        # every other module keeps whatever its constructor set.

        for module in self.modules():
            if isinstance(module, nn.BatchNorm2d):
                module.weight.data.normal_(1.0, 0.02)
            elif isinstance(module, nn.Conv2d):
                module.weight.data.normal_(0.0, 0.02)
            else:
                continue
            module.bias.data.fill_(0)


    def forward(self,input):
        # Encoder: keep each stage's output for the decoder skip connections.
        down_1 = self.down_1(input)
        down_2 = self.down_2(self.pool_1(down_1))
        down_3 = self.down_3(self.pool_2(down_2))
        down_4 = self.down_4(self.pool_3(down_3))

        bridge = self.bridge(self.pool_4(down_4))

        # Decoder: upsample, average with the same-level encoder output,
        # then refine with a residual block.
        up_1 = self.up_1((self.deconv_1(bridge) + down_4)/2)
        up_2 = self.up_2((self.deconv_2(up_1) + down_3)/2)
        up_3 = self.up_3((self.deconv_3(up_2) + down_2)/2)
        up_4 = self.up_4((self.deconv_4(up_3) + down_1)/2)

        return self.out_2(self.out(up_4))

In [13]:
# Adam learning rate.
lr = 0.002

# 3-channel input, 3-channel output, 64 base feature maps, wrapped for GPU 0.
generator = nn.DataParallel(
    FusionGenerator(input_nc=3, output_nc=3, ngf=64),
    device_ids=[0],
).cuda()

# Pixel-wise mean squared error between the generated and target mask images.
recon_loss_func = nn.MSELoss()
gen_optimizer = torch.optim.Adam(generator.parameters(), lr=lr)


------Initiating FusionNet------



# Training

In [14]:
import slack_notification as sn

In [None]:
epoch = 10

# The snapshot code below writes into these directories; create them up front
# so that the first save cannot fail on a missing folder.
for out_dir in ('./result_fusion', './model_fusion', './model_fusion_state'):
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

# The context manager flushes and closes the loss log even if training is
# interrupted or raises.
with open('./fusionnet_mse_loss', 'w') as loss_log:
    for i in range(epoch):
        for batch_idx, (images, img_masks) in enumerate(train_batch):
            gen_optimizer.zero_grad()

            x = Variable(images).cuda(0)
            y_ = Variable(img_masks).cuda(0).float()
            # Call the module itself rather than .forward().
            y = generator(x)

            loss = recon_loss_func(y, y_)
            # NOTE(review): this logs the printed representation of the loss
            # object, not a bare number - confirm that is the intended format.
            loss_log.write(str(loss) + "\n")
            loss.backward()
            gen_optimizer.step()

            # Every 400 batches: print progress, save sample images, save the
            # model (whole object and state dict) and send a notification.
            if batch_idx % 400 == 0:
                print(i)
                print(loss)
                v_utils.save_image(x.cpu().data, "./result_fusion/original_image_{}_{}.png".format(i, batch_idx))
                v_utils.save_image(y_.cpu().data, "./result_fusion/label_image_{}_{}.png".format(i, batch_idx))
                v_utils.save_image(y.cpu().data, "./result_fusion/gen_image_{}_{}.png".format(i, batch_idx))
                torch.save(generator, './model_fusion/fusion_{}_{}.pkl'.format(i, batch_idx))
                torch.save(generator.state_dict(), './model_fusion_state/fusion_{}_{}.pkl'.format(i, batch_idx))

                sn.send_notification(text = 'finish: epoch {}, batch_idx {}'.format(i, batch_idx))


0
Variable containing:
1.00000e-02 *
  7.5492
[torch.cuda.FloatTensor of size 1 (GPU 0)]



  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


0
Variable containing:
1.00000e-05 *
  3.0392
[torch.cuda.FloatTensor of size 1 (GPU 0)]



# Evaluation

In [None]:
# Same preprocessing as used for training: ndarray -> PIL -> resized -> tensor.
test_dataset = AirbusTestDataset(root_dir='./test/', transform=transforms.Compose([
                                            transforms.ToPILImage(),
                                            transforms.Resize(size=img_size),
                                            transforms.ToTensor(),
                                            ]))
# Shuffling brings no benefit at inference time and only makes the order of
# the produced rows differ from run to run, so it is disabled here.
test_batch = data.DataLoader(test_dataset, batch_size=batch_size,
                            shuffle=False, num_workers=2)

In [None]:
# torch.load unpickles the complete module object, so only load checkpoint
# files that you produced yourself.
model = torch.load("./model_fusion/fusion_4_800.pkl")
# Switch to inference mode so the BatchNorm layers use their running
# statistics instead of per-batch statistics. The trailing semicolon
# suppresses the module repr in the notebook output.
model.eval();

In [None]:
# Test image file names that test() leaves out of the submission.
ignore_images = [
    '13703f040.jpg', '14715c06d.jpg', '33e0ff2d5.jpg', '4d4e09f2a.jpg',
    '877691df8.jpg', '8b909bb20.jpg', 'a8d99130e.jpg', 'ad55c3143.jpg',
    'c8260c541.jpg', 'd6c7f17c7.jpg', 'dc3e7c901.jpg', 'e44dffe88.jpg',
    'ef87bad36.jpg', 'f083256d8.jpg',
]

In [None]:
def test():
    """Run the loaded model over test_batch and write submission_fusion.csv.

    For every test image not listed in ignore_images, the model output is
    clipped to [0, 1], averaged over channels, thresholded at 0.2, upscaled
    to 768x768 and run-length encoded. The CSV has the columns ImageId and
    EncodedPixels. It is rewritten every 5000 processed images (and once more
    at the end), each time followed by a notification. Returns None.
    """
    columns = ['ImageId', 'EncodedPixels']
    # Rows are collected in a list and turned into a DataFrame only when the
    # CSV is written; growing a DataFrame one row at a time gets slower as it
    # fills up.
    records = []

    def save_submission():
        # Write all rows collected so far, overwriting the previous snapshot.
        frame = pd.DataFrame(records, columns=columns)
        frame.to_csv('submission_fusion.csv', header=True, index=False)

    # Inference mode: BatchNorm layers use their running statistics instead
    # of per-batch statistics. Calling eval() repeatedly is harmless.
    model.eval()

    for image, image_paths in test_batch:
        image = Variable(image)
        outputs = model(image).data.cpu().numpy()
        for img_name, out_img in zip(image_paths, outputs):
            if img_name in ignore_images:
                continue
            # channels-first -> channels-last, then clamp into [0, 1]
            out_img = np.clip(out_img.transpose(1, 2, 0), 0, 1)
            out_gray = np.mean(out_img, axis=2)

            thresh = 0.2
            max_pixel = 1
            _, out_thresh = cv2.threshold(out_gray, thresh, max_pixel, cv2.THRESH_BINARY)

            # Nearest-neighbour keeps the mask strictly 0/1; an interpolating
            # resize would produce fractional edge values that
            # convert_runlength's == 1 test silently drops.
            out_thresh = cv2.resize(out_thresh, (768, 768), interpolation=cv2.INTER_NEAREST)
            rl = convert_runlength(out_thresh)

            records.append([img_name, rl])
            index = len(records) - 1
            if index % 5000 == 0:
                save_submission()
                sn.send_notification(text='{} tests done'.format(index))

    save_submission()
    sn.send_notification(text='all done')