In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"
import sys
import random

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('seaborn-white')
import seaborn as sns
sns.set_style("white")

%matplotlib inline

from sklearn.model_selection import train_test_split

from tqdm import tqdm_notebook #, tnrange
#from itertools import chain
from skimage.io import imread, imshow #, concatenate_images
from skimage.transform import resize
from skimage.morphology import label

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import BatchNorm2d
from torchvision.models import ResNet
from torchvision.models.resnet import BasicBlock

import time
from kaggle_util import *
from models import *

# Wall-clock start of the run; a later cell subtracts it to print the total kernel run time.
t_start = time.time()

Using TensorFlow backend.


In [2]:
BN_EPS = 1e-4  # epsilon handed to every BatchNorm2d created by ConvBn2d


class ConvBn2d(nn.Module):
    """Bias-free 2-D convolution optionally followed by batch normalisation.

    Args:
        in_channels: number of input channels of the convolution.
        out_channels: number of output channels (and batch-norm features).
        kernel_size, padding, dilation, stride, groups: forwarded to ``nn.Conv2d``.
        is_bn: pass exactly ``False`` to build the layer without batch norm;
            any other value keeps the ``nn.BatchNorm2d(out_channels, eps=BN_EPS)``.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, padding=1, dilation=1, stride=1, groups=1, is_bn=True):
        super(ConvBn2d, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, padding=padding,
                              stride=stride, dilation=dilation, groups=groups, bias=False)
        # Only the literal False disables batch norm (same rule as before).
        self.bn = None if is_bn is False else nn.BatchNorm2d(out_channels, eps=BN_EPS)

    def forward(self, x):
        """Apply the convolution, then batch norm when it is present."""
        x = self.conv(x)
        if self.bn is not None:
            x = self.bn(x)
        return x

    def merge_bn(self):
        """Fold the batch-norm running statistics into the convolution, in place.

        After the call ``self.bn`` is ``None`` and ``self.conv`` is a new
        ``nn.Conv2d`` with ``bias=True`` whose output equals what conv + batch
        norm produced when batch norm used its running statistics (eval mode).

        The call is a no-op when there is no batch norm to fold (layer built
        with ``is_bn=False``, or already merged), so it is safe to call on every
        ``ConvBn2d`` of a model and safe to call twice.

        Raises:
            AssertionError: if batch norm is present but the convolution already
                has a bias (not the layer ``__init__`` builds).
        """
        if self.bn is None:
            return
        assert self.conv.bias is None, 'merge_bn expects the bias-free convolution built by __init__'

        #https://github.com/sanghoon/pva-faster-rcnn/issues/5
        #https://github.com/sanghoon/pva-faster-rcnn/commit/39570aab8c6513f0e76e5ab5dba8dfbf63e9c68c
        old_conv, bn = self.conv, self.bn

        # Per-output-channel scale: gamma / sqrt(running_var + eps).
        inv_std = 1 / torch.sqrt(bn.running_var + bn.eps)
        scale = inv_std * bn.weight.data

        # Weight is (out_channels, in_channels/groups, KH, KW): broadcast the scale over dim 0.
        merged_weight = scale.view(-1, 1, 1, 1) * old_conv.weight.data
        merged_bias = bn.bias.data - scale * bn.running_mean

        merged_conv = nn.Conv2d(in_channels=old_conv.in_channels, out_channels=old_conv.out_channels,
                                kernel_size=old_conv.kernel_size, padding=old_conv.padding,
                                stride=old_conv.stride, dilation=old_conv.dilation, groups=old_conv.groups,
                                bias=True)
        merged_conv.weight.data = merged_weight
        merged_conv.bias.data = merged_bias

        self.bn = None
        self.conv = merged_conv

class Net(nn.Module):
    """Encoder/decoder segmentation network with an auxiliary image-level head.

    ``forward`` returns two tensors:
      * ``logit_pixel`` - per-pixel logits computed from the channel-wise
        concatenation of all five decoder outputs (the four coarser ones are
        bilinearly upsampled by 2/4/8/16 first).
      * ``logit_image`` - one logit per image, computed from the globally
        average-pooled deepest encoder feature map.

    The encoder reuses ``layer1``..``layer4`` of a ``ResNet(BasicBlock, [3, 4, 6, 3])``.
    No pretrained weights are loaded here.

    NOTE(review): ``Decoder`` is not defined in this notebook. It must be defined
    or imported before ``Net()`` is constructed, otherwise ``__init__`` raises
    NameError. It is called as ``Decoder(a, b, c, d)`` and applied as
    ``decoder(x, skip)``; presumably the last argument is its output channel
    count (5 x 64 = 320 matches the first ``logit_pixel`` conv) - TODO confirm.
    """

    def __init__(self):
        super(Net, self).__init__()
        # Kept as an attribute (not only its layers) so state_dict keys stay unchanged.
        self.resnet = ResNet(BasicBlock, [3, 4, 6, 3], num_classes=1)

        # Stem: stride-1 7x7 conv, so e1 keeps the input resolution.
        self.encoder1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=1, padding=3, bias=False),
            BatchNorm2d(64),
            nn.ReLU(inplace=True),
        )
        self.encoder2 = nn.Sequential(
            nn.MaxPool2d(kernel_size=2, stride=2),
            self.resnet.layer1,
        )
        self.encoder3 = self.resnet.layer2
        self.encoder4 = self.resnet.layer3
        self.encoder5 = self.resnet.layer4

        self.center = nn.Sequential(
            ConvBn2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            ConvBn2d(512, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
        )

        self.decoder5 = Decoder(256, 512, 512, 64)
        self.decoder4 = Decoder( 64, 256, 256, 64)
        self.decoder3 = Decoder( 64, 128, 128, 64)
        self.decoder2 = Decoder( 64,  64,  64, 64)
        self.decoder1 = Decoder( 64,  64,  32, 64)

        # Pixel head over the 320-channel concatenation of d1..d5.
        self.logit_pixel = nn.Sequential(
            nn.Conv2d(320, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d( 64,  1, kernel_size=1, padding=0),
        )

        # Image head over the 512 pooled features of e5.
        self.logit_image = nn.Sequential(
            nn.Linear(512, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 1),
        )

    def forward(self, x):
        """Run the network on a 4-D batch.

        Args:
            x: 4-D tensor. It is copied into three normalised channels that are
                concatenated along dim 1, and ``encoder1`` expects 3 input
                channels, so ``x`` must be single-channel.

        Returns:
            ``(logit_pixel, logit_image)``; ``logit_image`` is flattened to 1-D.
        """
        batch_size, _, _, _ = x.shape  # also enforces a 4-D input

        # These look like the standard ImageNet channel statistics; they are
        # applied in reversed (index 2, 1, 0) order.
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
        x = torch.cat([(x - mean[k]) / std[k] for k in (2, 1, 0)], 1)

        e1 = self.encoder1(x)
        e2 = self.encoder2(e1)
        e3 = self.encoder3(e2)
        e4 = self.encoder4(e3)
        e5 = self.encoder5(e4)

        f = self.center(e5)

        d5 = self.decoder5(f, e5)
        d4 = self.decoder4(d5, e4)
        d3 = self.decoder3(d4, e3)
        d2 = self.decoder2(d3, e2)
        d1 = self.decoder1(d2, e1)

        # F.interpolate is the non-deprecated name of F.upsample; arguments are unchanged.
        f = torch.cat((
            d1,
            F.interpolate(d2, scale_factor= 2, mode='bilinear', align_corners=False),
            F.interpolate(d3, scale_factor= 4, mode='bilinear', align_corners=False),
            F.interpolate(d4, scale_factor= 8, mode='bilinear', align_corners=False),
            F.interpolate(d5, scale_factor=16, mode='bilinear', align_corners=False),
        ), 1)
        f = F.dropout(f, p=0.50, training=self.training)
        logit_pixel = self.logit_pixel(f)

        f = F.adaptive_avg_pool2d(e5, output_size=1).view(batch_size, -1)
        f = F.dropout(f, p=0.50, training=self.training)
        logit_image = self.logit_image(f).view(-1)

        return logit_pixel, logit_image


    ##-----------------------------------------------------------------


    def criterion(self, logit_pixel, logit_image, truth_pixel, truth_image, is_average=True):
        """Weighted pixel and image losses.

        Args:
            logit_pixel: pixel logits from ``forward``.
            logit_image: image logits from ``forward``.
            truth_pixel: pixel targets, passed to ``lovasz_loss``.
            truth_image: floating-point image targets. They are the BCE target
                and also multiply the pixel loss, so images with target 0
                contribute no pixel loss.
            is_average: when true both losses are reduced to scalars; otherwise
                they are returned unreduced.

        Returns:
            ``(2 * loss_pixel, 0.1 * loss_image)``.

        ``lovasz_loss`` is not defined in this notebook; presumably with
        ``is_average=False`` it returns one value per image - TODO confirm.
        """
        # Weights used with the Lovasz pixel loss (0.1 / 10 was noted for focal loss).
        weight_image, weight_pixel = 0.1, 2

        # Same result as the deprecated ``reduce=is_average`` keyword.
        reduction = 'mean' if is_average else 'none'
        loss_image = F.binary_cross_entropy_with_logits(logit_image, truth_image, reduction=reduction)

        loss_pixel = lovasz_loss(logit_pixel, truth_pixel, mode='logistic', is_average=False)
        loss_pixel = loss_pixel * truth_image  # loss for empty image is weighted 0

        if is_average:
            # A batch with no positive image has a zero denominator; flooring it
            # gives 0 for that batch instead of NaN and leaves other batches unchanged.
            denominator = truth_image.sum().clamp(min=torch.finfo(truth_image.dtype).eps)
            loss_pixel = loss_pixel.sum() / denominator

        return weight_pixel * loss_pixel, weight_image * loss_image

In [3]:
img_size_ori = 101     # side length of the images as stored on disk
img_size_target = 101  # side length fed to the model


def _resize_square(img, side):
    """Resize `img` to `side` x `side`, keeping the original value range."""
    return resize(img, (side, side), mode='constant', preserve_range=True)


def upsample(img):
    """Return `img` at the model size; the same object is returned when no resize is needed."""
    return img if img_size_ori == img_size_target else _resize_square(img, img_size_target)


def downsample(img):
    """Return `img` at the original size; the same object is returned when no resize is needed."""
    return img if img_size_ori == img_size_target else _resize_square(img, img_size_ori)


In [4]:
# NOTE(review): Net.__init__ instantiates `Decoder`, which is neither defined in this
# notebook nor provided by the imports in the recorded run, so this line raises
# NameError until a Decoder class is defined or imported.
net = Net()

NameError: name 'Decoder' is not defined

In [None]:
# Loading of training/testing ids and depths
def _load_scaled_gray(path_template, idx):
    """Read one image as grayscale and divide its pixel values by 255."""
    return np.array(load_img(path_template.format(idx), grayscale=True)) / 255


train_df = pd.read_csv("../input/train.csv", index_col="id", usecols=[0])
depths_df = pd.read_csv("../input/depths.csv", index_col="id")
train_df = train_df.join(depths_df)

# Every id that has a depth but is not a training id is treated as a test id.
is_train_id = depths_df.index.isin(train_df.index)
test_df = depths_df[~is_train_id]

train_df["images"] = [
    _load_scaled_gray("../input/train/images/{}.png", idx)
    for idx in tqdm_notebook(train_df.index)
]
train_df["masks"] = [
    _load_scaled_gray("../input/train/masks/{}.png", idx)
    for idx in tqdm_notebook(train_df.index)
]

# Sum of the scaled mask pixels divided by the image area.
train_df["coverage"] = train_df["masks"].map(np.sum) / img_size_ori ** 2
train_df["coverage_class"] = train_df["coverage"].map(cov_to_class)

# Despite the column name, the flag is 1 when the mask has a non-zero maximum and 0 otherwise.
train_df['empty'] = train_df['masks'].apply(lambda mask: (mask.max() != 0) * 1)

In [None]:
# Counts per value of the 'empty' flag, which is set to 1 when a mask's maximum
# is non-zero and to 0 otherwise.
train_df['empty'].value_counts()

In [None]:
# Number of leading rows of train_df to keep; currently the full frame.
SUBSET = len(train_df)
train_df = train_df.iloc[:SUBSET]
len(train_df)

In [None]:
def _stack_images(column):
    """Upsample each image in `column` and stack them as (-1, img_size_target, img_size_target, 1)."""
    stacked = np.array(column.map(upsample).tolist())
    return stacked.reshape(-1, img_size_target, img_size_target, 1)


# One stratified 80/20 split applied to ids, images, masks, coverage, depth and the
# 'empty' flag. The *_test names hold the validation part of the split.
(
    ids_train, ids_valid,
    x_train, x_valid,
    y_train, y_valid,
    cov_train, cov_test,
    depth_train, depth_test,
    empty_train, empty_test,
) = train_test_split(
    train_df.index.values,
    _stack_images(train_df.images),
    _stack_images(train_df.masks),
    train_df.coverage.values,
    train_df.z.values,
    train_df['empty'].values,
    test_size=0.2,
    stratify=train_df.coverage_class,
    random_state=1234,
)

In [None]:
#Data augmentation
# Append a left-right mirrored copy of every training image and mask. The arrays are
# (N, H, W, 1), so the width axis is axis 2. The image-level flag is unchanged by a
# flip and is simply repeated.
# NOTE(review): this cell overwrites its own inputs, so running it twice doubles the
# training set again.
x_train = np.concatenate([x_train, x_train[:, :, ::-1]], axis=0)
y_train = np.concatenate([y_train, y_train[:, :, ::-1]], axis=0)
empty_train = np.concatenate([empty_train, empty_train], axis=0)

# NOTE(review): the second line prints y_valid, not y_train - confirm that is intended.
for shape in (x_train.shape, y_valid.shape, empty_train.shape):
    print(shape)

In [None]:
# Hyper-parameters; they are also baked into the run name and output paths below.
start_feature = 32
batch_size = 32
dropout = 0.5
base_name = 'Unet_resnet_3loss_{}_{}_{}'.format(start_feature, batch_size, dropout)
basic_name = '../model/{}'.format(base_name)
save_model_name = basic_name + '.model'
submission_file = basic_name + '.csv'

print(save_model_name)
print(submission_file)

# model
# NOTE(review): this builds a Keras-style model (Input / Model / compile) that is
# separate from the PyTorch `Net` defined earlier; build_model_deeper comes from a
# star import and returns three outputs.
input_layer = Input((img_size_target, img_size_target, 1))
output_layer, out_empty, out_final = build_model_deeper(input_layer, start_feature,dropout)

model1 = Model(input_layer, [output_layer, out_empty, out_final])

# Losses and weights are keyed by name - presumably the output-layer names assigned
# inside build_model_deeper (TODO confirm). 'final_out' is disabled in both dicts.
losses = {
    'empty_out' : 'binary_crossentropy',
    'segment_out':lovasz_loss,
    #'final_out' : lovasz_loss,
}
lossWeights = {
    'empty_out' : 0.2,
    'segment_out':2,
    #'final_out' : 3,
}
c = optimizers.adam(lr = 0.01)
model1.compile(loss=losses, loss_weights=lossWeights, optimizer=c, metrics=[my_iou_metric_2])

In [None]:
# Training / validation targets keyed by output name. 'final_out' is disabled here,
# matching the loss configuration.
y_combine_train = {
    'empty_out' : empty_train,
    'segment_out': y_train,
    #'final_out' : y_train,
}
# The original (misspelled) name is kept as an alias in case other cells still use it.
y_combine_rain = y_combine_train

y_combine_test = {
    'empty_out' : empty_test,
    'segment_out': y_valid,
    #'final_out' : y_valid,
}

epochs = 200

# Early stopping, checkpointing and LR scheduling all watch the same metric.
# Early stopping and the LR schedule used to watch 'val_final_out_my_iou_metric_2',
# but 'final_out' has neither a loss nor a target, so that metric is presumably never
# reported and the two callbacks could not trigger. The checkpoint already watched
# the 'segment_out' metric.
monitor_metric = 'val_segment_out_my_iou_metric_2'

board = keras.callbacks.TensorBoard(log_dir='log/{}'.format(base_name),
                       histogram_freq=0, write_graph=True, write_images=False)
early_stopping = EarlyStopping(monitor=monitor_metric, mode='max', patience=20, verbose=1)
model_checkpoint = ModelCheckpoint(save_model_name, monitor=monitor_metric,
                                   mode='max', save_best_only=True, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor=monitor_metric, mode='max', factor=0.5, patience=3,
                              min_lr=0.00001, verbose=1)


history = model1.fit(x_train, y_combine_train,
                    validation_data=(x_valid, y_combine_test),  # the documented form is a tuple
                    epochs=epochs,
                    batch_size=batch_size,
                    callbacks=[board, early_stopping, model_checkpoint, reduce_lr],
                    verbose=1)

In [None]:
# Reload the model file written to save_model_name by the checkpoint callback.
# The custom metric and loss have to be supplied by name for deserialisation.
model = load_model(
    save_model_name,
    custom_objects=dict(my_iou_metric_2=my_iou_metric_2, lovasz_loss=lovasz_loss),
)



In [None]:
preds_valid = predict_result(model, x_valid, img_size_target)

## Scoring for last model, choose threshold by validation data
# Candidate thresholds are chosen in probability space and mapped through the inverse
# sigmoid (logit), because the sigmoid activation was removed from the model output.
thresholds_ori = np.linspace(0.3, 0.7, 31)
thresholds = np.log(thresholds_ori / (1 - thresholds_ori))

ious = np.array([
    iou_metric_batch(y_valid, preds_valid > threshold)
    for threshold in tqdm_notebook(thresholds)
])
print(ious)

# instead of using default 0 as threshold, use validation data to find the best threshold.
threshold_best_index = ious.argmax()
iou_best = ious[threshold_best_index]
threshold_best = thresholds[threshold_best_index]

fig, ax = plt.subplots()
ax.plot(thresholds, ious)
ax.plot(threshold_best, iou_best, "xr", label="Best threshold")
ax.set_xlabel("Threshold")
ax.set_ylabel("IoU")
ax.set_title("Threshold vs IoU ({}, {})".format(threshold_best, iou_best))
ax.legend()

In [None]:
def _load_test_image(idx):
    """Read one test image as grayscale, scale it by 1/255 and bring it to the model size."""
    scaled = np.array(load_img("../input/test/images/{}.png".format(idx), grayscale=True)) / 255
    # Training images go through upsample() before being reshaped; test images must
    # too, otherwise the reshape below breaks as soon as img_size_target differs from
    # img_size_ori. With equal sizes upsample() returns its input unchanged.
    return upsample(scaled)


x_test = np.array(
    [_load_test_image(idx) for idx in tqdm_notebook(test_df.index)]
).reshape(-1, img_size_target, img_size_target, 1)

preds_test = predict_result(model, x_test, img_size_target)

In [None]:
# Threshold every test prediction at the validation-selected threshold (after bringing
# it back to the original size) and run-length encode it, keyed by image id.
t1 = time.time()
pred_dict = {}
for i, idx in enumerate(tqdm_notebook(test_df.index.values)):
    binary_mask = np.round(downsample(preds_test[i]) > threshold_best)
    pred_dict[idx] = rle_encode(binary_mask)
t2 = time.time()

print(f"Usedtime = {t2-t1} s")

# One row per id with a single 'rle_mask' column, then 'id' moved back to a column.
sub = pd.DataFrame.from_dict(pred_dict, orient='index')
sub.index.names = ['id']
sub.columns = ['rle_mask']
sub = sub.reset_index()

# NOTE(review): save_result is a star-imported helper; with send=True it presumably
# also submits the file to the competition - confirm before re-running.
save_result(
    sub,
    '../result/{}.csv'.format(base_name),
    competition='tgs-salt-identification-challenge',
    send=True,
    index=False,
)

t_finish = time.time()
print(f"Kernel run time = {(t_finish-t_start)/3600} hours")