[View in Colaboratory](https://colab.research.google.com/github/slt666666/kaggle_salt/blob/master/depth_center_false_edge_binary1.ipynb)

In [0]:
# Print how long this machine has been up, in days and in seconds (read from /proc/uptime).
!cat /proc/uptime | awk '{print $1 /60 /60 /24 "days (" $1 "sec)"}'

In [0]:
# Install a pinned PyTorch wheel (torch 0.4.1, cu80 build, cp36) straight from download.pytorch.org.
!pip install -q http://download.pytorch.org/whl/cu80/torch-0.4.1-cp36-cp36m-linux_x86_64.whl 
# NOTE(review): torchvision is not version-pinned here.
!pip install -q torchvision

In [0]:
import torch
# Displays whether PyTorch can see a CUDA device; later cells call .cuda() unconditionally.
torch.cuda.is_available()

In [0]:
# Install PyDrive and import the Google authentication helpers.
# NOTE(review): GoogleAuth and GoogleDrive are not used in the visible part of this notebook.
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

In [0]:
# Mount Google Drive at ./drive using google-drive-ocamlfuse.
# Step 1: install the FUSE client (a pinned .deb build) and its prerequisites.
!apt-get install -y -qq software-properties-common python-software-properties module-init-tools
!wget https://launchpad.net/~alessandro-strada/+archive/ubuntu/google-drive-ocamlfuse-beta/+build/15331130/+files/google-drive-ocamlfuse_0.7.0-0ubuntu1_amd64.deb
!dpkg -i google-drive-ocamlfuse_0.7.0-0ubuntu1_amd64.deb
!apt-get install -f
!apt-get -y install -qq fuse
# Step 2: authenticate the Colab user and fetch the application-default credentials.
from google.colab import auth
auth.authenticate_user()
from oauth2client.client import GoogleCredentials
creds = GoogleCredentials.get_application_default()
import getpass
# Step 3: the first headless call prints the authorization URL (filtered with grep);
# the verification code is then read without echo and piped into a second call.
# NOTE(review): the client secret and the verification code are interpolated into
# shell command lines here.
!google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
vcode = getpass.getpass()
!echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}
# Step 4: create the mount point and mount the drive there.
!mkdir -p drive
!google-drive-ocamlfuse drive

In [0]:
# Copy the competition archive from the mounted drive into the working directory.
!cp drive/kaggle/salt/input.zip .

In [0]:
# Unpack the archive into input/, then unpack the nested test/train archives and move
# their extracted folders to input/test/images, input/train/images and input/train/masks.
# NOTE(review): this cell is not re-runnable as written -- `mkdir` without -p fails when
# the directory already exists, and the archives are deleted at the end.
!unzip -q input.zip
!mkdir input/test
!mkdir input/train
!unzip -q input/test.zip
!mv images input/test/
!unzip -q input/train.zip
!mv images input/train/
!mv masks input/train/
# Remove the archives once their contents are in place.
!rm input.zip
!rm input/test.zip
!rm input/train.zip

In [0]:
# Run configuration.
# `directory` is the root of the unpacked data; `seed` and `fold` are used further down
# to pick the validation-index CSV file name.
directory = 'input'
seed = 78
fold = 1

In [0]:
# ResNet
import torch.nn as nn
import math
import torch.utils.model_zoo as model_zoo
import torch.nn.functional as F
import torch

# Public names advertised for the ResNet code in this cell.
# NOTE(review): only `ResNet` and the `resnet34` factory are defined in the visible part
# of this notebook; the other listed factories are not.
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152']

# Download URLs for pretrained ResNet weights, keyed by architecture name.
model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}


def conv3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3 ``nn.Conv2d`` with padding 1.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: convolution stride (default 1).

    Returns:
        The configured ``nn.Conv2d`` module.
    """
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)


class ChannelGate2d(nn.Module):
    """Channel gate: rescales every channel by a weight in (0, 1).

    The input is globally average-pooled to 1x1, passed through a 1x1-conv
    bottleneck (``channels -> channels // reduction -> channels``) with a ReLU
    in between, squashed by a sigmoid, and multiplied back onto the input.
    """

    def __init__(self, channels, reduction=2):
        super(ChannelGate2d, self).__init__()
        bottleneck = channels // reduction
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc1 = nn.Conv2d(channels, bottleneck, kernel_size=1, padding=0)
        self.relu = nn.ReLU(inplace=True)
        self.fc2 = nn.Conv2d(bottleneck, channels, kernel_size=1, padding=0)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` multiplied by its per-channel gate."""
        squeezed = self.avg_pool(x)
        hidden = self.relu(self.fc1(squeezed))
        gate = self.sigmoid(self.fc2(hidden))
        return x * gate


class BasicBlock(nn.Module):
    """Two-layer 3x3 residual block with an optional channel gate.

    Args:
        inplanes: input channel count.
        planes: output channel count.
        stride: stride of the first convolution.
        downsample: optional module applied to the input to form the shortcut.
        activation: activation module to use; a fresh in-place ReLU when None.
        SE: when True, a ``ChannelGate2d`` is applied before the residual addition.
    """
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, activation=None, SE=False):
        super(BasicBlock, self).__init__()
        self.SE = SE
        self.activation = nn.ReLU(inplace=True) if activation is None else activation

        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride
        if SE:
            self.cSE = ChannelGate2d(planes, reduction=16)

    def forward(self, x):
        """Run conv-bn-act, conv-bn, optional gate, then add the shortcut and activate."""
        out = self.activation(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # The shortcut is the raw input unless a projection module was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        if self.SE:
            out = self.cSE(out)

        out += shortcut
        return self.activation(out)


class Bottleneck(nn.Module):
    """1x1 -> 3x3 -> 1x1 residual bottleneck block with an optional channel gate.

    The block outputs ``planes * expansion`` channels.

    Args:
        inplanes: input channel count.
        planes: width of the inner 3x3 convolution.
        stride: stride of the 3x3 convolution.
        downsample: optional module applied to the input to form the shortcut.
        activation: activation module to use; a fresh in-place ReLU when None.
        SE: when True, a ``ChannelGate2d`` is applied before the residual addition.
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, activation=None, SE=False):
        super(Bottleneck, self).__init__()
        self.SE = SE
        if activation is None:
            self.activation = nn.ReLU(inplace=True)
        else:
            self.activation = activation

        out_planes = planes * self.expansion
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)
        self.downsample = downsample
        self.stride = stride
        if SE:
            # The gate runs on the output of conv3/bn3, which has
            # planes * expansion channels, so it must be sized accordingly
            # (sizing it with `planes` fails with a channel mismatch).
            self.cSE = ChannelGate2d(out_planes, reduction=16)

    def forward(self, x):
        """Apply the three conv-bn stages, the optional gate, and the residual addition."""
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.activation(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.activation(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        if self.SE:
            out = self.cSE(out)

        out += residual
        out = self.activation(out)

        return out


class ResNet(nn.Module):
    """ResNet backbone whose stem convolution uses stride 1.

    Args:
        block: residual block class; must expose an ``expansion`` attribute and
            accept ``(inplanes, planes, stride, downsample, activation=, SE=)``.
        layers: number of blocks in each of the four stages.
        activation: activation module shared by the stem and all blocks;
            a fresh in-place ReLU when None.
        num_classes: output size of the final linear layer.
        SE: forwarded to every block.
    """

    def __init__(self, block, layers, activation=None, num_classes=1000, SE=False):
        super(ResNet, self).__init__()

        self.SE = SE
        self.inplanes = 64
        self.activation = nn.ReLU(inplace=True) if activation is None else activation

        self.conv1 = nn.Conv2d(3, 64, kernel_size=7,
                               stride=1, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Re-initialise every convolution (normal, std = sqrt(2 / (kh * kw * out_channels)))
        # and reset every BatchNorm2d to weight 1 / bias 0.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                kernel_h, kernel_w = module.kernel_size[0], module.kernel_size[1]
                fan = kernel_h * kernel_w * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan))
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of ``blocks`` residual blocks and update ``self.inplanes``."""
        out_planes = planes * block.expansion

        # A 1x1 conv + BN projection is needed on the shortcut whenever the
        # first block changes the resolution or the channel count.
        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        head = block(self.inplanes, planes, stride, downsample,
                     activation=self.activation, SE=self.SE)
        self.inplanes = out_planes
        tail = [block(self.inplanes, planes, activation=self.activation, SE=self.SE)
                for _ in range(1, blocks)]

        return nn.Sequential(head, *tail)

    def forward(self, x):
        """Stem, four stages, average pool, flatten, and the linear layer."""
        x = self.activation(self.bn1(self.conv1(x)))
        x = self.maxpool(x)

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)


def resnet34(pretrained=False, **kwargs):
    """Construct a ResNet-34 (``BasicBlock`` with stage depths 3, 4, 6, 3).

    Args:
        pretrained (bool): if True, download the weights listed under
            ``model_urls['resnet34']`` and load them non-strictly.
        **kwargs: forwarded to the ``ResNet`` constructor.

    Returns:
        The constructed model.
    """
    stage_depths = [3, 4, 6, 3]
    model = ResNet(BasicBlock, stage_depths, **kwargs)
    if not pretrained:
        return model

    weights = model_zoo.load_url(model_urls['resnet34'])
    # strict=False: keys that do not match between the checkpoint and the model are ignored.
    model.load_state_dict(weights, strict=False)
    return model


def load_pretrain_file(net, pretrain_file, skip=['cSE']):
    """Copy weights from a saved state dict into ``net``, skipping selected keys.

    Args:
        net: module to receive the weights.
        pretrain_file: path to a file readable by ``torch.load`` that holds a state dict.
        skip: substrings; any key of ``net`` containing one of them keeps its
            current value. The default list is only read, never mutated.

    Raises:
        KeyError: if a non-skipped key of ``net`` is missing from the file.
    """
    pretrain_state_dict = torch.load(pretrain_file)
    state_dict = net.state_dict()
    for key in list(state_dict.keys()):
        if any(s in key for s in skip):
            continue
        state_dict[key] = pretrain_state_dict[key]
    # Rebinding entries of the dict returned by state_dict() does not touch the
    # module's parameters; the merged dict has to be loaded back explicitly.
    net.load_state_dict(state_dict)

In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import cv2
from torch.autograd import Variable
from torch.optim import lr_scheduler
from pathlib import Path
from tqdm import tqdm, tqdm_notebook
import random
import time

class ELU_1(nn.ELU):
    """``nn.ELU`` subclass whose forward calls ``F.elu`` with the stored alpha and inplace flag."""

    def __init__(self, *args, **kwargs):
        super(ELU_1, self).__init__(*args, **kwargs)

    def forward(self, input):
        """Apply ELU to ``input``."""
        alpha, inplace = self.alpha, self.inplace
        return F.elu(input, alpha, inplace)

class ConvBn2d(nn.Module):
    """Bias-free ``nn.Conv2d`` followed by ``nn.BatchNorm2d`` (no activation).

    All arguments are passed to the convolution; the batch norm is sized by
    ``out_channels``.
    """

    def __init__(self, in_channels, out_channels,
                 kernel_size=(3, 3), stride=(1, 1),
                 padding=(1, 1), groups=1, dilation=1):
        super(ConvBn2d, self).__init__()
        conv_options = dict(kernel_size=kernel_size, stride=stride, padding=padding,
                            bias=False, groups=groups, dilation=dilation)
        self.conv = nn.Conv2d(in_channels, out_channels, **conv_options)
        self.bn = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        """Return ``bn(conv(x))``."""
        return self.bn(self.conv(x))

class CenterBlock(nn.Module):
    """Residual bottleneck block placed between the encoder and the decoder.

    Args:
        in_channels: input channel count.
        out_channels: output channel count.
        pool: when True, the input is first max-pooled by 2.
        SE: when True, an ``scSqueezeExcitationGate`` is applied before the
            residual addition.
    """

    def __init__(self, in_channels, out_channels, pool=True, SE=False):
        super(CenterBlock, self).__init__()
        self.SE = SE
        self.pool = pool
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = ConvBn2d(in_channels, out_channels, kernel_size=3, padding=1)
        self.conv2 = ConvBn2d(out_channels, out_channels, kernel_size=3, padding=1)
        # 1x1 projection so the shortcut matches the main branch's channel count.
        self.conv_res = nn.Conv2d(in_channels, out_channels, kernel_size=1, padding=0)
        if SE:
            self.se = scSqueezeExcitationGate(out_channels)

    def forward(self, x):
        """Optional pooling, two conv-bn stages, optional gate, residual add, ReLU."""
        if self.pool:
            x = F.max_pool2d(x, kernel_size=2, stride=2)

        shortcut = self.conv_res(x)
        out = self.conv2(self.relu(self.conv1(x)))

        if self.SE:
            out = self.se(out)

        out += shortcut
        return self.relu(out)

class SpatialGate2d(nn.Module):
    """Spatial gate: rescales every pixel by a weight in (0, 1).

    A 1x1 convolution collapses the channels to a single map, a sigmoid
    squashes it, and the map is multiplied back onto the input.
    """

    def __init__(self, in_channels):
        super(SpatialGate2d, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, 1, kernel_size=1, stride=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the per-pixel gate multiplied by ``x``."""
        gate = self.sigmoid(self.conv1(x))
        return gate * x

class ChannelGate2d(nn.Module):
    """Channel gate: rescales every channel by a weight in (0, 1).

    The input is globally average-pooled to 1x1, passed through a 1x1-conv
    bottleneck (``channels -> channels // reduction -> channels``) with a ReLU
    in between, squashed by a sigmoid, and multiplied back onto the input.
    """

    def __init__(self, channels, reduction=2):
        super(ChannelGate2d, self).__init__()
        bottleneck = channels // reduction
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc1 = nn.Conv2d(channels, bottleneck, kernel_size=1, padding=0)
        self.relu = nn.ReLU(inplace=True)
        self.fc2 = nn.Conv2d(bottleneck, channels, kernel_size=1, padding=0)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` multiplied by its per-channel gate."""
        squeezed = self.avg_pool(x)
        hidden = self.relu(self.fc1(squeezed))
        gate = self.sigmoid(self.fc2(hidden))
        return x * gate
      
class scSqueezeExcitationGate(nn.Module):
    """Sum of a spatial gate and a channel gate applied to the same input.

    Args:
        channels: channel count of the input.
        reduction: bottleneck ratio passed to the channel gate.
    """

    def __init__(self, channels, reduction=16):
        super(scSqueezeExcitationGate, self).__init__()
        self.spatial_gate = SpatialGate2d(channels)
        self.channel_gate = ChannelGate2d(channels, reduction=reduction)

    def forward(self, x, z=None):
        """Return ``spatial_gate(x) + channel_gate(x)``; ``z`` is accepted but not used."""
        spatially_gated = self.spatial_gate(x)
        channel_gated = self.channel_gate(x)
        return spatially_gated + channel_gated

class PyramidPoolingModule(nn.Module):
    """Pyramid pooling: concatenates the input with several pooled-and-upsampled copies.

    Args:
        pool_list: iterable of ``(kernel_size, stride)`` pairs, one per pyramid level.
        in_channels: channel count of the input (and of the output).
        size: spatial size every pooled branch is upsampled to; it must equal
            the input's spatial size so the branches can be concatenated.
    """

    def __init__(self, pool_list, in_channels, size=(128, 128)):
        super(PyramidPoolingModule, self).__init__()
        self.size = size
        self.pool_list = pool_list
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = nn.ModuleList([
            nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0) for _ in range(len(pool_list))])
        # The concatenation holds the input plus one in_channels-wide branch per
        # pyramid level, i.e. in_channels * (levels + 1) channels in total.
        self.conv2 = nn.Conv2d(in_channels * (len(pool_list) + 1), in_channels, kernel_size=1)

    def forward(self, x):
        """Pool, project and upsample each level, concatenate with ``x``, then fuse with a 1x1 conv."""
        cat = [x]
        for (k, s), conv in zip(self.pool_list, self.conv1):
            out = F.avg_pool2d(x, kernel_size=k, stride=s)
            out = conv(out)
            out = F.upsample(out, size=self.size, mode='bilinear', align_corners=True)
            cat.append(out)
        out = torch.cat(cat, 1)
        out = self.conv2(out)
        out = self.relu(out)
        return out
      
class Decoder_v3(nn.Module):
    """Decoder stage: transposed-conv upsampling, skip concatenation, residual conv block.

    Args:
        in_channels: channel count of the skip tensor.
        convT_channels: channel count of the tensor being upsampled.
        out_channels: channel count produced by the stage.
        convT_ratio: the transposed convolution outputs
            ``convT_channels // convT_ratio`` channels.
        SE: when True, an ``scSqueezeExcitationGate`` is applied before the
            residual addition.
    """

    def __init__(self, in_channels, convT_channels, out_channels, convT_ratio=2, SE=False):
        super(Decoder_v3, self).__init__()
        up_channels = convT_channels // convT_ratio
        self.relu = nn.ReLU(inplace=True)
        self.SE = SE
        self.convT = nn.ConvTranspose2d(convT_channels, up_channels, kernel_size=2, stride=2)
        self.conv1 = ConvBn2d(in_channels + up_channels, out_channels)
        self.conv2 = ConvBn2d(out_channels, out_channels)
        if SE:
            self.scSE = scSqueezeExcitationGate(out_channels)

        # 1x1 projection of the upsampled tensor, used as the residual shortcut.
        self.conv_res = nn.Conv2d(up_channels, out_channels, kernel_size=1, padding=0)

    def forward(self, x, skip):
        """Upsample ``x`` by 2, fuse it with ``skip``, and add the projected shortcut."""
        upsampled = self.convT(x)
        out = torch.cat([upsampled, skip], 1)
        out = self.conv2(self.relu(self.conv1(out)))
        if self.SE:
            out = self.scSE(out)
        out += self.conv_res(upsampled)
        return self.relu(out)

class UNetResNet34_SE_Hyper_SPP(nn.Module):
    """U-Net style segmentation network on an SE-augmented ResNet-34 encoder.

    The forward pass returns three logits:

    * ``logit``: fused per-pixel logit, computed from the decoder hypercolumn
      concatenated with the broadcast image-level feature;
    * ``logit_pixel``: per-pixel logit computed from the decoder hypercolumn alone;
    * ``logit_image``: one logit per image, trained in ``criterion`` against
      "the mask is non-empty".

    Args:
        pretrained: passed to ``resnet34`` to load pretrained encoder weights.
        activation: ``'relu'`` or ``'elu'``; selects the activation shared by
            the encoder.
        **kwargs: forwarded to ``nn.Module.__init__``.

    Raises:
        ValueError: if ``activation`` is not one of the supported names.
    """

    def __init__(self, pretrained=True, activation='relu', **kwargs):
        super(UNetResNet34_SE_Hyper_SPP, self).__init__(**kwargs)
        if activation == 'relu':
            self.activation = nn.ReLU(inplace=True)
        elif activation == 'elu':
            self.activation = ELU_1(inplace=True)
        else:
            # Without this guard an unknown name only failed later, with an
            # AttributeError on self.activation.
            raise ValueError("activation must be 'relu' or 'elu', got %r" % (activation,))

        self.resnet = resnet34(pretrained=pretrained, activation=self.activation, SE=True)

        self.conv1 = nn.Sequential(
            self.resnet.conv1,
            self.resnet.bn1,
            self.resnet.activation,
        )  # 64

        self.encoder1 = self.resnet.layer1  # 64
        self.encoder2 = self.resnet.layer2  # 128
        self.encoder3 = self.resnet.layer3  # 256
        self.encoder4 = self.resnet.layer4  # 512

        self.center = CenterBlock(512, 64, pool=False, SE=False)

        self.decoder4 = Decoder_v3(256, 64,  64, convT_ratio=1,  SE=True)
        self.decoder3 = Decoder_v3(128, 64,  64, convT_ratio=1,  SE=True)
        self.decoder2 = Decoder_v3(64,  64,  64, convT_ratio=1,  SE=True)
        self.decoder1 = Decoder_v3(64,  64,  64, convT_ratio=1,  SE=True)

        # Not used by forward(); left in place so existing checkpoints keep the same keys.
        self.reducer = ConvBn2d(64 * 5, 64, kernel_size=1, padding=0)

        # Per-pixel head on the 5 x 64 = 320 channel hypercolumn.
        self.logit_pixel  = nn.Sequential(
            nn.Conv2d(320, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d( 64,  1, kernel_size=1, padding=0),
        )

        # Image-level head on the globally pooled encoder4 feature (512 channels).
        self.logit_image = nn.Sequential(
            nn.Linear(512, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 1),
        )

        # Fused head on hypercolumn (320) + broadcast image feature (512).
        self.logit = nn.Sequential(
            ConvBn2d(64 * 5 + 512, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 1, kernel_size=1, padding=0),
        )

    def forward(self, x):
        """Compute ``(logit, logit_pixel, logit_image)`` for a single-channel batch.

        The single input channel is replicated three times, each copy
        normalised with one of the ``mean``/``std`` pairs below. The size
        comments give the spatial side length for a 128x128 input.
        """
        mean=[0.485, 0.456, 0.406]
        std =[0.229, 0.224, 0.225]
        x = torch.cat([
            (x-mean[0])/std[0],
            (x-mean[1])/std[1],
            (x-mean[2])/std[2],
        ],1)

        x = self.conv1(x) # 128
        p = F.max_pool2d(x, kernel_size=2, stride=2) # 64

        e1 = self.encoder1(p)   # 64
        e2 = self.encoder2(e1)  # 32
        e3 = self.encoder3(e2)  # 16
        e4 = self.encoder4(e3)  # 8

        c = self.center(e4)  # 8

        d4 = self.decoder4(c, e3)  # 16
        d3 = self.decoder3(d4, e2)  # 32
        d2 = self.decoder2(d3, e1)  # 64
        d1 = self.decoder1(d2, x)   # 128

        # Hypercolumn: every decoder level (and the center) upsampled to full resolution.
        f = torch.cat([
            d1,
            F.upsample(d2, scale_factor=2,  mode='bilinear', align_corners=False),
            F.upsample(d3, scale_factor=4,  mode='bilinear', align_corners=False),
            F.upsample(d4, scale_factor=8,  mode='bilinear', align_corners=False),
            F.upsample(c,  scale_factor=16, mode='bilinear', align_corners=False)
            ], 1)

        batch_size = f.size()[0]
        fuse_pixel  = f
        logit_pixel = self.logit_pixel(f)

        e = F.adaptive_avg_pool2d(e4, output_size=1).view(batch_size,-1) #image pool
        fuse_image  = e
        logit_image = self.logit_image(fuse_image).view(-1)

        # Broadcast the image-level feature to the hypercolumn's spatial size.
        # Taking the size from `f` (rather than a fixed scale_factor=128) gives the
        # same result for 128x128 inputs and keeps other input sizes working.
        fuse = torch.cat([ #fuse
            fuse_pixel,
            F.upsample(fuse_image.view(batch_size,-1,1,1,), size=tuple(f.size()[2:]), mode='nearest')
        ],1)
        logit = self.logit(fuse)
        return logit, logit_pixel, logit_image

    def criterion1(self, logit, truth ):
        """Robust focal loss on sigmoid probabilities."""
        loss = RobustFocalLoss2d()(logit, truth, type='sigmoid')
        return loss

    def criterion2(self, logit, truth ):
        """Lovasz hinge loss with ``LovaszLoss`` defaults."""
        loss = LovaszLoss()(logit, truth)
        return loss

    def criterion(self, logit, logit_pixel, logit_image, truth_pixel, is_average=True):
        """Weighted sum of the image-level, pixel-level and fused losses.

        Args:
            logit, logit_pixel, logit_image: the three outputs of ``forward``.
            truth_pixel: ground-truth masks; indexed as (batch, 1, H, W) here.
            is_average: when True the image and pixel losses are reduced to
                scalars; when False they are kept per image.

        Returns:
            ``0.5 * loss_pixel + 0.05 * loss_image + 1.0 * loss_all``.
        """
        # An image counts as "non-empty" when its mask sums to at least 0.1.
        truth_image = truth_pixel.squeeze(1)
        truth_image = torch.sum(truth_image, (1, 2)).ge(0.1).float()
        loss_image = F.binary_cross_entropy_with_logits(logit_image, truth_image, reduce=is_average)

        loss_pixel = LovaszLoss()(logit_pixel, truth_pixel)
        if isinstance(loss_pixel, (list, tuple)):
            # LovaszLoss returns one scalar tensor per image; stack them so the
            # weighting below stays a differentiable tensor operation instead of
            # a list-times-NumPy-array product.
            loss_pixel = torch.stack(list(loss_pixel))
        loss_pixel = loss_pixel * truth_image  # loss for empty image is weighted 0
        if is_average:
            if truth_image.sum() == 0:
                loss_pixel = 0
            else:
                loss_pixel = loss_pixel.sum()/truth_image.sum()

        loss_all = LovaszLoss()(logit, truth_pixel, per_image=False)

        weight_pixel = 0.5
        weight_image = 0.05
        weight_all = 1.0

        return weight_pixel*loss_pixel + weight_image*loss_image + weight_all*loss_all

    def metric(self, logit, truth):
        """Return ``dice_accuracy`` of the sigmoid of ``logit`` against ``truth``."""
        pred = torch.sigmoid(logit)
        dice = dice_accuracy(pred, truth)
        return dice

    def set_mode(self, mode ):
        """Record ``mode`` and switch the module between train and eval behaviour.

        Raises:
            NotImplementedError: for any mode other than 'train', 'eval',
                'valid' or 'test'.
        """
        self.mode = mode
        if mode in ['eval', 'valid', 'test']:
            self.eval()
        elif mode in ['train']:
            self.train()
        else:
            raise NotImplementedError

SaltNet =  UNetResNet34_SE_Hyper_SPP

In [0]:
# loss function
class RobustFocalLoss2d(nn.Module):
    """Focal loss whose focusing term is clamped to [0, 2].

    Args:
        gamma: focusing exponent applied to ``1 - p``.
        size_average: when True ``forward`` returns the mean loss, otherwise
            the per-element losses as an (N, 1) tensor.
    """

    def __init__(self, gamma=2, size_average=True):
        super(RobustFocalLoss2d, self).__init__()
        self.gamma = gamma
        self.size_average = size_average

    def forward(self, logit, target, class_weight=None, type='softmax'):
        """Compute the loss.

        Args:
            logit: raw scores. For ``type='sigmoid'`` every element is a binary
                logit; for ``type='softmax'`` the tensor is (B, C, H, W).
            target: class indices with one entry per prediction; flattened and
                cast to long here.
            class_weight: optional per-class weights (defaults to all ones).
            type: ``'sigmoid'`` or ``'softmax'``.

        Raises:
            ValueError: if ``type`` is not one of the two supported names.
        """
        target = target.view(-1, 1).long()

        if type == 'sigmoid':
            if class_weight is None:
                class_weight = [1] * 2  # [0.5, 0.5]

            prob = torch.sigmoid(logit).view(-1, 1)
            prob = torch.cat((1 - prob, prob), 1)

        elif type == 'softmax':
            B, C, H, W = logit.size()
            if class_weight is None:
                class_weight = [1] * C  # [1/C]*C

            logit = logit.permute(0, 2, 3, 1).contiguous().view(-1, C)
            prob = F.softmax(logit, 1)

        else:
            raise ValueError("type must be 'sigmoid' or 'softmax', got %r" % (type,))

        # Build helper tensors on the logits' device instead of hard-coding
        # .cuda(), so the loss also works on CPU.
        class_weight = torch.tensor(class_weight, dtype=prob.dtype, device=prob.device).view(-1, 1)
        class_weight = torch.gather(class_weight, 0, target)

        # Probability assigned to the true class of every element.
        prob = prob.gather(1, target)
        prob = torch.clamp(prob, 1e-8, 1 - 1e-8)

        focus = torch.pow((1 - prob), self.gamma)
        focus = torch.clamp(focus, 0, 2)

        batch_loss = -class_weight * focus * prob.log()

        if self.size_average:
            loss = batch_loss.mean()
        else:
            loss = batch_loss

        return loss

# Lovasz
try:
    from itertools import ifilterfalse
except ImportError:  # py3k
    # Python 3 renamed ifilterfalse to filterfalse; bind it under the name
    # that mean() actually calls.
    from itertools import filterfalse as ifilterfalse

def mean(l, ignore_nan=False, empty=0):
    """
    nanmean compatible with generators.

    Args:
        l: iterable of values that support ``+`` and division by an int.
        ignore_nan: when True, values for which ``np.isnan`` is true are dropped.
        empty: value returned for an empty iterable; the string 'raise' makes
            the function raise ``ValueError`` instead.

    Returns:
        The arithmetic mean of the (remaining) values, or ``empty``.
    """
    l = iter(l)
    if ignore_nan:
        l = ifilterfalse(np.isnan, l)
    try:
        n = 1
        acc = next(l)
    except StopIteration:
        if empty == 'raise':
            raise ValueError('Empty mean')
        return empty
    # enumerate starts at 2 so that n ends up as the total element count.
    for n, v in enumerate(l, 2):
        acc += v
    if n == 1:
        return acc
    return acc / n

def lovasz_grad(gt_sorted):
    """
    Gradient of the Lovasz extension with respect to sorted errors
    (see Alg. 1 in the paper).

    ``gt_sorted`` holds the ground-truth labels ordered by decreasing error.
    The result has the same length: the Jaccard loss after each prefix,
    differenced so that entry i is the increment contributed by element i.
    """
    count = len(gt_sorted)
    total_positive = gt_sorted.sum()
    remaining_intersection = total_positive - gt_sorted.float().cumsum(0)
    growing_union = total_positive + (1 - gt_sorted).float().cumsum(0)
    jaccard = 1. - remaining_intersection / growing_union
    if count > 1:  # nothing to difference in the 1-pixel case
        jaccard[1:count] = jaccard[1:count] - jaccard[0:-1]
    return jaccard
  
def lovasz_hinge_flat(logits, labels):
    """
    Binary Lovasz hinge loss on flattened inputs.
      logits: [P] Variable, logits at each prediction (any real value)
      labels: [P] Tensor, binary ground truth labels (0 or 1)

    Returns a scalar; when there are no labels the result is ``logits.sum() * 0``
    so that the gradients are zero.
    """
    if len(labels) == 0:
        # only void pixels, the gradients should be 0
        return logits.sum() * 0.

    # Map labels {0, 1} to signs {-1, +1} and compute the hinge errors.
    signs = 2. * labels.float() - 1.
    errors = (1. - logits * Variable(signs))

    # Sort errors from largest to smallest and reorder the labels the same way.
    errors_sorted, order = torch.sort(errors, dim=0, descending=True)
    gt_sorted = labels[order.data]

    grad = lovasz_grad(gt_sorted)
    return torch.dot(F.relu(errors_sorted), Variable(grad))


def flatten_binary_scores(scores, labels, ignore=None):
    """
    Flatten predictions and labels to 1-D (binary case).

    When ``ignore`` is given, every position whose label equals ``ignore`` is
    removed from both outputs.
    """
    flat_scores = scores.contiguous().view(-1)
    flat_labels = labels.contiguous().view(-1)
    if ignore is not None:
        keep = (flat_labels != ignore)
        return flat_scores[keep], flat_labels[keep]
    return flat_scores, flat_labels

def unpad_im(im, pad=((13, 14), (13, 14))):
    """Crop padding from the last two axes of a 4-D batch.

    Args:
        im: array/tensor indexed as (batch, channel, height, width).
        pad: ``((top, bottom), (left, right))`` amounts to remove.

    Returns:
        The cropped view of ``im``.
    """
    (top, bottom), (left, right) = pad
    height, width = im.shape[2], im.shape[3]
    # Explicit end indices instead of negative slices: a trailing pad of 0
    # would otherwise become `:-0`, which selects nothing. max(..., 0) keeps
    # an over-large pad yielding an empty result rather than wrapping around.
    row_end = max(height - bottom, 0)
    col_end = max(width - right, 0)
    return im[:, :, top:row_end, left:col_end]

class LovaszLoss(nn.Module):
    """Binary Lovasz hinge loss computed image by image on un-padded inputs."""

    def __init__(self):
        super(LovaszLoss, self).__init__()

    def forward(self, logits, labels, per_image=True, ignore=None):
        """
        Binary Lovasz hinge loss
          logits: batch of logit maps (any real value); cropped with ``unpad_im`` first
          labels: batch of binary ground truth masks (0 or 1); cropped the same way
          per_image: when True, return a Python list with one loss per image;
                     when False, return the mean of the per-image losses
          ignore: void class id
        """
        logits = unpad_im(logits)
        labels = unpad_im(labels)

        def per_image_losses():
            # One Lovasz hinge value per (logit map, mask) pair in the batch.
            for logit_map, mask in zip(logits, labels):
                flat = flatten_binary_scores(logit_map.unsqueeze(0), mask.unsqueeze(0), ignore)
                yield lovasz_hinge_flat(*flat)

        if per_image:
            return list(per_image_losses())
        return mean(per_image_losses())

In [0]:
# metric function
def unpad_im(im, pad=((13, 14), (13, 14))):
    """Crop padding from the last two axes of a 4-D batch.

    Args:
        im: array/tensor indexed as (batch, channel, height, width).
        pad: ``((top, bottom), (left, right))`` amounts to remove.

    Returns:
        The cropped view of ``im``.
    """
    (top, bottom), (left, right) = pad
    height, width = im.shape[2], im.shape[3]
    # Explicit end indices instead of negative slices: a trailing pad of 0
    # would otherwise become `:-0`, which selects nothing. max(..., 0) keeps
    # an over-large pad yielding an empty result rather than wrapping around.
    row_end = max(height - bottom, 0)
    col_end = max(width - right, 0)
    return im[:, :, top:row_end, left:col_end]

def dice_accuracy(prob, truth, threshold=0.5, is_average=True, smooth=1e-12):
    """Overlap score between thresholded predictions and ground truth.

    Both inputs are cropped with ``unpad_im`` and flattened per image.
    Despite the name, the value computed is intersection over union of the
    two binary masks (with ``smooth`` added to numerator and denominator).

    Args:
        prob: predicted probabilities.
        truth: ground-truth masks; values above 0.5 count as foreground.
        threshold: cut-off applied to ``prob``.
        is_average: when True return the batch mean, otherwise one score per image.
        smooth: stabiliser that makes two empty masks score 1.
    """
    prob = unpad_im(prob)
    truth = unpad_im(truth)

    batch_size = prob.size(0)
    pred_mask = prob.detach().contiguous().view(batch_size, -1) > threshold
    true_mask = truth.detach().contiguous().view(batch_size, -1) > 0.5

    overlap = (pred_mask & true_mask).float().sum(1)
    combined = (pred_mask | true_mask).float().sum(1)
    score = (overlap + smooth) / (combined + smooth)

    if not is_average:
        return score
    return score.sum() / batch_size

def iou_pytorch(outputs: torch.Tensor, labels: torch.Tensor, lovasz=True):
    """Thresholded IoU score averaged over the batch.

    Args:
        outputs: predictions with a singleton axis 1; binarised at 0 when
            ``lovasz`` is True (raw logits) and at 0.5 otherwise.
        labels: ground-truth masks with a singleton axis 1.
        lovasz: selects the binarisation threshold as described above.

    Returns:
        Mean over the batch of ``ceil(clamp(20 * (iou - 0.5), 0, 10)) / 10``.
    """
    smooth = 1e-10
    cutoff = 0 if lovasz else 0.5
    predicted = outputs.squeeze(1) > cutoff
    target = labels.squeeze(1).byte()

    intersection = (predicted & target).sum(dim=(1, 2)).float()
    union = (predicted | target).sum(dim=(1, 2)).float()
    iou = (intersection + smooth) / (union + smooth)

    # Map each IoU onto a 0.0 .. 1.0 score in steps of 0.1.
    per_image = torch.clamp(20 * (iou - 0.5), 0, 10).ceil() / 10
    return per_image.mean()

In [0]:
def add_depth_channels(image): # (101, 101, 3)
    """Return a float32 copy of ``image`` with channels 1 and 2 replaced.

    Channel 0 is kept. Channels 1 and 2 both end up holding channel 0
    multiplied by a vertical ramp that runs from 0 at the first row to 1 at
    the last row. The input array is not modified.
    """
    result = image.astype('float32')
    height, _width, _channels = result.shape

    # Vertical ramp, broadcast across every column of channel 1.
    ramp = np.linspace(0, 1, height)
    result[:, :, 1] = ramp[:, np.newaxis]

    result[:, :, 2] = result[:, :, 0] * result[:, :, 1]
    result[:, :, 1] = result[:, :, 2]
    return result

def load_test_image(image_path):
    """
    Load a test image and its horizontal mirror as padded single-channel tensors.

    Each image is padded by replicating its border (13 pixels top/left, 14
    bottom/right), then channel 0 is scaled by 1/255 and returned in
    (channel, height, width) order as float32.

    Returns:
        ``(image, mirrored_image)`` as torch tensors.

    Raises:
        FileNotFoundError: if OpenCV cannot read ``image_path``.
    """
    img = cv2.imread(str(image_path))
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError('could not read image: %s' % image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = add_depth_channels(img)

    reflect_img = cv2.flip(img, 1)

    img = cv2.copyMakeBorder(img, 13, 14, 13, 14, cv2.BORDER_REPLICATE)
    reflect_img = cv2.copyMakeBorder(reflect_img, 13, 14, 13, 14, cv2.BORDER_REPLICATE)

    # Only channel 0 is kept.
    img = img[:, :, 0:1] / 255.0
    reflect_img = reflect_img[:, :, 0:1] / 255.0
    return torch.from_numpy(np.transpose(img, (2, 0, 1)).astype('float32')), torch.from_numpy(np.transpose(reflect_img, (2, 0, 1)).astype('float32'))

def load_valid_image(image_path, mask_path):
    """
    Load a validation image and its mask as padded single-channel tensors.

    Both are padded by replicating the border (13 pixels top/left, 14
    bottom/right). Channel 0 of the image is scaled by 1/255; channel 0 of
    the mask is integer-divided by 255. Both are returned in
    (channel, height, width) order as float32. No augmentation is applied.

    Returns:
        ``(image, mask)`` as torch tensors.

    Raises:
        FileNotFoundError: if OpenCV cannot read either file.
    """
    img = cv2.imread(str(image_path))
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError('could not read image: %s' % image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = add_depth_channels(img)

    msk = cv2.imread(str(mask_path))
    if msk is None:
        raise FileNotFoundError('could not read mask: %s' % mask_path)
    msk = cv2.cvtColor(msk, cv2.COLOR_BGR2RGB)

    img = cv2.copyMakeBorder(img, 13, 14, 13, 14, cv2.BORDER_REPLICATE)
    msk = cv2.copyMakeBorder(msk, 13, 14, 13, 14, cv2.BORDER_REPLICATE)

    # Only channel 0 is kept.
    img = img[:, :, 0:1] / 255.0
    msk = msk[:, :, 0:1] // 255

    return torch.from_numpy(np.transpose(img, (2, 0, 1)).astype('float32')), torch.from_numpy(np.transpose(msk, (2, 0, 1)).astype('float32'))

def load_train_image(image_path, mask_path): 
    """
    Load a training image and its mask as padded single-channel tensors.

    With probability 0.5 the image and mask are flipped horizontally
    together. Both are then padded by replicating the border (13 pixels
    top/left, 14 bottom/right). Channel 0 of the image is scaled by 1/255;
    channel 0 of the mask is integer-divided by 255. Both are returned in
    (channel, height, width) order as float32.

    Returns:
        ``(image, mask)`` as torch tensors.

    Raises:
        FileNotFoundError: if OpenCV cannot read either file.
    """
    img = cv2.imread(str(image_path))
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError('could not read image: %s' % image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    msk = cv2.imread(str(mask_path))
    if msk is None:
        raise FileNotFoundError('could not read mask: %s' % mask_path)
    msk = cv2.cvtColor(msk, cv2.COLOR_BGR2RGB)

    # Random horizontal flip, applied to image and mask together.
    # (Brightness/gamma and random crop/affine augmentations were tried here
    # previously and are currently disabled.)
    if np.random.rand() < 0.5:
        img = cv2.flip(img, 1)
        msk = cv2.flip(msk, 1)

    img = add_depth_channels(img)
    img = cv2.copyMakeBorder(img, 13, 14, 13, 14, cv2.BORDER_REPLICATE)
    msk = cv2.copyMakeBorder(msk, 13, 14, 13, 14, cv2.BORDER_REPLICATE)

    # Only channel 0 is kept.
    img = img[:, :, 0:1] / 255.0
    msk = msk[:, :, 0:1] // 255

    return torch.from_numpy(np.transpose(img, (2, 0, 1)).astype('float32')), torch.from_numpy(np.transpose(msk, (2, 0, 1)).astype('float32'))

In [0]:
# Adapted from vizualization kernel
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch

from torch.utils import data

class TGSSaltDataset(data.Dataset):
    """Dataset of ``<root_path>/images/<id>.png`` with masks in ``<root_path>/masks``.

    Args:
        root_path: directory containing the ``images`` and ``masks`` folders.
        file_list: sequence of image ids (file names without ``.png``).
        is_test: when True items are ``(image, mirrored_image)`` from
            ``load_test_image``.
        is_valid: when True (and ``is_test`` is False) items come from
            ``load_valid_image``; otherwise from ``load_train_image``.
    """

    def __init__(self, root_path, file_list, is_test = False, is_valid = False):
        self.root_path = root_path
        self.file_list = file_list
        self.is_test = is_test
        self.is_valid = is_valid

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, index):
        # An out-of-range index is replaced by a randomly chosen valid one.
        if index not in range(0, len(self.file_list)):
            return self.__getitem__(np.random.randint(0, self.__len__()))

        file_name = self.file_list[index] + ".png"
        image_path = os.path.join(self.root_path, "images", file_name)
        mask_path = os.path.join(self.root_path, "masks", file_name)

        if self.is_test:
            image, reflect_image = load_test_image(image_path)
            return image, reflect_image

        loader = load_valid_image if self.is_valid else load_train_image
        image, mask = loader(image_path, mask_path)
        return image, mask

# Read train.csv from the data directory and collect its `id` column as the list of
# training image ids. NOTE(review): the frame is named `depths_df` but is read from train.csv.
depths_df = pd.read_csv(os.path.join(directory, 'train.csv'))

# Root folder handed to TGSSaltDataset for training and validation.
train_path = os.path.join(directory, 'train')
file_list = list(depths_df['id'].values)

In [0]:
# https://github.com/leigh-plt/cs231n_hw2018/blob/master/assignment2/pytorch_tutorial.ipynb
def save_checkpoint(checkpoint_path, model, optimizer):
    """Write the model and optimizer state dicts to ``checkpoint_path``.

    The file holds a dict with the keys ``'state_dict'`` (model) and
    ``'optimizer'``; a confirmation line is printed afterwards.
    """
    torch.save(
        {
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        },
        checkpoint_path,
    )
    print('model saved to %s' % checkpoint_path)
    
def load_checkpoint(checkpoint_path, model, optimizer):
    """Restore ``model`` and ``optimizer`` in place from ``checkpoint_path``.

    Expects the layout written by ``save_checkpoint``: a dict with keys
    ``'state_dict'`` and ``'optimizer'``. Prints a confirmation line.
    """
    checkpoint = torch.load(checkpoint_path)
    # Model first, optimizer second.
    for target, key in ((model, 'state_dict'), (optimizer, 'optimizer')):
        target.load_state_dict(checkpoint[key])
    print('model loaded from %s' % checkpoint_path)

In [0]:
# Split the labelled ids for this seed/fold: the validation ids come from a CSV
# on Drive, every other id in `file_list` is used for training.
valid_ids = pd.read_csv("drive/kaggle/salt/seed/valid_index_seed{}_{}.csv".format(seed, fold), header=None)
file_list_val = list(valid_ids[0].values)
val_id_set = set(file_list_val)  # constant-time membership instead of a list scan per id
file_list_train = [f for f in file_list if f not in val_id_set]
dataset = TGSSaltDataset(train_path, file_list_train)
dataset_val = TGSSaltDataset(train_path, file_list_val, is_valid=True)

# First train: fresh network, plain SGD at a fixed learning rate (no scheduler).
model = SaltNet().cuda()

epoch = 30
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=0.0001)

best_iou = 0

for e in range(epoch):
    start = time.time()

    # ---- training pass ----
    train_loss = []
    train_iou = []
    model.set_mode('train')
    for image, mask in tqdm(data.DataLoader(dataset, batch_size = 16, shuffle = True)):
        optimizer.zero_grad()

        image = image.type(torch.FloatTensor).cuda()
        mask = mask.cuda()  # transferred once, reused for loss and metric
        # Tensors are passed directly; wrapping in Variable is a no-op on torch >= 0.4.
        logit, logit_pixel, logit_image = model(image)
        loss = model.criterion(logit, logit_pixel, logit_image, mask)
        iou = model.metric(logit, mask)

        loss.backward()
        optimizer.step()

        train_loss.append(loss.data)
        train_iou.append(iou.data)

    # ---- validation pass ----
    val_loss = []
    val_iou = []
    model.set_mode('valid')
    # No gradients are needed here; skipping graph construction saves GPU memory.
    with torch.no_grad():
        for image, mask in data.DataLoader(dataset_val, batch_size = 16, shuffle = False):
            image = image.type(torch.FloatTensor).cuda()
            mask = mask.cuda()
            logit, logit_pixel, logit_image = model(image)
            loss = model.criterion(logit, logit_pixel, logit_image, mask)
            iou = model.metric(logit, mask)
            val_loss.append(loss.data)
            val_iou.append(iou.data)

    # Keep only the checkpoint with the best mean validation IoU seen so far.
    epoch_val_iou = np.mean(val_iou)
    if epoch_val_iou > best_iou:
        best_iou = epoch_val_iou
        save_checkpoint('drive/model/seed{}_{}_scSE_binary_model1.pth'.format(seed, fold), model, optimizer)

    print("{}s \n".format(time.time() - start))
    print("Epoch: %d, Train_loss: %.3f, Train_iou: %.3f, Val_loss: %.3f, Val_iou: %.3f" % (e, np.mean(train_loss), np.mean(train_iou), np.mean(val_loss), epoch_val_iou))
# save the final model
# save_checkpoint('tgs-%i.pth' % epoch, model, optimizer)

In [0]:
# Second train
# Second train: continue from the best stage-1 weights with a lower learning
# rate and a plateau scheduler driven by the validation IoU.
# The optimizer state restored here goes into the previous optimizer object,
# which is replaced by a fresh SGD instance right below.
load_checkpoint('drive/model/seed{}_{}_scSE_binary_model1.pth'.format(seed, fold), model, optimizer)

epoch = 30
learning_rate = 0.005
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=0.0001)
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=6, min_lr=0.001)

best_iou = 0

for e in range(epoch):
    start = time.time()

    # ---- training pass ----
    train_loss = []
    train_iou = []
    model.set_mode('train')
    for image, mask in tqdm(data.DataLoader(dataset, batch_size = 16, shuffle = True)):
        optimizer.zero_grad()

        image = image.type(torch.FloatTensor).cuda()
        mask = mask.cuda()  # transferred once, reused for loss and metric
        # Tensors are passed directly; wrapping in Variable is a no-op on torch >= 0.4.
        logit, logit_pixel, logit_image = model(image)
        loss = model.criterion(logit, logit_pixel, logit_image, mask)
        iou = model.metric(logit, mask)

        loss.backward()
        optimizer.step()

        train_loss.append(loss.data)
        train_iou.append(iou.data)

    # ---- validation pass ----
    val_loss = []
    val_iou = []
    model.set_mode('valid')
    # No gradients are needed here; skipping graph construction saves GPU memory.
    with torch.no_grad():
        for image, mask in data.DataLoader(dataset_val, batch_size = 16, shuffle = False):
            image = image.type(torch.FloatTensor).cuda()
            mask = mask.cuda()
            logit, logit_pixel, logit_image = model(image)
            loss = model.criterion(logit, logit_pixel, logit_image, mask)
            iou = model.metric(logit, mask)
            val_loss.append(loss.data)
            val_iou.append(iou.data)

    epoch_val_iou = np.mean(val_iou)
    scheduler.step(epoch_val_iou)

    # Keep only the checkpoint with the best mean validation IoU of this stage.
    if epoch_val_iou > best_iou:
        best_iou = epoch_val_iou
        save_checkpoint('drive/model/seed{}_{}_scSE_binary_model2.pth'.format(seed, fold), model, optimizer)

    print("{}s \n".format(time.time() - start))
    print("Epoch: %d, Train_loss: %.3f, Train_iou: %.3f, Val_loss: %.3f, Val_iou: %.3f" % (e, np.mean(train_loss), np.mean(train_iou), np.mean(val_loss), epoch_val_iou))
# save the final model
# save_checkpoint('tgs-%i.pth' % epoch, model, optimizer)

In [0]:
# Third train
# Third train: continue from the best stage-2 weights with a still lower
# learning rate.
# The optimizer state restored here goes into the previous optimizer object,
# which is replaced by a fresh SGD instance right below.
load_checkpoint('drive/model/seed{}_{}_scSE_binary_model2.pth'.format(seed, fold), model, optimizer)

epoch = 30
learning_rate = 0.001
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=0.0001)
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=6, min_lr=0.0001)

# A checkpoint is only written once the validation IoU exceeds this floor.
# Starting `best_iou` at the floor (rather than comparing every epoch against
# the constant) ensures a later, worse epoch that is still above the floor can
# no longer overwrite a better checkpoint.
min_save_iou = 0.865
best_iou = min_save_iou

for e in range(epoch):
    start = time.time()

    # ---- training pass ----
    train_loss = []
    train_iou = []
    model.set_mode('train')
    for image, mask in tqdm(data.DataLoader(dataset, batch_size = 16, shuffle = True)):
        optimizer.zero_grad()

        image = image.type(torch.FloatTensor).cuda()
        mask = mask.cuda()  # transferred once, reused for loss and metric
        # Tensors are passed directly; wrapping in Variable is a no-op on torch >= 0.4.
        logit, logit_pixel, logit_image = model(image)
        loss = model.criterion(logit, logit_pixel, logit_image, mask)
        iou = model.metric(logit, mask)

        loss.backward()
        optimizer.step()

        train_loss.append(loss.data)
        train_iou.append(iou.data)

    # ---- validation pass ----
    val_loss = []
    val_iou = []
    model.set_mode('valid')
    # No gradients are needed here; skipping graph construction saves GPU memory.
    with torch.no_grad():
        for image, mask in data.DataLoader(dataset_val, batch_size = 16, shuffle = False):
            image = image.type(torch.FloatTensor).cuda()
            mask = mask.cuda()
            logit, logit_pixel, logit_image = model(image)
            loss = model.criterion(logit, logit_pixel, logit_image, mask)
            iou = model.metric(logit, mask)
            val_loss.append(loss.data)
            val_iou.append(iou.data)

    epoch_val_iou = np.mean(val_iou)
    scheduler.step(epoch_val_iou)

    # Save only when this epoch beats both the floor and every earlier epoch.
    if epoch_val_iou > best_iou:
        best_iou = epoch_val_iou
        save_checkpoint('drive/model/seed{}_{}_scSE_binary_model3.pth'.format(seed, fold), model, optimizer)

    print("{}s \n".format(time.time() - start))
    print("Epoch: %d, Train_loss: %.3f, Train_iou: %.3f, Val_loss: %.3f, Val_iou: %.3f" % (e, np.mean(train_loss), np.mean(train_iou), np.mean(val_loss), epoch_val_iou))
# save the final model
# save_checkpoint('tgs-%i.pth' % epoch, model, optimizer)

In [0]:
# Inference setup: rebuild the fold split, create a fresh network/optimizer and
# restore a trained checkpoint into them. No training happens in this cell, so
# it can be run on a fresh kernel instead of the three training stages.
valid_ids = pd.read_csv("drive/kaggle/salt/seed/valid_index_seed{}_{}.csv".format(seed, fold), header=None)
file_list_val = list(valid_ids[0].values)
val_id_set = set(file_list_val)  # constant-time membership instead of a list scan per id
file_list_train = [f for f in file_list if f not in val_id_set]
dataset = TGSSaltDataset(train_path, file_list_train)
dataset_val = TGSSaltDataset(train_path, file_list_val, is_valid=True)

model = SaltNet().cuda()

# The optimizer exists only because load_checkpoint restores its state too;
# the hyper-parameters below are overwritten by the checkpoint contents.
epoch = 30
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=0.0001)

# NOTE(review): this restores the stage-2 checkpoint (model2), not the model3
# file written by the third training stage - confirm that is intended.
load_checkpoint('drive/model/seed{}_{}_scSE_binary_model2.pth'.format(seed, fold), model, optimizer)
# load_checkpoint('drive/model/resnet34_2nd.model', model, optimizer)

In [0]:
import glob

# Collect the ids of all test images (file name without directory or extension).
test_path = os.path.join(directory, 'test')
# glob returns files in arbitrary order; sorting makes the prediction and
# submission row order reproducible between runs.
test_image_paths = sorted(glob.glob(os.path.join(test_path, 'images', '*.png')))
# basename/splitext work with any path separator and only strip the final
# ".png", so an id that itself contains a dot is not truncated.
test_file_list = [os.path.splitext(os.path.basename(p))[0] for p in test_image_paths]
print('First 3 names of test files:', test_file_list[:3])

In [0]:
from skimage.transform import resize

def downsample(img):
    """Remove singleton axes and crop rows/columns 13..113 of the result.

    For a 128x128 input this keeps the central 101x101 window, i.e. it drops
    13 leading and 14 trailing rows and columns.
    """
    first_kept = 13
    end_kept = 128 - 14  # exclusive upper bound of the window
    window = slice(first_kept, end_kept)
    return np.squeeze(img)[window, window]

In [0]:
# Predict every test image with horizontal-flip test-time augmentation: the
# model is run on the image and on its mirrored copy, the second prediction is
# mirrored back, and the two are averaged.
print(f"Test size: {len(test_file_list)}")
test_dataset = TGSSaltDataset(test_path, test_file_list, is_test = True)

all_predictions = []
model.set_mode('test')
# Inference only: disabling autograd avoids building a graph for every batch.
with torch.no_grad():
    # In test mode the dataset yields an (image, flipped image) pair per sample.
    for image1, image2 in tqdm(data.DataLoader(test_dataset, batch_size = 15)):
        image1 = image1.type(torch.FloatTensor).cuda()
        y_pred, logit_pixel, logit_image = model(image1)
        y_pred = y_pred.detach().cpu().numpy()
        y_pred = np.array([downsample(x) for x in y_pred])

        image2 = image2.type(torch.FloatTensor).cuda()
        y_pred_2, logit_pixel, logit_image = model(image2)
        y_pred_2 = y_pred_2.detach().cpu().numpy()
        y_pred_2 = np.array([downsample(x) for x in y_pred_2])
        # Undo the horizontal flip so both predictions are aligned.
        y_pred_2 = np.array([cv2.flip(x, 1) for x in y_pred_2])

        y_pred = (y_pred + y_pred_2) / 2
        all_predictions.append(y_pred)

# One row per test image, in the order of `test_file_list`.
all_predictions_stacked = np.vstack(all_predictions)

In [0]:
# Sanity check: display the shape of the stacked test predictions.
all_predictions_stacked.shape

In [0]:
def add_depth_channels(image): # (101, 101, 3)
    """Return a float32 copy of ``image`` with channels 1 and 2 rewritten.

    Channel 0 is kept. Channels 1 and 2 both end up holding channel 0
    multiplied by a per-row ramp that runs linearly from 0 (first row) to 1
    (last row). The input array is not modified.
    """
    out = image.astype('float32')
    height, _, _ = out.shape
    # Write the ramp into channel 1 first so it is rounded to float32 before
    # the multiplication, exactly like a row-by-row fill.
    out[:, :, 1] = np.linspace(0, 1, height)[:, np.newaxis]
    depth_scaled = out[:, :, 0] * out[:, :, 1]
    out[:, :, 2] = depth_scaled
    out[:, :, 1] = depth_scaled
    return out

def load_check_image(path, mask = False):
    """Load one PNG for the validation check.

    Args:
        path: location of the PNG file.
        mask: when true the file is treated as a mask.

    Returns:
        mask=True: a float32 tensor of shape (1, H, W) holding channel 0
        floor-divided by 255.
        mask=False: a tuple ``(image, flipped_image)`` of float32 tensors of
        shape (1, H + 27, W + 27); channel 0 of the image and of its horizontal
        mirror, edge-replicated by 13 px (top/left) and 14 px (bottom/right)
        and divided by 255.

    Raises:
        IOError: if OpenCV cannot read the file.
    """
    img = cv2.imread(str(path))
    if img is None:
        # cv2.imread signals an unreadable file by returning None; fail here
        # with the offending path instead of an opaque cvtColor error.
        raise IOError('could not read image: {}'.format(path))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    if mask:
        # Only channel 0 is needed, so the flip and depth channels are skipped.
        binary = img[:, :, 0:1].astype('float32') // 255
        return torch.from_numpy(np.transpose(binary, (2, 0, 1)).astype('float32'))

    tensors = []
    for view in (img, cv2.flip(img, 1)):
        view = add_depth_channels(view)
        view = cv2.copyMakeBorder(view, 13, 14, 13, 14, cv2.BORDER_REPLICATE)
        # Only channel 0 is kept; the depth channels computed above are discarded.
        view = view[:, :, 0:1] / 255.0
        tensors.append(torch.from_numpy(np.transpose(view, (2, 0, 1)).astype('float32')))
    return tensors[0], tensors[1]
      
class TGSSaltCheckDataset(data.Dataset):
    """Dataset yielding ``(image, flipped image, mask)`` for each file id.

    Images are read from ``<root_path>/images/<id>.png`` and masks from
    ``<root_path>/masks/<id>.png`` via ``load_check_image``.
    """

    def __init__(self, root_path, file_list):
        self.file_list = file_list
        self.root_path = root_path

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, index):
        # An index outside [0, len) is replaced by a randomly drawn valid one
        # instead of raising.
        if index not in range(0, len(self.file_list)):
            index = np.random.randint(0, self.__len__())

        png_name = self.file_list[index] + ".png"
        image_path = os.path.join(self.root_path, "images", png_name)
        mask_path = os.path.join(self.root_path, "masks", png_name)

        image, ref_image = load_check_image(image_path)
        mask = load_check_image(mask_path, mask = True)
        return image, ref_image, mask

In [0]:
# Predict the validation fold with the same horizontal-flip averaging used for
# the test set, keeping the ground-truth masks alongside for threshold tuning.
# Note: this rebinds `dataset_val` to a TGSSaltCheckDataset, so the training
# cells must not be re-run after this cell without recreating their dataset.
dataset_val = TGSSaltCheckDataset(train_path, file_list_val)

val_predictions = []
val_masks = []
model.set_mode('valid')
# Inference only: disabling autograd avoids building a graph for every batch.
with torch.no_grad():
    for image, ref_image, mask in tqdm(data.DataLoader(dataset_val, batch_size = 15)):
        image = image.type(torch.FloatTensor).cuda()
        y_pred, logit_pixel, logit_image = model(image)
        y_pred = y_pred.detach().cpu().numpy()
        y_pred = np.array([downsample(x) for x in y_pred])

        ref_image = ref_image.type(torch.FloatTensor).cuda()
        y_pred_2, logit_pixel, logit_image = model(ref_image)
        y_pred_2 = y_pred_2.detach().cpu().numpy()
        y_pred_2 = np.array([downsample(x) for x in y_pred_2])
        # Undo the horizontal flip so both predictions are aligned.
        y_pred_2 = np.array([cv2.flip(x, 1) for x in y_pred_2])

        y_pred = (y_pred + y_pred_2) / 2
        val_predictions.append(y_pred)
        val_masks.append(mask.numpy())

val_predictions_stacked = np.vstack(val_predictions)

# Masks are stacked along the batch axis; index 0 drops their channel axis.
val_masks_stacked = np.vstack(val_masks)[:, 0, :, :]
val_masks_stacked.shape, val_predictions_stacked.shape

In [0]:
#Score the model and do a threshold optimization by the best IoU.

# src: https://www.kaggle.com/aglotero/another-iou-metric
def iou_metric(y_true_in, y_pred_in, print_table=False):
    """Average precision of one binary mask over IoU thresholds 0.5 .. 0.95.

    Both inputs are binned into background ([0, 0.5)) and foreground
    ([0.5, 1]). The foreground IoU is compared against each threshold in
    ``np.arange(0.5, 1.0, 0.05)`` and the per-threshold precision
    ``tp / (tp + fp + fn)`` is averaged. Two empty masks score 1.

    Args:
        y_true_in: ground-truth mask array.
        y_pred_in: predicted mask array of the same shape.
        print_table: print the per-threshold table and the final average.

    Returns:
        The mean precision over the ten thresholds.
    """
    labels = y_true_in
    y_pred = y_pred_in

    # Fixed edges: automatic binning would produce wrong bins for an all-zero mask.
    edges = [0, 0.5, 1]

    # Joint 2x2 histogram: rows are true classes, columns predicted classes.
    overlap = np.histogram2d(labels.flatten(), y_pred.flatten(), bins=(edges, edges))[0]

    # Per-class pixel counts, shaped so that they broadcast to the 2x2 grid.
    true_area = np.histogram(labels, bins=edges)[0][:, np.newaxis]
    pred_area = np.histogram(y_pred, bins=edges)[0][np.newaxis, :]
    union = true_area + pred_area - overlap

    # Drop the background row/column; zeros become 1e-9 so that an empty
    # foreground on both sides yields an IoU of 1 rather than 0/0.
    overlap = overlap[1:, 1:]
    overlap[overlap == 0] = 1e-9
    union = union[1:, 1:]
    union[union == 0] = 1e-9

    iou = overlap / union

    def precision_at(threshold, iou):
        hits = iou > threshold
        hits_per_true = np.sum(hits, axis=1)
        hits_per_pred = np.sum(hits, axis=0)
        tp = np.sum(hits_per_true == 1)  # true objects matched exactly once
        fp = np.sum(hits_per_pred == 0)  # predicted objects matching nothing
        fn = np.sum(hits_per_true == 0)  # true objects left unmatched
        return tp, fp, fn

    if print_table:
        print("Thresh\tTP\tFP\tFN\tPrec.")

    prec = []
    for t in np.arange(0.5, 1.0, 0.05):
        tp, fp, fn = precision_at(t, iou)
        total = tp + fp + fn
        p = tp / total if total > 0 else 0
        if print_table:
            print("{:1.3f}\t{}\t{}\t{}\t{:1.3f}".format(t, tp, fp, fn, p))
        prec.append(p)

    average = np.mean(prec)
    if print_table:
        print("AP\t-\t-\t-\t{:1.3f}".format(average))
    return average

def iou_metric_batch(y_true_in, y_pred_in):
    """Mean of ``iou_metric`` over the first axis of the two arrays."""
    n_samples = y_true_in.shape[0]
    scores = [iou_metric(y_true_in[i], y_pred_in[i]) for i in range(n_samples)]
    return np.mean(scores)

In [0]:
# thresholds = np.linspace(0.3, 0.7, 31)
# ious = np.array([iou_metric_batch(val_masks_stacked, np.int32(val_predictions_stacked > threshold)) for threshold in tqdm(thresholds)])

## Scoring for last model, choose threshold by validation data 
# Candidate cut-offs expressed as probabilities ...
thresholds_ori = np.linspace(0.3, 0.7, 31)
# ... mapped through the inverse sigmoid (logit), because the predictions are
# compared without a sigmoid activation.
thresholds = np.log(thresholds_ori / (1 - thresholds_ori))

# Mean validation score for every candidate cut-off.
ious = np.array([
    iou_metric_batch(val_masks_stacked, val_predictions_stacked > cutoff)
    for cutoff in tqdm(thresholds)
])
print(ious)

# Pick the cut-off with the highest score.
threshold_best_index = ious.argmax()
threshold_best = thresholds[threshold_best_index]
iou_best = ious[threshold_best_index]

plt.plot(thresholds, ious)
plt.plot(threshold_best, iou_best, "xr", label="Best threshold")
plt.title("Threshold vs IoU ({}, {})".format(threshold_best, iou_best))
plt.xlabel("Threshold")
plt.ylabel("IoU")
plt.legend()

In [0]:
# Binarise the test predictions with the cut-off tuned on the validation fold.
# `threshold_best` lives in the same (logit) space as the predictions.
# Previously the value was assigned but the comparison used the literal 0,
# which silently ignored the tuned threshold.
threshold = threshold_best
binary_prediction = (all_predictions_stacked > threshold).astype(int)

def rle_encoding(x):
    """Run-length encode the pixels of ``x`` that equal 1.

    Pixels are numbered from 1 in column-major order (the array is transposed
    before flattening). The result is a flat list
    ``[start_1, length_1, start_2, length_2, ...]``; it is empty when no pixel
    equals 1.
    """
    on_pixels = np.where(x.T.flatten() == 1)[0]
    if on_pixels.size == 0:
        return []

    # Positions in `on_pixels` after which the next index is not adjacent,
    # i.e. where one run ends and the next begins.
    gaps = np.where(np.diff(on_pixels) > 1)[0]
    run_first = np.concatenate(([0], gaps + 1))
    run_last = np.concatenate((gaps, [on_pixels.size - 1]))

    run_lengths = []
    for first, last in zip(run_first, run_last):
        run_lengths.extend((on_pixels[first] + 1, int(last - first) + 1))
    return run_lengths

# One space-separated run-length string per predicted test mask.
all_masks = [
    ' '.join(str(value) for value in rle_encoding(p_mask))
    for p_mask in binary_prediction
]

In [0]:
# Assemble the submission: one row per test image with its run-length mask.
# Building the frame by column raises if the two lists differ in length,
# instead of silently padding the shorter one with missing values.
submit = pd.DataFrame({'id': test_file_list, 'rle_mask': all_masks}, columns=['id', 'rle_mask'])
# NOTE(review): the file name is hard-coded and says "fold_4" while the
# notebook configuration sets seed = 78 and fold = 1 - confirm the name.
submit.to_csv('seed78_fold_4_binary.csv', index = False)
submit.head()

In [0]:
from google.colab import files

# Hand the submission CSV written by the previous cell to the Colab `files`
# helper for download.
# NOTE(review): the name must stay in sync with the one passed to `to_csv`.
files.download('seed78_fold_4_binary.csv')