In [None]:
# 1 3 4 5 6 folds here

In [None]:
# tgs_1017: batch size 32, BORDER REPLICATE

In [1]:
import numpy as np
import pandas as pd

import os
import gc
import time
import sys
import logging
import math

from sklearn.model_selection import StratifiedKFold, train_test_split
from tqdm import tqdm_notebook, tqdm

from skimage.transform import AffineTransform, warp
from skimage.io import imread
from skimage import img_as_ubyte

from skimage.exposure import adjust_gamma

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, RandomSampler
from torchvision import models, datasets

import matplotlib.pyplot as plt

import cv2

PATH = '../input/'
if os.path.exists('../input/tgs-salt-identification-challenge/'):
    PATH = '../input/tgs-salt-identification-challenge/'
TR_IMG_PATH = PATH + 'train/images/'
TR_MASK_PATH = PATH + 'train/masks/'
TE_IMG_PATH = PATH + 'test/images/'

SEED = 20181016
N_SPLITS = 4
img_size = 101

# VERSION = 'tgs_1015'
NUM_WORKERS = 2

CHECKPOINT = False
SUBMIT = False

# LOG = 'tgs_1004'

In [2]:
df_train = pd.read_csv(PATH + 'train.csv', index_col='id')
depths = pd.read_csv(PATH + 'depths.csv', index_col='id')
df_train['depth'] = depths
df_test = pd.DataFrame(index=depths.index.drop(df_train.index))
df_test['depth'] = depths

def preprocess_img_gray(f):
    return (imread(f)/65535)[..., np.newaxis]
def preprocess_img_rgb(f):
    return imread(f)/255

images_train = np.empty((df_train.shape[0], img_size, img_size, 3), dtype=np.float32)
masks_train = np.empty((df_train.shape[0], img_size, img_size, 1), dtype=np.float32)
images_test = np.empty((df_test.shape[0], img_size, img_size, 3), dtype=np.float32)
for i, f in enumerate(tqdm_notebook(df_train.index)):
    images_train[i] = preprocess_img_rgb(TR_IMG_PATH + f + '.png')
    masks_train[i] = preprocess_img_gray(TR_MASK_PATH + f + '.png')
for i, f in enumerate(tqdm_notebook(df_test.index)):
    images_test[i] = preprocess_img_rgb(TE_IMG_PATH + f + '.png')

HBox(children=(IntProgress(value=0, max=4000), HTML(value='')))




HBox(children=(IntProgress(value=0, max=18000), HTML(value='')))




In [3]:
from torchvision.models.resnet import model_zoo, model_urls, BasicBlock

class ResNet34(nn.Module):

    def __init__(self, block, layers, num_classes=1000):
        self.inplanes = 64
        super(ResNet34, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        # self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        # self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        # x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x
    
def resnet34(pretrained=False):
    model = ResNet34(BasicBlock, [3, 4, 6, 3])
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
    return model

In [4]:
class ConvBnRelu2d(nn.Module):
    
    def __init__(self, in_channels, out_channels,
                 kernel_size=3, stride=1, padding=1):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels,
                              kernel_size=kernel_size, stride=stride, padding=padding, bias=False)
        self.bn = nn.BatchNorm2d(out_channels)
    
    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return F.relu(x)


class cSE(nn.Module):
    def __init__(self, channels):
        super().__init__()
        self.avg = nn.AdaptiveAvgPool2d(1)
        self.fc1 = nn.Linear(channels, channels//2)
        self.fc2 = nn.Linear(channels//2, channels)
        
    def forward(self, x):
        z = self.avg(x).squeeze()
        z = F.relu(self.fc1(z))
        z = torch.sigmoid(self.fc2(z))
        return z.reshape(*z.shape, 1, 1) * x

    
class scSE(nn.Module):
    def __init__(self, channels, reduction=16):
        super().__init__()
        self.avg = nn.AdaptiveAvgPool2d(1)
        self.fc1 = nn.Linear(channels, channels//reduction)
        self.fc2 = nn.Linear(channels//reduction, channels)
        self.conv = nn.Conv2d(channels, 1, kernel_size=1)
        
    def forward(self, x):
        z = self.avg(x).squeeze()
        z = F.relu(self.fc1(z))
        z = torch.sigmoid(self.fc2(z))
        cse = z.reshape(*z.shape, 1, 1) * x
        
        q = torch.sigmoid(self.conv(x))
        sse = q * x
        
        return cse + sse
        

class Decoder(nn.Module):
    
    def __init__(self, in_channels, mid_channels, out_channels,
                 kernel_size=3, padding=1):
        super(Decoder, self).__init__()
        self.conv1 = ConvBnRelu2d(in_channels, mid_channels,
                                  kernel_size=3, stride=1, padding=1)
        self.conv2 = ConvBnRelu2d(mid_channels, out_channels,
                                  kernel_size=3, stride=1, padding=1)
        self.scse = scSE(out_channels)
    
    def forward(self, x, e=None, upsample=True):
        if upsample:
            x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=False)
        if e is not None:
            x = torch.cat([x, e], dim=1)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.scse(x)
        return x
    

class SEResNet34Unet(nn.Module):
    def __init__(self):
        super().__init__()
        self.resnet = resnet34(pretrained=True)
        
        self.input = nn.Sequential(
            self.resnet.conv1,
            self.resnet.bn1,
            self.resnet.relu
        )
        self.encoder1 = self.resnet.layer1  # 64
        self.encoder2 = self.resnet.layer2  # 128
        self.encoder3 = self.resnet.layer3  # 256
        self.encoder4 = self.resnet.layer4  # 512
        
        self.scse1 = scSE(64)
        self.scse2 = scSE(128)
        self.scse3 = scSE(256)
        self.scse4 = scSE(512)
        
        self.center = nn.Sequential(
            ConvBnRelu2d(512, 512, kernel_size=3, padding=1),
            ConvBnRelu2d(512, 256, kernel_size=3, padding=1),
            # nn.MaxPool2d(kernel_size=2, stride=2),  # this step enables to add decoder0
        )
        
        self.decoder4 = Decoder(512 + 256, 512, 64)  # bottleneck
        self.decoder3 = Decoder(256 +  64, 256, 64)
        self.decoder2 = Decoder(128 +  64, 128, 64)
        self.decoder1 = Decoder( 64 +  64,  64, 64)
        self.decoder0 = Decoder( 64,        32, 64)
        
        self.conv_segment = nn.Sequential(
            nn.Conv2d(320, 64, kernel_size=3, padding=1),
            # nn.ReLU(),
        )
        self.conv_empty = nn.Sequential(
            nn.Conv2d(512, 64, kernel_size=1, padding=0),
            # nn.ReLU(),
        )
        self.fuse = nn.Sequential(
            nn.Conv2d(128, 64, kernel_size=3, padding=1),
            nn.ReLU(),
        )
        
        self.logit_segment = nn.Sequential(
            nn.ReLU(),
            nn.Conv2d(64, 1, kernel_size=1, padding=0)
        )
        self.logit_empty = nn.Sequential(
            nn.ReLU(),
            nn.Linear(64, 1)
        )
        self.logit_fuse = nn.Conv2d(64, 1, kernel_size=1, padding=0)
        
        
        
#         self.logit = nn.Sequential(
#             nn.Conv2d(320, 64, kernel_size=3, padding=1),
#             nn.ReLU(),
#             nn.Conv2d(64, 1, kernel_size=1, padding=0)
#         )        
#         self.logit_empty = nn.Sequential(
#             nn.Linear(512, 128),
#             nn.ReLU(),
#             nn.Linear(128, 1)
#         )

    def forward(self, x):
        x = self.input(x)  # 64
        
        e1 = self.encoder1(x)  # 64
        e1 = self.scse1(e1)
        e2 = self.encoder2(e1)  # 32
        e2 = self.scse2(e2)
        e3 = self.encoder3(e2)  # 16
        e3 = self.scse3(e3)
        e4 = self.encoder4(e3)  # 8
        e4 = self.scse4(e4)
        
        c = self.center(e4)  # 4
        
        d4 = self.decoder4( c, e4, upsample=False)
        d3 = self.decoder3(d4, e3)
        d2 = self.decoder2(d3, e2)
        d1 = self.decoder1(d2, e1)
        d0 = self.decoder0(d1)
        
        h = torch.cat([
            d0,
            F.interpolate(d1, scale_factor=2, mode='bilinear', align_corners=False),
            F.interpolate(d2, scale_factor=4, mode='bilinear', align_corners=False),
            F.interpolate(d3, scale_factor=8, mode='bilinear', align_corners=False),
            F.interpolate(d4, scale_factor=16, mode='bilinear', align_corners=False),
        ], dim=1)
        h = self.conv_segment(h)
        logit_segment = self.logit_segment(h)
        
        e = F.adaptive_avg_pool2d(e4, output_size=1) # .view(batch_size, -1)  # (batch_size, 512)
        e = self.conv_empty(e).view(batch_size, -1)
        logit_empty = self.logit_empty(e).view(-1)
        
        fuse = self.fuse(torch.cat([
            h,
            F.interpolate(e.view(batch_size, -1, 1, 1), scale_factor=128, mode='nearest')
        ], 1))
        logit = self.logit_fuse(fuse)
        
        return logit, logit_segment, logit_empty

In [5]:
class TGSdataset(Dataset):
    def __init__(self, X, y, augment):
        self.X = X
        self.y = y
        self.augment = augment
        
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        image = self.X[idx]
        mask = self.y[idx]
        image, mask = self.augment(image, mask)
        return image, mask
    
def train_augmentation(Xi, yi):
    if np.random.uniform() > 0.5:
        Xi = Xi[:, ::-1, :]
        yi = yi[:, ::-1, :]
    
    if np.random.uniform() > 0.5:
        sw = np.random.randint(3)
        if sw == 0:
            xl, xr, yu, yd = np.random.randint(0, 11, 4)
            Xi = Xi[:, xl:101-xr, :]
            yi = yi[:, xl:101-xr, :]
            # Xi = Xi[yu:101-yd, xl:101-xr, :]
            # yi = yi[yu:101-yd, xl:101-xr, :]
            Xi = cv2.resize(Xi, dsize=(101, 101))
            yi = cv2.resize(yi, dsize=(101, 101), interpolation=cv2.INTER_NEAREST) 
            yi = (yi > 0.5).astype(np.float32)
        if sw == 1:
            dx = np.random.randint(-10, 11)
            M = np.array([[1, -2*dx/101, dx], [0, 1, 0]])  # cot(shr) = 2*dx / 101
            Xi = cv2.warpAffine(Xi, M, (101, 101), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101)
            yi = cv2.warpAffine(yi, M, (101, 101), flags=cv2.INTER_NEAREST, borderMode=cv2.BORDER_REFLECT_101)
            yi = (yi > 0.5).astype(np.float32)
        if sw == 2:
            deg = np.random.uniform(-10, 10)
            M = cv2.getRotationMatrix2D((101/2, 101/2), deg, 1)
            Xi = cv2.warpAffine(Xi, M, (101, 101), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101)
            yi = cv2.warpAffine(yi, M, (101, 101), flags=cv2.INTER_NEAREST, borderMode=cv2.BORDER_REFLECT_101)
            yi = (yi > 0.5).astype(np.float32)
            
    if np.random.uniform() > 0.5:
        sw = np.random.randint(3)
        if sw == 0:
            gamma = np.random.uniform(0.95, 1.05)
            Xi = np.clip(Xi ** gamma, 0, 1)
        if sw == 1:
            brs = np.random.uniform(-0.05, 0.05)
            Xi = np.clip(Xi + brs, 0, 1)
        if sw == 2:
            brm = np.random.uniform(0.95, 1.05)
            Xi = np.clip(Xi * brm, 0, 1)
            
    Xi = cv2.copyMakeBorder(Xi, 14, 13, 14, 13, cv2.BORDER_REPLICATE)
    yi = cv2.copyMakeBorder(yi, 14, 13, 14, 13, cv2.BORDER_REPLICATE)[...,np.newaxis]
    
    return Xi, yi

def valid_augmentation(Xi, yi):
    Xi = cv2.copyMakeBorder(Xi, 14, 13, 14, 13, cv2.BORDER_REPLICATE)
    yi = cv2.copyMakeBorder(yi, 14, 13, 14, 13, cv2.BORDER_REPLICATE)[...,np.newaxis]
    return Xi, yi

class TGSdataset_test(Dataset):
    def __init__(self, X):
        self.X = X
    def __len__(self):
        return len(self.X)
    def __getitem__(self, idx):
        Xi = self.X[idx]
        return cv2.copyMakeBorder(Xi, 14, 13, 14, 13, cv2.BORDER_REPLICATE)

# got batches of a list of tuples (image, mask)
def tgs_collate(batch):
    len_batch = len(batch)
    image_batch = []
    mask_batch = []
    for i in range(len_batch):
        image_batch.append(batch[i][0])
        mask_batch.append(batch[i][1])
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    image_batch = torch.from_numpy((np.array(image_batch) - mean) / std).permute([0, 3, 1, 2])
    mask_batch = torch.from_numpy(np.array(mask_batch)).permute([0, 3, 1, 2])
    empty_batch = torch.sum(mask_batch, dim=(1,2,3)) != 0
    return image_batch.float(), mask_batch.float(), empty_batch

def tgs_collate_test(batch):
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    image_batch = torch.from_numpy((np.array(batch) - mean) / std).permute([0, 3, 1, 2])
    return image_batch.float()

In [6]:
coverage = np.ceil(masks_train.sum(axis=(1,2,3)) / 101**2 * 10)
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=SEED).split(np.zeros(len(images_train)), coverage)

In [7]:
_, _ = next(skf)
_, _ = next(skf)
_, _ = next(skf)
_, _ = next(skf)
_, _ = next(skf)

In [8]:
tr_idx, val_idx = next(skf)
images_tr, images_val, masks_tr, masks_val =\
images_train[tr_idx], images_train[val_idx], masks_train[tr_idx], masks_train[val_idx]
train_dataset = TGSdataset(images_tr, masks_tr, train_augmentation)
valid_dataset = TGSdataset(images_val, masks_val, valid_augmentation)
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True, drop_last=True, collate_fn=tgs_collate)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True, drop_last=True, collate_fn=tgs_collate)

In [9]:
"""
Lovasz-Softmax and Jaccard hinge loss in PyTorch
Maxim Berman 2018 ESAT-PSI KU Leuven (MIT License)
"""

from __future__ import print_function, division

import torch
from torch.autograd import Variable
import torch.nn.functional as F
import numpy as np
try:
    from itertools import  ifilterfalse
except ImportError: # py3k
    from itertools import  filterfalse


def lovasz_grad(gt_sorted):
    """
    Computes gradient of the Lovasz extension w.r.t sorted errors
    See Alg. 1 in paper
    """
    p = len(gt_sorted)
    gts = gt_sorted.sum()
    intersection = gts - gt_sorted.float().cumsum(0)
    union = gts + (1 - gt_sorted).float().cumsum(0)
    jaccard = 1. - intersection / union
    if p > 1: # cover 1-pixel case
        jaccard[1:p] = jaccard[1:p] - jaccard[0:-1]
    return jaccard


# --------------------------- BINARY LOSSES ---------------------------


def lovasz_logistic(logits, labels, per_image=True, ignore=None):
    """
    Binary Lovasz hinge loss
      logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty)
      labels: [B, H, W] Tensor, binary ground truth masks (0 or 1)
      per_image: compute the loss per image instead of per batch
      ignore: void class id
    """
    if per_image:
        loss = mean(lovasz_logistic_flat(*flatten_binary_scores(log.squeeze(1).unsqueeze(0), lab.squeeze(1).unsqueeze(0), ignore))
                          for log, lab in zip(logits, labels))
    else:
        loss = lovasz_logistic_flat(*flatten_binary_scores(logits.squeeze(1), labels.squeeze(1), ignore))
    return loss


def lovasz_logistic_flat(logits, labels):
    if len(labels) == 0:
        return logits.sum() * 0.
    signs = 2. * labels.float() - 1.
    errors = torch.log(1. + torch.exp(-logits * Variable(signs)))
    errors_sorted, perm = torch.sort(errors, dim=0, descending=True)
    perm = perm.data
    gt_sorted = labels[perm]
    grad = lovasz_grad(gt_sorted)
    loss = torch.dot(errors_sorted, Variable(grad))
    return loss


# customed
def lovasz_hinge(logits, labels, per_image=True, ignore=None):
    """
    Binary Lovasz hinge loss
      logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty)
      labels: [B, H, W] Tensor, binary ground truth masks (0 or 1)
      per_image: compute the loss per image instead of per batch
      ignore: void class id
    """
    if per_image:
        loss = mean(lovasz_hinge_flat(*flatten_binary_scores(log.squeeze(1).unsqueeze(0), lab.squeeze(1).unsqueeze(0), ignore))
                          for log, lab in zip(logits, labels))
    else:
        loss = lovasz_hinge_flat(*flatten_binary_scores(logits.squeeze(1), labels.squeeze(1), ignore))
    return loss


def lovasz_hinge_flat(logits, labels):
    """
    Binary Lovasz hinge loss
      logits: [P] Variable, logits at each prediction (between -\infty and +\infty)
      labels: [P] Tensor, binary ground truth labels (0 or 1)
      ignore: label to ignore
    """
    if len(labels) == 0:
        # only void pixels, the gradients should be 0
        return logits.sum() * 0.
    signs = 2. * labels.float() - 1.
    errors = (1. - logits * Variable(signs))
    errors_sorted, perm = torch.sort(errors, dim=0, descending=True)
    perm = perm.data
    gt_sorted = labels[perm]
    grad = lovasz_grad(gt_sorted)
    loss = torch.dot(F.elu(errors_sorted)+1, Variable(grad))
    return loss


def flatten_binary_scores(scores, labels, ignore=None):
    """
    Flattens predictions in the batch (binary case)
    Remove labels equal to 'ignore'
    """
    scores = scores.view(-1)
    labels = labels.view(-1)
    if ignore is None:
        return scores, labels
    valid = (labels != ignore)
    vscores = scores[valid]
    vlabels = labels[valid]
    return vscores, vlabels


# --------------------------- MULTICLASS LOSSES ---------------------------


def lovasz_softmax(probas, labels, only_present=False, per_image=False, ignore=None):
    """
    Multi-class Lovasz-Softmax loss
      probas: [B, C, H, W] Variable, class probabilities at each prediction (between 0 and 1)
      labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1)
      only_present: average only on classes present in ground truth
      per_image: compute the loss per image instead of per batch
      ignore: void class labels
    """
    if per_image:
        loss = mean(lovasz_softmax_flat(*flatten_probas(prob.unsqueeze(0), lab.unsqueeze(0), ignore), only_present=only_present)
                          for prob, lab in zip(probas, labels))
    else:
        loss = lovasz_softmax_flat(*flatten_probas(probas, labels, ignore), only_present=only_present)
    return loss


def lovasz_softmax_flat(probas, labels, only_present=False):
    """
    Multi-class Lovasz-Softmax loss
      probas: [P, C] Variable, class probabilities at each prediction (between 0 and 1)
      labels: [P] Tensor, ground truth labels (between 0 and C - 1)
      only_present: average only on classes present in ground truth
    """
    C = probas.size(1)
    losses = []
    for c in range(C):
        fg = (labels == c).float() # foreground for class c
        if only_present and fg.sum() == 0:
            continue
        errors = (Variable(fg) - probas[:, c]).abs()
        errors_sorted, perm = torch.sort(errors, 0, descending=True)
        perm = perm.data
        fg_sorted = fg[perm]
        losses.append(torch.dot(errors_sorted, Variable(lovasz_grad(fg_sorted))))
    return mean(losses)


def flatten_probas(probas, labels, ignore=None):
    """
    Flattens predictions in the batch
    """
    B, C, H, W = probas.size()
    probas = probas.permute(0, 2, 3, 1).contiguous().view(-1, C)  # B * H * W, C = P, C
    labels = labels.view(-1)
    if ignore is None:
        return probas, labels
    valid = (labels != ignore)
    vprobas = probas[valid.nonzero().squeeze()]
    vlabels = labels[valid]
    return vprobas, vlabels


# --------------------------- HELPER FUNCTIONS ---------------------------

def mean(l, ignore_nan=False, empty=0):
    """
    nanmean compatible with generators.
    """
    l = iter(l)
    if ignore_nan:
        l = ifilterfalse(np.isnan, l)
    try:
        n = 1
        acc = next(l)
    except StopIteration:
        if empty == 'raise':
            raise ValueError('Empty mean')
        return empty
    for n, v in enumerate(l, 2):
        acc += v
    if n == 1:
        return acc
    return acc / n

In [10]:
def validation():
    model.eval()
    valid_loss = 0.0
    valid_score = 0.0
    empty_score = 0.0
    n_batch = len(valid_loader)
    for i, (img_batch, mask_batch, empty_batch) in enumerate(valid_loader):
        img_batch = img_batch.cuda()
        mask_batch = mask_batch.cuda()
        empty_batch = empty_batch.cuda()
        with torch.no_grad():
            logits, logits_segment, logits_empty = net(img_batch) # nn.parallel.data_parallel(model, img_batch)
            loss_, loss_segment, loss_empty = criterion(logits, logits_segment, logits_empty, mask_batch, empty_batch)
            loss = loss_ + loss_segment + loss_empty
            
            preds = torch.sigmoid(logits)
            preds_empty = torch.sigmoid(logits_empty).round().byte()
            score = iou(mask_batch, preds)
            e_score = (preds_empty == empty_batch).sum() / batch_size
        valid_loss += loss.item()
        valid_score += score.item()
        empty_score += e_score.item()
    model.train()
    return valid_loss / n_batch, valid_score / n_batch, empty_score / n_batch

def iou(y_true, y_pred):
    preds = y_pred.round().float()
    preds += y_true.float()
    u_ = (preds > 0).sum(dim=(2,3), dtype=torch.float)
    i_ = (preds > 1).sum(dim=(2,3), dtype=torch.float)
    iou = (i_ + 1e-10) / (u_ + 1e-10)
    threshold = torch.arange(0.5, 1.0, 0.05).cuda()
    return (iou > threshold).float().mean()

# def iou(y_true, y_pred, thr=0.5):
#     preds = (y_pred > thr).float()
#     preds += y_true
#     u_ = (preds > 0).sum(dim=(2,3), dtype=torch.float)  # do not squeeze to apply threshold
#     i_ = (preds == 2).sum(dim=(2,3), dtype=torch.float)
#     iou = torch.where(u_ == 0, torch.ones_like(u_), i_/u_)
#     threshold = torch.arange(0.5, 1.0, 0.05).cuda()
#     return (iou > threshold).float().mean()

In [6]:
def load_checkpoint(f_=None):
    MODEL_PATH = './pt/'
    if f_ is not None:
        checkpoint = torch.load(MODEL_PATH + f_)
        return checkpoint
    models = [f for f in os.listdir(MODEL_PATH) if 'pt' in f]
    checkpoint = torch.load(MODEL_PATH + models[0])
    return checkpoint

In [12]:
def train(epochs, start_epoch=1, scheduler=None, early_stopping=False, best_checkpoint=True, verbose=1):
    model.train()
    iter_valid = 300
    n_batch = len(train_loader)
    # log = logging.getLogger('train')
    # log.open('train.log', mode='a')

    best_validation_score = 0.0
    best_train_score = 0.0
    best_epoch = 0
    
    log_fn = f'log/{VERSION}_{start_epoch}_{start_epoch+epochs-1}.log'
    if os.path.exists(log_fn):
        for i in range(1, 10):
            log_fn = f'log/{VERSION}_{start_epoch}_{start_epoch+epochs-1}_{i}.log'
            if not os.path.exists(log_fn):
                break
    log_file = open(log_fn, 'w')
    
    for e in range(epochs):
        epoch = start_epoch + e
        t_ = time.time()
        running_loss = 0.0
        running_score = 0.0
        valid_loss = 0.0
        valid_score = 0.0
        empty_score = 0.0
        valid_empty_score = 0.0
        n_valid = 0
        
        for i, (img_batch, mask_batch, empty_batch) in enumerate(train_loader, 1):
            img_batch = img_batch.float().cuda()
            mask_batch = mask_batch.float().cuda()
            empty_batch = empty_batch.cuda()
            logits, logits_segment, logits_empty = net(img_batch) # nn.parallel.data_parallel(model, img_batch)
            loss_, loss_segment, loss_empty = criterion(logits, logits_segment, logits_empty, mask_batch, empty_batch)
            loss = loss_ + loss_segment + loss_empty
            
            preds = torch.sigmoid(logits)
            preds_empty = torch.sigmoid(logits_empty).round().byte()
            score = iou(mask_batch, preds)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            running_score += score.item()
            empty_score += ((preds_empty == empty_batch).sum() / batch_size).item()
            
            if i % iter_valid == 0:
                n_valid += 1
                v_l, v_s, e_s = validation()
                valid_loss += v_l
                valid_score += v_s
                valid_empty_score += e_s
                print('%3d / %d, Epoch: %3d  Train Loss: %.6f  Train Score: %.6f  Accuracy: %.6f  Valid Loss: %.6f  Valid Score: %.6f  Valid Accuracy: %.6f\r'
                      % (i, n_batch, epoch, running_loss/i, running_score/i, empty_score/i, valid_loss/n_valid, valid_score/n_valid, valid_empty_score/n_valid), end= "")
                continue
            
            if verbose > 0:
                print('%3d / %d, Epoch: %3d  Train Loss: %.6f  Train Score: %.6f  Accuracy: %.6f\r'
                      % (i, n_batch, epoch, running_loss/i, running_score/i, empty_score/i), end= "")
        # end for in enumerate(train_loader, 1)

        n_valid += 1
        v_l, v_s, e_s = validation()
        valid_loss += v_l
        valid_score += v_s
        valid_empty_score += e_s
        if scheduler is not None:
            scheduler.step(valid_score/n_valid)
        
        # checkpoint utilities
        if valid_score/n_valid > best_validation_score:
            best_validation_score = valid_score / n_valid
            best_train_score = running_score / i
            best_epoch = epoch
            if best_checkpoint:
                model_state_dict = model.state_dict()
                optimizer_state_dict = optimizer.state_dict()
        if early_stopping:
            if epoch - best_epoch == early_stopping:
                break

        log_message = '%3d / %d, Epoch: %3d  Train Loss: %.6f  Train Score: %.6f  Accuracy: %.6f  Valid Loss: %.6f  Valid Score: %.6f  Valid Accuracy: %.6f  %3d Sec.'\
        % (i, n_batch, epoch, running_loss/i, running_score/i, empty_score/i, valid_loss/n_valid, valid_score/n_valid, valid_empty_score/n_valid, time.time() - t_)
        print(log_message)
        print(log_message, file=log_file)
        epoch += 1

    # end for in range(epochs)
    
    log_file.close()
    # save model
    model_path = 'pt/'
    if best_checkpoint:
        f_ = f'{VERSION}_{best_epoch}_{best_validation_score:.5f}.pt'
        torch.save({
            'epoch': best_epoch,
            'model_state_dict': model_state_dict,
            'optimizer_state_dict': optimizer_state_dict,
            'best_validation_score': best_validation_score,
            'best_train_score': best_train_score,
        }, model_path + f_)

    f_ = f'{VERSION}_{epoch-1}_{valid_score/n_valid:.5f}.pt'
    torch.save({
        'epoch': epochs,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'train_score': running_score/n_batch,
        'validation_score': valid_score,
    }, model_path + f_)
    return

In [7]:
model = SEResNet34Unet()
net = model.cuda()
# net = nn.DataParallel(model).cuda()

In [14]:
def criterion(logits, logits_segment, logits_empty, truth, truth_empty):
    loss = lovasz_hinge(logits, truth)    
    loss_segment = lovasz_hinge(logits_segment[truth_empty], truth[truth_empty])
    loss_empty = F.binary_cross_entropy_with_logits(logits_empty, truth_empty.float())
    return loss, 0.5*loss_segment, 0.05*loss_empty

In [15]:
lr = 0.01
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=0.0001)

In [None]:
VERSION = 'tgs_1017_fold6'
train(epochs=90)

112 / 112, Epoch:   1  Train Loss: 2.410121  Train Score: 0.543778  Accuracy: 0.000000  Valid Loss: 1.977074  Valid Score: 0.634375  Valid Accuracy: 0.000000   41 Sec.
 32 / 112, Epoch:   2  Train Loss: 1.788317  Train Score: 0.684570  Accuracy: 0.062500

In [None]:
optimizer.param_groups[0]['lr'] = 0.001
train(epochs=60, start_epoch=91)

In [16]:
VERSION = 'tgs_1017_fold5'
train(epochs=90)

112 / 112, Epoch:   1  Train Loss: 2.475530  Train Score: 0.524749  Accuracy: 0.000000  Valid Loss: 1.979031  Valid Score: 0.683594  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:   2  Train Loss: 1.749631  Train Score: 0.687416  Accuracy: 0.008929  Valid Loss: 1.505675  Valid Score: 0.755990  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:   3  Train Loss: 1.520802  Train Score: 0.734933  Accuracy: 0.017857  Valid Loss: 1.514370  Valid Score: 0.759375  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:   4  Train Loss: 1.389388  Train Score: 0.760519  Accuracy: 0.008929  Valid Loss: 1.346280  Valid Score: 0.784635  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:   5  Train Loss: 1.343103  Train Score: 0.775391  Accuracy: 0.008929  Valid Loss: 1.184042  Valid Score: 0.802083  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:   6  Train Loss: 1.254469  Train Score: 0.790151  Accuracy: 0.035714  Valid Loss: 1.259888  Valid Score: 0.784115  Valid Accuracy: 0.000000   

112 / 112, Epoch:  50  Train Loss: 0.661388  Train Score: 0.890179  Accuracy: 0.455357  Valid Loss: 0.986014  Valid Score: 0.819010  Valid Accuracy: 0.083333   41 Sec.
112 / 112, Epoch:  51  Train Loss: 0.643986  Train Score: 0.890374  Accuracy: 0.473214  Valid Loss: 1.138384  Valid Score: 0.858333  Valid Accuracy: 0.083333   41 Sec.
112 / 112, Epoch:  52  Train Loss: 0.666328  Train Score: 0.892076  Accuracy: 0.455357  Valid Loss: 1.105067  Valid Score: 0.836979  Valid Accuracy: 0.083333   41 Sec.
112 / 112, Epoch:  53  Train Loss: 0.612242  Train Score: 0.899051  Accuracy: 0.508929  Valid Loss: 1.077482  Valid Score: 0.818229  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:  54  Train Loss: 0.587491  Train Score: 0.903013  Accuracy: 0.535714  Valid Loss: 1.045044  Valid Score: 0.840885  Valid Accuracy: 0.083333   41 Sec.
112 / 112, Epoch:  55  Train Loss: 0.670481  Train Score: 0.887946  Accuracy: 0.473214  Valid Loss: 1.034853  Valid Score: 0.833333  Valid Accuracy: 0.166667   

In [17]:
optimizer.param_groups[0]['lr'] = 0.001
train(epochs=60, start_epoch=91)

112 / 112, Epoch:  91  Train Loss: 0.435598  Train Score: 0.929408  Accuracy: 0.776786  Valid Loss: 0.995265  Valid Score: 0.867448  Valid Accuracy: 0.083333   41 Sec.
112 / 112, Epoch:  92  Train Loss: 0.442495  Train Score: 0.928265  Accuracy: 0.767857  Valid Loss: 0.972444  Valid Score: 0.868750  Valid Accuracy: 0.083333   41 Sec.
112 / 112, Epoch:  93  Train Loss: 0.413796  Train Score: 0.932673  Accuracy: 0.812500  Valid Loss: 1.006143  Valid Score: 0.865885  Valid Accuracy: 0.166667   41 Sec.
112 / 112, Epoch:  94  Train Loss: 0.403523  Train Score: 0.936802  Accuracy: 0.812500  Valid Loss: 0.997158  Valid Score: 0.868229  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:  95  Train Loss: 0.401316  Train Score: 0.935073  Accuracy: 0.812500  Valid Loss: 1.016942  Valid Score: 0.861719  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:  96  Train Loss: 0.400506  Train Score: 0.935882  Accuracy: 0.857143  Valid Loss: 1.051957  Valid Score: 0.859896  Valid Accuracy: 0.000000   

112 / 112, Epoch: 140  Train Loss: 0.349824  Train Score: 0.946094  Accuracy: 0.883929  Valid Loss: 1.039126  Valid Score: 0.869531  Valid Accuracy: 0.083333   41 Sec.
112 / 112, Epoch: 141  Train Loss: 0.345648  Train Score: 0.946094  Accuracy: 0.892857  Valid Loss: 1.043318  Valid Score: 0.867188  Valid Accuracy: 0.083333   41 Sec.
112 / 112, Epoch: 142  Train Loss: 0.334220  Train Score: 0.950223  Accuracy: 0.812500  Valid Loss: 1.044449  Valid Score: 0.862240  Valid Accuracy: 0.083333   41 Sec.
112 / 112, Epoch: 143  Train Loss: 0.344366  Train Score: 0.948131  Accuracy: 0.892857  Valid Loss: 1.046604  Valid Score: 0.863021  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch: 144  Train Loss: 0.332283  Train Score: 0.948800  Accuracy: 0.901786  Valid Loss: 1.077909  Valid Score: 0.864323  Valid Accuracy: 0.083333   41 Sec.
112 / 112, Epoch: 145  Train Loss: 0.348326  Train Score: 0.947294  Accuracy: 0.848214  Valid Loss: 1.128842  Valid Score: 0.860156  Valid Accuracy: 0.000000   

In [16]:
VERSION = 'tgs_1017_fold4'
train(epochs=90)

112 / 112, Epoch:   1  Train Loss: 2.517469  Train Score: 0.524888  Accuracy: 0.008929  Valid Loss: 2.166930  Valid Score: 0.646875  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:   2  Train Loss: 1.695437  Train Score: 0.709487  Accuracy: 0.000000  Valid Loss: 1.664981  Valid Score: 0.726823  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:   3  Train Loss: 1.461313  Train Score: 0.752455  Accuracy: 0.017857  Valid Loss: 1.462445  Valid Score: 0.760156  Valid Accuracy: 0.166667   41 Sec.
112 / 112, Epoch:   4  Train Loss: 1.324205  Train Score: 0.778739  Accuracy: 0.044643  Valid Loss: 1.495941  Valid Score: 0.747135  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:   5  Train Loss: 1.308666  Train Score: 0.782143  Accuracy: 0.071429  Valid Loss: 1.410405  Valid Score: 0.774219  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:   6  Train Loss: 1.274583  Train Score: 0.790179  Accuracy: 0.017857  Valid Loss: 1.613998  Valid Score: 0.756250  Valid Accuracy: 0.083333   

112 / 112, Epoch:  50  Train Loss: 0.624932  Train Score: 0.894057  Accuracy: 0.446429  Valid Loss: 1.254829  Valid Score: 0.813021  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:  51  Train Loss: 0.621791  Train Score: 0.894141  Accuracy: 0.562500  Valid Loss: 1.208979  Valid Score: 0.764063  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:  52  Train Loss: 0.639067  Train Score: 0.893443  Accuracy: 0.491071  Valid Loss: 1.198965  Valid Score: 0.831771  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:  53  Train Loss: 0.603670  Train Score: 0.897768  Accuracy: 0.553571  Valid Loss: 1.303074  Valid Score: 0.828385  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:  54  Train Loss: 0.592155  Train Score: 0.902260  Accuracy: 0.508929  Valid Loss: 1.238691  Valid Score: 0.826823  Valid Accuracy: 0.166667   41 Sec.
112 / 112, Epoch:  55  Train Loss: 0.598295  Train Score: 0.901730  Accuracy: 0.571429  Valid Loss: 1.156230  Valid Score: 0.817188  Valid Accuracy: 0.000000   

In [17]:
optimizer.param_groups[0]['lr'] = 0.001
train(epochs=60, start_epoch=91)

112 / 112, Epoch:  91  Train Loss: 0.435992  Train Score: 0.930469  Accuracy: 0.750000  Valid Loss: 1.245469  Valid Score: 0.839844  Valid Accuracy: 0.083333   41 Sec.
112 / 112, Epoch:  92  Train Loss: 0.412102  Train Score: 0.934040  Accuracy: 0.767857  Valid Loss: 1.248703  Valid Score: 0.845833  Valid Accuracy: 0.083333   41 Sec.
112 / 112, Epoch:  93  Train Loss: 0.394552  Train Score: 0.938616  Accuracy: 0.812500  Valid Loss: 1.113304  Valid Score: 0.854948  Valid Accuracy: 0.166667   41 Sec.
112 / 112, Epoch:  94  Train Loss: 0.387332  Train Score: 0.940067  Accuracy: 0.875000  Valid Loss: 1.264995  Valid Score: 0.837760  Valid Accuracy: 0.083333   41 Sec.
112 / 112, Epoch:  95  Train Loss: 0.390659  Train Score: 0.938756  Accuracy: 0.875000  Valid Loss: 1.296876  Valid Score: 0.841146  Valid Accuracy: 0.083333   41 Sec.
112 / 112, Epoch:  96  Train Loss: 0.379953  Train Score: 0.939035  Accuracy: 0.830357  Valid Loss: 1.280472  Valid Score: 0.844010  Valid Accuracy: 0.083333   

112 / 112, Epoch: 140  Train Loss: 0.327972  Train Score: 0.951897  Accuracy: 0.883929  Valid Loss: 1.335780  Valid Score: 0.836198  Valid Accuracy: 0.083333   41 Sec.
112 / 112, Epoch: 141  Train Loss: 0.327520  Train Score: 0.951172  Accuracy: 0.883929  Valid Loss: 1.314855  Valid Score: 0.848698  Valid Accuracy: 0.083333   41 Sec.
112 / 112, Epoch: 142  Train Loss: 0.325924  Train Score: 0.952260  Accuracy: 0.866071  Valid Loss: 1.266034  Valid Score: 0.838802  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch: 143  Train Loss: 0.330495  Train Score: 0.952204  Accuracy: 0.892857  Valid Loss: 1.247367  Valid Score: 0.856510  Valid Accuracy: 0.083333   41 Sec.
112 / 112, Epoch: 144  Train Loss: 0.330477  Train Score: 0.951423  Accuracy: 0.875000  Valid Loss: 1.248250  Valid Score: 0.844792  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch: 145  Train Loss: 0.328063  Train Score: 0.951283  Accuracy: 0.857143  Valid Loss: 1.358251  Valid Score: 0.841927  Valid Accuracy: 0.083333   

In [16]:
VERSION = 'tgs_1017_fold3'
train(epochs=90)

112 / 112, Epoch:   1  Train Loss: 2.504461  Train Score: 0.512974  Accuracy: 0.000000  Valid Loss: 2.036193  Valid Score: 0.638021  Valid Accuracy: 0.083333   40 Sec.
112 / 112, Epoch:   2  Train Loss: 1.739110  Train Score: 0.701339  Accuracy: 0.000000  Valid Loss: 1.348235  Valid Score: 0.768490  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:   3  Train Loss: 1.578254  Train Score: 0.730190  Accuracy: 0.008929  Valid Loss: 1.343301  Valid Score: 0.781510  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:   4  Train Loss: 1.427491  Train Score: 0.762556  Accuracy: 0.026786  Valid Loss: 1.329191  Valid Score: 0.801563  Valid Accuracy: 0.083333   41 Sec.
112 / 112, Epoch:   5  Train Loss: 1.351140  Train Score: 0.772210  Accuracy: 0.062500  Valid Loss: 1.211025  Valid Score: 0.800260  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:   6  Train Loss: 1.246926  Train Score: 0.795787  Accuracy: 0.044643  Valid Loss: 1.211201  Valid Score: 0.803646  Valid Accuracy: 0.000000   

112 / 112, Epoch:  50  Train Loss: 0.636262  Train Score: 0.891323  Accuracy: 0.455357  Valid Loss: 0.937930  Valid Score: 0.850521  Valid Accuracy: 0.166667   41 Sec.
112 / 112, Epoch:  51  Train Loss: 0.639390  Train Score: 0.893862  Accuracy: 0.482143  Valid Loss: 0.857300  Valid Score: 0.859896  Valid Accuracy: 0.333333   41 Sec.
112 / 112, Epoch:  52  Train Loss: 0.621413  Train Score: 0.891964  Accuracy: 0.517857  Valid Loss: 1.080104  Valid Score: 0.858594  Valid Accuracy: 0.166667   41 Sec.
112 / 112, Epoch:  53  Train Loss: 0.588019  Train Score: 0.901256  Accuracy: 0.500000  Valid Loss: 0.998960  Valid Score: 0.855208  Valid Accuracy: 0.083333   41 Sec.
112 / 112, Epoch:  54  Train Loss: 0.581604  Train Score: 0.902679  Accuracy: 0.598214  Valid Loss: 1.168374  Valid Score: 0.819531  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:  55  Train Loss: 0.614064  Train Score: 0.895452  Accuracy: 0.526786  Valid Loss: 0.916807  Valid Score: 0.863281  Valid Accuracy: 0.166667   

In [17]:
optimizer.param_groups[0]['lr'] = 0.001
train(epochs=60, start_epoch=91)

112 / 112, Epoch:  91  Train Loss: 0.441225  Train Score: 0.928209  Accuracy: 0.785714  Valid Loss: 0.844228  Valid Score: 0.865104  Valid Accuracy: 0.166667   40 Sec.
112 / 112, Epoch:  92  Train Loss: 0.427744  Train Score: 0.933147  Accuracy: 0.741071  Valid Loss: 0.840717  Valid Score: 0.867708  Valid Accuracy: 0.166667   40 Sec.
112 / 112, Epoch:  93  Train Loss: 0.408208  Train Score: 0.933845  Accuracy: 0.776786  Valid Loss: 0.818979  Valid Score: 0.873177  Valid Accuracy: 0.166667   41 Sec.
112 / 112, Epoch:  94  Train Loss: 0.407415  Train Score: 0.935156  Accuracy: 0.848214  Valid Loss: 0.788557  Valid Score: 0.882552  Valid Accuracy: 0.500000   41 Sec.
112 / 112, Epoch:  95  Train Loss: 0.417097  Train Score: 0.932171  Accuracy: 0.794643  Valid Loss: 0.773723  Valid Score: 0.878906  Valid Accuracy: 0.250000   41 Sec.
112 / 112, Epoch:  96  Train Loss: 0.386974  Train Score: 0.938867  Accuracy: 0.848214  Valid Loss: 0.819498  Valid Score: 0.878906  Valid Accuracy: 0.333333   

112 / 112, Epoch: 140  Train Loss: 0.335471  Train Score: 0.947266  Accuracy: 0.901786  Valid Loss: 0.854573  Valid Score: 0.881771  Valid Accuracy: 0.333333   41 Sec.
112 / 112, Epoch: 141  Train Loss: 0.330159  Train Score: 0.949777  Accuracy: 0.883929  Valid Loss: 0.854994  Valid Score: 0.877083  Valid Accuracy: 0.416667   41 Sec.
112 / 112, Epoch: 142  Train Loss: 0.339825  Train Score: 0.948438  Accuracy: 0.892857  Valid Loss: 0.885927  Valid Score: 0.877344  Valid Accuracy: 0.250000   41 Sec.
112 / 112, Epoch: 143  Train Loss: 0.347905  Train Score: 0.948047  Accuracy: 0.866071  Valid Loss: 0.797120  Valid Score: 0.880469  Valid Accuracy: 0.250000   41 Sec.
112 / 112, Epoch: 144  Train Loss: 0.341867  Train Score: 0.947294  Accuracy: 0.892857  Valid Loss: 0.848524  Valid Score: 0.877604  Valid Accuracy: 0.166667   41 Sec.
112 / 112, Epoch: 145  Train Loss: 0.333219  Train Score: 0.949554  Accuracy: 0.857143  Valid Loss: 0.881400  Valid Score: 0.872656  Valid Accuracy: 0.166667   

In [16]:
VERSION = 'tgs_1017_fold2'
train(epochs=30, best_checkpoint=False)

112 / 112, Epoch:   1  Train Loss: 2.583471  Train Score: 0.503376  Accuracy: 0.008929  Valid Loss: 2.055345  Valid Score: 0.655729  Valid Accuracy: 0.000000   40 Sec.
112 / 112, Epoch:   2  Train Loss: 1.900503  Train Score: 0.671345  Accuracy: 0.008929  Valid Loss: 1.642202  Valid Score: 0.730729  Valid Accuracy: 0.083333   40 Sec.
112 / 112, Epoch:   3  Train Loss: 1.593388  Train Score: 0.726088  Accuracy: 0.035714  Valid Loss: 1.274848  Valid Score: 0.780729  Valid Accuracy: 0.083333   40 Sec.
112 / 112, Epoch:   4  Train Loss: 1.479060  Train Score: 0.750586  Accuracy: 0.017857  Valid Loss: 1.296238  Valid Score: 0.788542  Valid Accuracy: 0.083333   40 Sec.
112 / 112, Epoch:   5  Train Loss: 1.389114  Train Score: 0.768052  Accuracy: 0.044643  Valid Loss: 1.126289  Valid Score: 0.816667  Valid Accuracy: 0.166667   41 Sec.
112 / 112, Epoch:   6  Train Loss: 1.322674  Train Score: 0.778376  Accuracy: 0.062500  Valid Loss: 1.190281  Valid Score: 0.815625  Valid Accuracy: 0.083333   

In [17]:
train(epochs=60, start_epoch=31)

112 / 112, Epoch:  31  Train Loss: 0.760271  Train Score: 0.875502  Accuracy: 0.339286  Valid Loss: 0.955405  Valid Score: 0.869271  Valid Accuracy: 0.083333   40 Sec.
112 / 112, Epoch:  32  Train Loss: 0.787562  Train Score: 0.861775  Accuracy: 0.250000  Valid Loss: 1.078605  Valid Score: 0.841406  Valid Accuracy: 0.166667   40 Sec.
112 / 112, Epoch:  33  Train Loss: 0.783587  Train Score: 0.865792  Accuracy: 0.267857  Valid Loss: 0.983447  Valid Score: 0.839063  Valid Accuracy: 0.083333   40 Sec.
112 / 112, Epoch:  34  Train Loss: 0.749169  Train Score: 0.871763  Accuracy: 0.321429  Valid Loss: 0.926175  Valid Score: 0.860417  Valid Accuracy: 0.083333   40 Sec.
112 / 112, Epoch:  35  Train Loss: 0.752543  Train Score: 0.873326  Accuracy: 0.348214  Valid Loss: 0.977303  Valid Score: 0.832813  Valid Accuracy: 0.166667   40 Sec.
112 / 112, Epoch:  36  Train Loss: 0.742582  Train Score: 0.873912  Accuracy: 0.285714  Valid Loss: 1.102225  Valid Score: 0.829688  Valid Accuracy: 0.000000   

112 / 112, Epoch:  80  Train Loss: 0.541919  Train Score: 0.912305  Accuracy: 0.651786  Valid Loss: 0.964370  Valid Score: 0.865625  Valid Accuracy: 0.250000   40 Sec.
112 / 112, Epoch:  81  Train Loss: 0.545788  Train Score: 0.907366  Accuracy: 0.669643  Valid Loss: 0.939731  Valid Score: 0.876302  Valid Accuracy: 0.000000   40 Sec.
112 / 112, Epoch:  82  Train Loss: 0.540499  Train Score: 0.911468  Accuracy: 0.589286  Valid Loss: 0.938711  Valid Score: 0.866406  Valid Accuracy: 0.083333   40 Sec.
112 / 112, Epoch:  83  Train Loss: 0.540312  Train Score: 0.911356  Accuracy: 0.571429  Valid Loss: 1.238113  Valid Score: 0.828646  Valid Accuracy: 0.000000   40 Sec.
112 / 112, Epoch:  84  Train Loss: 0.539967  Train Score: 0.912528  Accuracy: 0.642857  Valid Loss: 1.037928  Valid Score: 0.857292  Valid Accuracy: 0.166667   40 Sec.
112 / 112, Epoch:  85  Train Loss: 0.509366  Train Score: 0.913811  Accuracy: 0.678571  Valid Loss: 0.859732  Valid Score: 0.876042  Valid Accuracy: 0.166667   

In [18]:
optimizer.param_groups[0]['lr'] = 0.001
train(epochs=30, start_epoch=91)

112 / 112, Epoch:  91  Train Loss: 0.475589  Train Score: 0.921373  Accuracy: 0.660714  Valid Loss: 1.014276  Valid Score: 0.863802  Valid Accuracy: 0.083333   40 Sec.
112 / 112, Epoch:  92  Train Loss: 0.431190  Train Score: 0.929018  Accuracy: 0.785714  Valid Loss: 0.977724  Valid Score: 0.877083  Valid Accuracy: 0.083333   40 Sec.
112 / 112, Epoch:  93  Train Loss: 0.428452  Train Score: 0.929241  Accuracy: 0.839286  Valid Loss: 0.991336  Valid Score: 0.871875  Valid Accuracy: 0.250000   40 Sec.
112 / 112, Epoch:  94  Train Loss: 0.426977  Train Score: 0.928878  Accuracy: 0.776786  Valid Loss: 0.936334  Valid Score: 0.873958  Valid Accuracy: 0.083333   40 Sec.
112 / 112, Epoch:  95  Train Loss: 0.407542  Train Score: 0.933566  Accuracy: 0.839286  Valid Loss: 0.905151  Valid Score: 0.870573  Valid Accuracy: 0.083333   40 Sec.
112 / 112, Epoch:  96  Train Loss: 0.405239  Train Score: 0.934515  Accuracy: 0.901786  Valid Loss: 0.928417  Valid Score: 0.863542  Valid Accuracy: 0.000000   

In [15]:
lr = 0.001
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=0.0001)
CHECKPOINT=True
if CHECKPOINT:
    checkpoint = load_checkpoint(f_='tgs_1017_fold2_120_0.87448.pt')
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    last_epoch = checkpoint['epoch']

In [16]:
VERSION = 'tgs_1017_fold2'
train(epochs=30, start_epoch=121)

112 / 112, Epoch: 121  Train Loss: 0.353164  Train Score: 0.945285  Accuracy: 0.848214  Valid Loss: 0.934832  Valid Score: 0.877865  Valid Accuracy: 0.333333   40 Sec.
112 / 112, Epoch: 122  Train Loss: 0.365129  Train Score: 0.941685  Accuracy: 0.866071  Valid Loss: 0.951383  Valid Score: 0.871354  Valid Accuracy: 0.250000   40 Sec.
112 / 112, Epoch: 123  Train Loss: 0.373100  Train Score: 0.940206  Accuracy: 0.866071  Valid Loss: 0.928200  Valid Score: 0.877865  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch: 124  Train Loss: 0.371871  Train Score: 0.940792  Accuracy: 0.803571  Valid Loss: 0.934500  Valid Score: 0.868490  Valid Accuracy: 0.166667   41 Sec.
112 / 112, Epoch: 125  Train Loss: 0.359028  Train Score: 0.943415  Accuracy: 0.875000  Valid Loss: 0.884932  Valid Score: 0.880208  Valid Accuracy: 0.250000   40 Sec.
112 / 112, Epoch: 126  Train Loss: 0.359640  Train Score: 0.944085  Accuracy: 0.857143  Valid Loss: 0.957474  Valid Score: 0.865365  Valid Accuracy: 0.000000   

In [15]:
VERSION = 'tgs_1017'
train(epochs=30, best_checkpoint=False)

112 / 112, Epoch:   1  Train Loss: 2.489647  Train Score: 0.526563  Accuracy: 0.000000  Valid Loss: 1.993154  Valid Score: 0.670833  Valid Accuracy: 0.000000   40 Sec.
112 / 112, Epoch:   2  Train Loss: 1.783026  Train Score: 0.689453  Accuracy: 0.008929  Valid Loss: 1.470864  Valid Score: 0.752344  Valid Accuracy: 0.000000   40 Sec.
112 / 112, Epoch:   3  Train Loss: 1.455709  Train Score: 0.755469  Accuracy: 0.026786  Valid Loss: 1.465141  Valid Score: 0.753385  Valid Accuracy: 0.000000   40 Sec.
112 / 112, Epoch:   4  Train Loss: 1.387994  Train Score: 0.762556  Accuracy: 0.035714  Valid Loss: 1.455904  Valid Score: 0.736719  Valid Accuracy: 0.000000   40 Sec.
112 / 112, Epoch:   5  Train Loss: 1.286273  Train Score: 0.781083  Accuracy: 0.062500  Valid Loss: 1.370450  Valid Score: 0.770833  Valid Accuracy: 0.000000   40 Sec.
112 / 112, Epoch:   6  Train Loss: 1.224268  Train Score: 0.787584  Accuracy: 0.098214  Valid Loss: 1.341088  Valid Score: 0.762760  Valid Accuracy: 0.000000   

In [17]:
# optimizer.param_groups[0]['lr'] = 0.005
# optimizer = optim.SGD(model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=7)

In [None]:
CHECKPOINT=False
if CHECKPOINT:
    checkpoint = load_checkpoint(f_='tgs_1015_hinge_50_0.81713.pt')
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    last_epoch = checkpoint['epoch']

In [18]:
train(epochs=30, start_epoch=31, scheduler=scheduler)

112 / 112, Epoch:  31  Train Loss: 0.757423  Train Score: 0.869531  Accuracy: 0.250000  Valid Loss: 1.054382  Valid Score: 0.842969  Valid Accuracy: 0.166667   40 Sec.
112 / 112, Epoch:  32  Train Loss: 0.762436  Train Score: 0.869029  Accuracy: 0.294643  Valid Loss: 1.180526  Valid Score: 0.807552  Valid Accuracy: 0.083333   40 Sec.
112 / 112, Epoch:  33  Train Loss: 0.756835  Train Score: 0.872377  Accuracy: 0.267857  Valid Loss: 1.018795  Valid Score: 0.846615  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:  34  Train Loss: 0.741184  Train Score: 0.873996  Accuracy: 0.401786  Valid Loss: 1.128982  Valid Score: 0.850521  Valid Accuracy: 0.000000   40 Sec.
112 / 112, Epoch:  35  Train Loss: 0.744987  Train Score: 0.870843  Accuracy: 0.294643  Valid Loss: 1.060451  Valid Score: 0.852865  Valid Accuracy: 0.083333   40 Sec.
112 / 112, Epoch:  36  Train Loss: 0.743375  Train Score: 0.874470  Accuracy: 0.330357  Valid Loss: 1.127674  Valid Score: 0.834635  Valid Accuracy: 0.083333   

In [19]:
# optimizer.param_groups[0]['lr'] = 0.001
train(epochs=30, start_epoch=61, scheduler=scheduler)

112 / 112, Epoch:  61  Train Loss: 0.481615  Train Score: 0.918276  Accuracy: 0.696429  Valid Loss: 1.099150  Valid Score: 0.866406  Valid Accuracy: 0.166667   40 Sec.
112 / 112, Epoch:  62  Train Loss: 0.486847  Train Score: 0.918415  Accuracy: 0.616071  Valid Loss: 1.076468  Valid Score: 0.855469  Valid Accuracy: 0.083333   40 Sec.
112 / 112, Epoch:  63  Train Loss: 0.477581  Train Score: 0.922907  Accuracy: 0.642857  Valid Loss: 1.054609  Valid Score: 0.859896  Valid Accuracy: 0.250000   40 Sec.
112 / 112, Epoch:  64  Train Loss: 0.463774  Train Score: 0.923382  Accuracy: 0.758929  Valid Loss: 1.022282  Valid Score: 0.868490  Valid Accuracy: 0.083333   40 Sec.
112 / 112, Epoch:  65  Train Loss: 0.417536  Train Score: 0.933454  Accuracy: 0.848214  Valid Loss: 1.061972  Valid Score: 0.872135  Valid Accuracy: 0.166667   40 Sec.
112 / 112, Epoch:  66  Train Loss: 0.430655  Train Score: 0.930469  Accuracy: 0.794643  Valid Loss: 1.022811  Valid Score: 0.852604  Valid Accuracy: 0.000000   

In [20]:
optimizer.param_groups[0]['lr'] = 0.001
train(epochs=30, start_epoch=91)

112 / 112, Epoch:  91  Train Loss: 0.366835  Train Score: 0.943276  Accuracy: 0.785714  Valid Loss: 1.099035  Valid Score: 0.867969  Valid Accuracy: 0.083333   40 Sec.
112 / 112, Epoch:  92  Train Loss: 0.364618  Train Score: 0.942578  Accuracy: 0.857143  Valid Loss: 1.086543  Valid Score: 0.871615  Valid Accuracy: 0.250000   41 Sec.
112 / 112, Epoch:  93  Train Loss: 0.365814  Train Score: 0.943052  Accuracy: 0.812500  Valid Loss: 1.069447  Valid Score: 0.872396  Valid Accuracy: 0.166667   41 Sec.
112 / 112, Epoch:  94  Train Loss: 0.360158  Train Score: 0.944113  Accuracy: 0.901786  Valid Loss: 1.101302  Valid Score: 0.866667  Valid Accuracy: 0.083333   40 Sec.
112 / 112, Epoch:  95  Train Loss: 0.353589  Train Score: 0.946763  Accuracy: 0.839286  Valid Loss: 0.982440  Valid Score: 0.874479  Valid Accuracy: 0.166667   40 Sec.
112 / 112, Epoch:  96  Train Loss: 0.363607  Train Score: 0.943834  Accuracy: 0.866071  Valid Loss: 1.027333  Valid Score: 0.869531  Valid Accuracy: 0.166667   

In [21]:
train(epochs=30, start_epoch=121)

112 / 112, Epoch: 121  Train Loss: 0.331610  Train Score: 0.950614  Accuracy: 0.892857  Valid Loss: 1.110175  Valid Score: 0.865104  Valid Accuracy: 0.000000   40 Sec.
112 / 112, Epoch: 122  Train Loss: 0.338600  Train Score: 0.948605  Accuracy: 0.857143  Valid Loss: 1.048556  Valid Score: 0.872396  Valid Accuracy: 0.083333   40 Sec.
112 / 112, Epoch: 123  Train Loss: 0.336986  Train Score: 0.948828  Accuracy: 0.839286  Valid Loss: 0.992246  Valid Score: 0.870573  Valid Accuracy: 0.333333   40 Sec.
112 / 112, Epoch: 124  Train Loss: 0.340135  Train Score: 0.947015  Accuracy: 0.875000  Valid Loss: 1.026642  Valid Score: 0.872396  Valid Accuracy: 0.166667   40 Sec.
112 / 112, Epoch: 125  Train Loss: 0.335676  Train Score: 0.948270  Accuracy: 0.866071  Valid Loss: 1.083026  Valid Score: 0.876302  Valid Accuracy: 0.083333   40 Sec.
112 / 112, Epoch: 126  Train Loss: 0.330025  Train Score: 0.950251  Accuracy: 0.883929  Valid Loss: 1.063956  Valid Score: 0.872396  Valid Accuracy: 0.166667   

In [16]:
def train_cosine(epochs, start_epoch=1, scheduler=None, early_stopping=False, best_checkpoint=True, verbose=1):
    model.train()
    iter_valid = 300
    n_batch = len(train_loader)

    best_validation_score = 0.0
    best_train_score = 0.0
    best_epoch = 0

    log_fn = f'log/{VERSION}_{start_epoch}_{start_epoch+epochs-1}.log'
    if os.path.exists(log_fn):
        for i in range(1, 10):
            log_fn = f'log/{VERSION}_{start_epoch}_{start_epoch+epochs-1}_{i}.log'
            if not os.path.exists(log_fn):
                break
    log_file = open(log_fn, 'w')
    
    for e in range(epochs):
        epoch = start_epoch + e
        t_ = time.time()
        running_loss = 0.0
        running_score = 0.0
        valid_loss = 0.0
        valid_score = 0.0
        n_valid = 0
        
        for i, (img_batch, mask_batch) in enumerate(train_loader, 1):
            img_batch = img_batch.float().cuda()
            mask_batch = mask_batch.float().cuda()
            outputs = net(img_batch) # nn.parallel.data_parallel(model, img_batch)
            loss = criterion(outputs, mask_batch)
            # loss2 = lovasz_hinge(outputs, mask_batch)
            # loss = loss1 + loss2
            score = iou(mask_batch, torch.sigmoid(outputs))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            running_score += score.item()
            
            if i % iter_valid == 0:
                n_valid += 1
                v_l, v_s = validation()
                valid_loss += v_l
                valid_score += v_s
                print('%3d / %d, Epoch: %3d  Train Loss: %.6f  Train Score: %.6f  Valid Loss: %.6f  Valid Score: %.6f\r'
                      % (i, n_batch, epoch, running_loss/i, running_score/i, valid_loss/n_valid, valid_score/n_valid), end= "")
                continue
            
            if verbose > 0:
                print('%3d / %d, Epoch: %3d  Train Loss: %.6f  Train Score: %.6f\r'
                      % (i, n_batch, epoch, running_loss/i, running_score/i), end= "")
            scheduler.step()
        # end for in enumerate(train_loader, 1)

        n_valid += 1
        v_l, v_s = validation()
        valid_loss += v_l
        valid_score += v_s
#         if scheduler is not None:
#             scheduler.step(valid_score/n_valid)
        
        # checkpoint utilities
        if valid_score/n_valid > best_validation_score:
            best_validation_score = valid_score / n_valid
            best_train_score = running_score / i
            best_epoch = epoch
            if best_checkpoint:
                model_state_dict = model.state_dict()
                optimizer_state_dict = optimizer.state_dict()
        if early_stopping:
            if epoch - best_epoch == early_stopping:
                break

        log_message = '%3d / %d, Epoch: %3d  Train Loss: %.6f  Train Score: %.6f  Valid Loss: %.6f  Valid Score: %.6f  %3d Sec.'\
        % (i, n_batch, epoch, running_loss/i, running_score/i, valid_loss/n_valid, valid_score/n_valid, time.time() - t_)
        print(log_message)
        print(log_message, file=log_file)
        epoch += 1

    # end for in range(epochs)
    
    log_file.close()
    # save model
    if best_checkpoint:
        f_ = f'{VERSION}_{best_epoch}_{best_validation_score:.5f}.pt'
        torch.save({
            'epoch': best_epoch,
            'model_state_dict': model_state_dict,
            'optimizer_state_dict': optimizer_state_dict,
            'best_validation_score': best_validation_score,
            'best_train_score': best_train_score,
        }, f_)

    f_ = f'{VERSION}_{epoch}_{valid_score/n_valid:.5f}.pt'
    torch.save({
        'epoch': epochs,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'train_score': running_score/n_batch,
        'validation_score': valid_score,
    }, f_)
    return

In [17]:
# optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.0001)

In [17]:
f_ = 'tgs_1017_fold6_132_0.86042.pt'
checkpoint = load_checkpoint(f_)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
last_epoch = checkpoint['epoch']

In [18]:
epochs=50
optimizer.param_groups[0]['lr'] = 0.01
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, len(train_loader)*epochs, 0.001)
VERSION = 'tgs_1017_fold6_cycle1'
train(epochs=epochs, scheduler=scheduler)

112 / 112, Epoch:   1  Train Loss: 0.407745  Train Score: 0.934096  Accuracy: 0.776786  Valid Loss: 1.293797  Valid Score: 0.830469  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:   2  Train Loss: 0.463596  Train Score: 0.928627  Accuracy: 0.750000  Valid Loss: 1.337402  Valid Score: 0.826823  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:   3  Train Loss: 0.495825  Train Score: 0.921624  Accuracy: 0.642857  Valid Loss: 1.244820  Valid Score: 0.831510  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:   4  Train Loss: 0.452705  Train Score: 0.929157  Accuracy: 0.758929  Valid Loss: 1.200379  Valid Score: 0.839583  Valid Accuracy: 0.083333   41 Sec.
112 / 112, Epoch:   5  Train Loss: 0.459709  Train Score: 0.926981  Accuracy: 0.669643  Valid Loss: 1.104657  Valid Score: 0.826563  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:   6  Train Loss: 0.447016  Train Score: 0.930804  Accuracy: 0.741071  Valid Loss: 1.104468  Valid Score: 0.846094  Valid Accuracy: 0.000000   

112 / 112, Epoch:  50  Train Loss: 0.396079  Train Score: 0.939983  Accuracy: 0.767857  Valid Loss: 1.225881  Valid Score: 0.848438  Valid Accuracy: 0.083333   41 Sec.


In [19]:
epochs=50
optimizer.param_groups[0]['lr'] = 0.01
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, len(train_loader)*epochs, 0.001)
VERSION = 'tgs_1017_fold6_cycle2'
train(epochs=epochs, scheduler=scheduler)

112 / 112, Epoch:   1  Train Loss: 0.380633  Train Score: 0.943080  Accuracy: 0.776786  Valid Loss: 1.390813  Valid Score: 0.835938  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:   2  Train Loss: 0.400713  Train Score: 0.939174  Accuracy: 0.732143  Valid Loss: 1.200772  Valid Score: 0.850000  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:   3  Train Loss: 0.377380  Train Score: 0.942550  Accuracy: 0.723214  Valid Loss: 1.458679  Valid Score: 0.840104  Valid Accuracy: 0.083333   41 Sec.
112 / 112, Epoch:   4  Train Loss: 0.418210  Train Score: 0.934152  Accuracy: 0.616071  Valid Loss: 1.508220  Valid Score: 0.826823  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:   5  Train Loss: 0.448249  Train Score: 0.933733  Accuracy: 0.598214  Valid Loss: 1.274422  Valid Score: 0.829427  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:   6  Train Loss: 0.431870  Train Score: 0.931473  Accuracy: 0.714286  Valid Loss: 1.297264  Valid Score: 0.827604  Valid Accuracy: 0.000000   

112 / 112, Epoch:  50  Train Loss: 0.336947  Train Score: 0.949721  Accuracy: 0.910714  Valid Loss: 1.362301  Valid Score: 0.836458  Valid Accuracy: 0.000000   41 Sec.


In [20]:
epochs=50
optimizer.param_groups[0]['lr'] = 0.01
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, len(train_loader)*epochs, 0.001)
VERSION = 'tgs_1017_fold6_cycle3'
train(epochs=epochs, scheduler=scheduler)

112 / 112, Epoch:   1  Train Loss: 0.342816  Train Score: 0.951256  Accuracy: 0.803571  Valid Loss: 1.258008  Valid Score: 0.839323  Valid Accuracy: 0.083333   41 Sec.
112 / 112, Epoch:   2  Train Loss: 0.341301  Train Score: 0.949554  Accuracy: 0.839286  Valid Loss: 1.396815  Valid Score: 0.843490  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:   3  Train Loss: 0.356693  Train Score: 0.947154  Accuracy: 0.758929  Valid Loss: 1.486524  Valid Score: 0.835417  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:   4  Train Loss: 0.351963  Train Score: 0.947573  Accuracy: 0.767857  Valid Loss: 1.472667  Valid Score: 0.838802  Valid Accuracy: 0.083333   41 Sec.
112 / 112, Epoch:   5  Train Loss: 0.382657  Train Score: 0.942997  Accuracy: 0.714286  Valid Loss: 1.469174  Valid Score: 0.835677  Valid Accuracy: 0.083333   41 Sec.
112 / 112, Epoch:   6  Train Loss: 0.388671  Train Score: 0.941295  Accuracy: 0.812500  Valid Loss: 1.245894  Valid Score: 0.846354  Valid Accuracy: 0.083333   

112 / 112, Epoch:  50  Train Loss: 0.302159  Train Score: 0.958454  Accuracy: 0.812500  Valid Loss: 1.424067  Valid Score: 0.837240  Valid Accuracy: 0.166667   41 Sec.


In [20]:
epochs=50
optimizer.param_groups[0]['lr'] = 0.01
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, len(train_loader)*epochs, 0.001)
VERSION = 'tgs_1017_from_1005_fold1_cycle4'
train(epochs=epochs, scheduler=scheduler)

112 / 112, Epoch:   1  Train Loss: 0.353758  Train Score: 0.949693  Accuracy: 0.785714  Valid Loss: 1.268102  Valid Score: 0.854427  Valid Accuracy: 0.083333   40 Sec.
112 / 112, Epoch:   2  Train Loss: 0.332601  Train Score: 0.952344  Accuracy: 0.794643  Valid Loss: 1.058306  Valid Score: 0.860938  Valid Accuracy: 0.166667   40 Sec.
112 / 112, Epoch:   3  Train Loss: 0.358951  Train Score: 0.947740  Accuracy: 0.750000  Valid Loss: 1.105073  Valid Score: 0.842708  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:   4  Train Loss: 0.380222  Train Score: 0.943276  Accuracy: 0.714286  Valid Loss: 1.202528  Valid Score: 0.857552  Valid Accuracy: 0.083333   41 Sec.
112 / 112, Epoch:   5  Train Loss: 0.330617  Train Score: 0.950446  Accuracy: 0.830357  Valid Loss: 1.200372  Valid Score: 0.841667  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:   6  Train Loss: 0.342176  Train Score: 0.951339  Accuracy: 0.776786  Valid Loss: 1.166298  Valid Score: 0.861719  Valid Accuracy: 0.250000   

112 / 112, Epoch:  50  Train Loss: 0.287869  Train Score: 0.958705  Accuracy: 0.857143  Valid Loss: 1.302330  Valid Score: 0.857292  Valid Accuracy: 0.000000   41 Sec.


In [21]:
epochs=50
optimizer.param_groups[0]['lr'] = 0.01
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, len(train_loader)*epochs, 0.001)
VERSION = 'tgs_1017_from_1005_fold1_cycle5'
train(epochs=epochs, scheduler=scheduler)

112 / 112, Epoch:   1  Train Loss: 0.283121  Train Score: 0.959961  Accuracy: 0.803571  Valid Loss: 1.149636  Valid Score: 0.851823  Valid Accuracy: 0.000000   41 Sec.
112 / 112, Epoch:   2  Train Loss: 0.308620  Train Score: 0.955301  Accuracy: 0.812500  Valid Loss: 1.114796  Valid Score: 0.862760  Valid Accuracy: 0.166667   41 Sec.
112 / 112, Epoch:   3  Train Loss: 0.296485  Train Score: 0.958008  Accuracy: 0.830357  Valid Loss: 1.144916  Valid Score: 0.855729  Valid Accuracy: 0.166667   41 Sec.
112 / 112, Epoch:   4  Train Loss: 0.298377  Train Score: 0.956027  Accuracy: 0.830357  Valid Loss: 1.424573  Valid Score: 0.842708  Valid Accuracy: 0.083333   41 Sec.
112 / 112, Epoch:   5  Train Loss: 0.279578  Train Score: 0.959682  Accuracy: 0.848214  Valid Loss: 1.139388  Valid Score: 0.858333  Valid Accuracy: 0.166667   41 Sec.
112 / 112, Epoch:   6  Train Loss: 0.273885  Train Score: 0.961105  Accuracy: 0.857143  Valid Loss: 1.386790  Valid Score: 0.856250  Valid Accuracy: 0.083333   

112 / 112, Epoch:  50  Train Loss: 0.285959  Train Score: 0.958761  Accuracy: 0.848214  Valid Loss: 1.205393  Valid Score: 0.852344  Valid Accuracy: 0.333333   40 Sec.


In [8]:
def predict(TTA=True):
    prediction = []
    n_batch = len(test_loader)
    model.eval()
    with torch.no_grad():
        for i, img_batch in enumerate(test_loader):
            preds_batch = model(img_batch.cuda())[0]
            preds_batch_hf = model(img_batch.cuda().flip(3))[0]
            preds_batch = torch.sigmoid((preds_batch + preds_batch_hf.flip(3)) / 2)
            prediction.append(preds_batch.cpu().detach().numpy())
            print('%3d / %3d, prediction \r' % (i+1, n_batch), end='')
    model.train()
    return np.vstack(prediction)

def rle_encoding(x):
    dots = np.where(x.T.flatten() == 1)[0]
    run_lengths = []
    prev = -2
    for b in dots:
        if (b > prev+1): run_lengths.extend((b + 1, 0))
        run_lengths[-1] += 1
        prev = b
    return run_lengths

def unpadding(preds):
    return preds[:, :, 14: 128-13, 14: 128-13]

In [14]:
# files = [
#     'tgs_1017_fold3_119_0.89115.pt',
#     'tgs_1017_from_1005_fold3_cycle1_33_0.88229.pt',
#     'tgs_1017_from_1005_fold3_cycle2_2_0.88828.pt',
#     'tgs_1017_from_1005_fold3_cycle3_11_0.87839.pt'
# ]
files = [
    'tgs_1017_fold4_99_0.85833.pt',
    'tgs_1017_fold4_cycle1_47_0.84740.pt',
    'tgs_1017_fold4_cycle2_1_0.85417.pt',
    'tgs_1017_fold4_cycle3_19_0.85208.pt'
]
# files = [
#     'tgs_1017_fold5_126_0.87734.pt',
#     'tgs_1017_fold5_cycle1_9_0.87500.pt',
#     'tgs_1017_fold5_cycle2_16_0.87448.pt',
#     'tgs_1017_fold5_cycle3_2_0.87604.pt'
# ]
# files = [
#     'tgs_1017_fold6_132_0.86042.pt',
#     'tgs_1017_fold6_cycle1_37_0.84922.pt',
#     'tgs_1017_fold6_cycle2_28_0.85156.pt',
#     'tgs_1017_fold6_cycle3_20_0.85755.pt'
# ]

In [15]:
# instantiate model
batch_size=30
test_dataset = TGSdataset_test(images_test)
test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=2, pin_memory=True, collate_fn=tgs_collate_test)
preds = np.zeros((18000, 1, 128, 128), dtype=np.float32)
for f_ in files:
    checkpoint = load_checkpoint(f_)
    model.load_state_dict(checkpoint['model_state_dict'])
    preds += predict()
prediction = unpadding(preds / len(files))
np.save('tgs_1017_fold4_snapshot_ensemble.npy', prediction)
# prediction = (prediction > 0.5).astype('uint8')
# prediction = list(map(rle_encoding, prediction))
# prediction = [(' ').join(str(e)[1:-1].split(', ')) for e in prediction]
# submission = pd.DataFrame()
# submission['id'] = df_test.index
# submission['rle_mask'] = prediction
# submission.to_csv('tgs_1017_fold6_snapshot_ensemble.csv', index=False)

600 / 600, prediction 

In [11]:
batch_size=30
test_dataset = TGSdataset_test(images_test)
test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=2, pin_memory=True, collate_fn=tgs_collate_test)
# prediction = predict()
# prediction = unpadding(prediction)

In [9]:
files = [
    'tgs_1017_from_1005_fold1_cycle1_36_0.87031.pt',
    'tgs_1017_from_1005_fold1_cycle2_22_0.87708.pt',
    'tgs_1017_from_1005_fold1_cycle3_12_0.87266.pt',
    'tgs_1017_from_1005_fold1_cycle4_19_0.87240.pt',
    'tgs_1017_from_1005_fold1_cycle5_37_0.87500.pt',
]

In [13]:
preds = np.zeros((18000, 1, 128, 128), dtype=np.float32)
for f_ in files:
    checkpoint = load_checkpoint(f_)
    model.load_state_dict(checkpoint['model_state_dict'])
    preds += predict()

600 / 600, prediction 

In [16]:
checkpoint = load_checkpoint('tgs_1017_147_0.88047.pt')
model.load_state_dict(checkpoint['model_state_dict'])
preds += predict()

600 / 600, prediction 

In [17]:
prediction = unpadding(preds / 6)

In [18]:
prediction = (prediction > 0.5).astype('uint8')
prediction = list(map(rle_encoding, prediction))
prediction = [(' ').join(str(e)[1:-1].split(', ')) for e in prediction]
submission = pd.DataFrame()
submission['id'] = df_test.index
submission['rle_mask'] = prediction
submission.to_csv('tgs_1017_from_1015_folds1_snapshot_ensemble.csv', index=False)

In [19]:
prediction = unpadding(preds / 6)
prediction = (prediction > 0.5).astype('uint8')
np.save('tgs_1017_from_1015_attuned_snapshot_ensemble.npy', prediction)

In [15]:
batch_size=30
test_dataset = TGSdataset_test(images_test)
test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=2, pin_memory=True, collate_fn=tgs_collate_test)
prediction = predict()
prediction = unpadding(prediction)
prediction = (prediction > 0.5).astype('uint8')
prediction = list(map(rle_encoding, prediction))
prediction = [(' ').join(str(e)[1:-1].split(', ')) for e in prediction]
submission = pd.DataFrame()
submission['id'] = df_test.index
submission['rle_mask'] = prediction
submission.to_csv('tgs_1017_from_1015_attuned_single_snapshot.csv', index=False)

600 / 600, prediction 

In [None]:
if SUBMIT:
    test_dataset = TGSdataset_test(images_test)
    test_loader = DataLoader(test_dataset, batch_size=30, num_workers=2, pin_memory=True, collate_fn=tgs_collate_test)
    prediction = predict()
    prediction = (prediction > best_threshold).astype('uint8')
    prediction = list(map(rle_encoding, prediction))
    prediction = [(' ').join(str(e)[1:-1].split(', ')) for e in prediction]
    submission = pd.DataFrame()
    submission['id'] = df_test.index
    submission['rle_mask'] = prediction
    submission.to_csv('submission.csv', index=False)