In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Best validation IoU recorded per fold (no entry for CV = 1):
# CV = 0: Best IoU: 0.83004
# CV = 2: Best IoU: 0.8243
# CV = 3: Best IoU: 0.821
# CV = 4: Best IoU: 0.8388

In [3]:
import gc
import pandas as pd
import numpy as np
import torch
import datetime
from tqdm import tqdm_notebook
from torch import nn
from torchvision import transforms
from cnn_finetune import make_model
from sklearn.model_selection import StratifiedKFold

import model4 as M
import unet_parts
from dataset import SegmentationDataset, SegmentationInferenceDataset
from data import *
from util import *
from loss import FocalLoss, dice_loss
from lovasz_loss import lovasz_hinge_flat, lovasz_hinge

In [4]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU so the
# notebook still runs (slowly) on machines without CUDA. Previously the result
# of torch.cuda.is_available() was computed and thrown away.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
# Settings
# ========

# Index k of the cross-validation fold to work on. It is passed to
# get_dfs_fold(k=CV) and embedded in every checkpoint file name.
CV = 0

# When True, the phase-1 training cell is skipped and the model is initialised
# from the saved phase-2 checkpoint of this fold instead.
skip_pretraining = True

In [6]:
# Training / validation frames for the fold selected in the settings cell.
train_df, val_df = get_dfs_fold(k=CV)

# Training samples: 128x128 inputs, depth channels on, augmentation on,
# no auxiliary label, no mean subtraction.
train_dataset = SegmentationDataset(
    train_df,
    size=(128, 128),
    use_depth_channels=True,
    with_aux_label=False,
    as_aux_label='coverage_class',
    use_augmentation=True,
    mean_sub=False,
)

# Validation samples: same input size and channel settings, ground truth included.
val_dataset = SegmentationInferenceDataset(
    val_df,
    input_size=(128, 128),
    use_depth_channels=True,
    with_aux_label=False,
    with_gt=True,
    mean_sub=False,
)

# Shuffled mini-batches of 16 for training; ordered mini-batches of 8 for validation.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=2,
)
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=8,
    shuffle=False,
)
print('Loaded dataset and created loader')

Use augmentations
Loaded dataset and created loader


# Training

In [7]:
# Drop unreferenced Python objects and release cached GPU memory before
# building a fresh model.
gc.collect()
torch.cuda.empty_cache()

model = M.UNetRes34BilinearHcSCSEv5(n_classes=1)

if skip_pretraining:
    # Resume from the phase-2 checkpoint file of the selected fold.
    pretrained_model = model_dir / 'unet_res34_bilinear_hcscse_v5_kfold_cv{}_phase2_dict.model'.format(CV)
    # map_location remaps the stored tensors onto `device`, so a checkpoint
    # written on a GPU can also be loaded when the current device differs.
    W = torch.load(str(pretrained_model), map_location=device)
    model.load_state_dict(W)
    print('loaded: {}'.format(pretrained_model))

model = model.to(device)

# Single optimizer shared (as a notebook global) by all training phases.
optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=0.9)
#optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

Loaded pretrained resnet weights
loaded: D:\Users\ns\git_repos\kaggle-tgs-salt\models\unet_res34_bilinear_hcscse_v5_kfold_cv0_phase2_dict.model


In [8]:
def criterion_phase1(logit, target_pixel):
    """Phase-1 segmentation loss: mean binary cross-entropy on raw logits.

    Args:
        logit: tensor of unnormalised model outputs (no sigmoid applied).
        target_pixel: tensor of target values with the same number of
            elements as ``logit``.

    Returns:
        Scalar tensor: the BCE averaged over every element.
    """
    # Both tensors are flattened, so any pair of layouts with matching element
    # order is accepted. ``reduction='mean'`` is the supported spelling of the
    # deprecated ``size_average=True``.
    return F.binary_cross_entropy_with_logits(
        logit.view(-1), target_pixel.view(-1), reduction='mean')

In [9]:
def train_phase1(model, n_epoch, train_iter, val_iter, early_stopping_limit=100):
    """Train ``model`` with ``criterion_phase1`` and checkpoint the best-IoU epoch.

    Uses the notebook-level globals ``optimizer``, ``device`` and ``CV`` as well
    as the helpers ``evaluate`` and ``save_model``.

    Args:
        model: network to optimise; its parameters are updated in place.
        n_epoch: maximum number of epochs to run.
        train_iter: loader yielding ``(data, target)`` batches; it must support
            ``len()`` and expose a ``dataset`` attribute (used for progress output).
        val_iter: loader handed to ``evaluate`` after every epoch.
        early_stopping_limit: stop once this many consecutive epochs have
            failed to improve on the best IoU seen so far.

    Returns:
        The same ``model`` object, holding the weights of the last epoch run.
        The best epoch is only persisted through ``save_model``.
    """
    best_iou = 0.0
    n_stay = 0

    for epoch in range(n_epoch):
        model.train()

        gc.collect()
        torch.cuda.empty_cache()

        total_loss = 0.0
        total_size = 0

        for batch_idx, (data, target) in enumerate(train_iter):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()

            # Forward
            logit = model(data)
            loss = criterion_phase1(logit, target)

            # The criterion is a per-batch mean, so weight it by the batch size;
            # total_loss / total_size is then a true per-sample running average.
            batch_size = data.size(0)
            total_loss += loss.item() * batch_size
            total_size += batch_size

            # Backward
            loss.backward()
            optimizer.step()

            if batch_idx % 50 == 0:
                print('[{}] Train Epoch: {} [{}/{} ({:.0f}%)]\tAverage loss: {:.6f}'.format(
                    datetime.datetime.now(),
                    epoch, batch_idx * len(data), len(train_iter.dataset),
                    100. * batch_idx / len(train_iter), total_loss / total_size))

        gc.collect()
        torch.cuda.empty_cache()

        # Validate in inference mode; model.train() at the top of the next epoch
        # switches back. (evaluate() may already do this - the call is idempotent.)
        model.eval()
        with torch.no_grad():
            iou = evaluate(model, val_iter, device=device, use_sigmoid=True, threshold=0.5)
        # Take the timestamp here rather than reusing the one from the last
        # progress line, which was stale and undefined for an empty loader.
        print('[{}] Train Epoch: {}\tIoU: {:.6f}'.format(datetime.datetime.now(), epoch, iou))

        if best_iou < iou:
            best_iou = iou
            save_model(model, f'unet_res34_bilinear_hcscse_v5_kfold_cv{CV}_phase1')
            print('Saved model at {} (IoU: {})'.format(epoch, iou))
            n_stay = 0
        else:
            n_stay += 1

        if n_stay >= early_stopping_limit:
            print('Early stopping at {} (Best IoU: {})'.format(epoch, best_iou))
            break

    return model

In [10]:
# Phase 1: up to 50 epochs with criterion_phase1, unless the settings cell asks
# to skip it (in which case the model was loaded from a checkpoint above).
# NOTE(review): the saved output below shows a phase-1 run although
# skip_pretraining is currently True - it appears to come from an earlier session.
if not skip_pretraining:
    model = train_phase1(model, 50, train_loader, val_loader)



[2018-10-20 04:43:17.217495] Train Epoch: 0	IoU: 0.600748
Saved model at 0 (IoU: 0.6007481296758105)
[2018-10-20 04:45:09.919698] Train Epoch: 1	IoU: 0.632668
Saved model at 1 (IoU: 0.6326683291770573)
[2018-10-20 04:46:52.073847] Train Epoch: 2	IoU: 0.643766
Saved model at 2 (IoU: 0.6437655860349126)
[2018-10-20 04:48:34.355460] Train Epoch: 3	IoU: 0.682045
Saved model at 3 (IoU: 0.6820448877805486)
[2018-10-20 04:50:16.413995] Train Epoch: 4	IoU: 0.676434
[2018-10-20 04:51:58.058552] Train Epoch: 5	IoU: 0.716334
Saved model at 5 (IoU: 0.7163341645885287)
[2018-10-20 04:53:40.015522] Train Epoch: 6	IoU: 0.715337
[2018-10-20 04:55:21.547684] Train Epoch: 7	IoU: 0.747880
Saved model at 7 (IoU: 0.7478802992518703)
[2018-10-20 04:57:02.844504] Train Epoch: 8	IoU: 0.734913
[2018-10-20 04:58:43.478524] Train Epoch: 9	IoU: 0.719327
[2018-10-20 05:00:24.014148] Train Epoch: 10	IoU: 0.736035
[2018-10-20 05:02:04.426210] Train Epoch: 11	IoU: 0.736658
[2018-10-20 05:03:45.002671] Train Epoch: 12

[2018-10-20 05:15:29.340939] Train Epoch: 19	IoU: 0.760973
[2018-10-20 05:17:09.900300] Train Epoch: 20	IoU: 0.750000
[2018-10-20 05:18:50.556129] Train Epoch: 21	IoU: 0.747880
[2018-10-20 05:20:31.094269] Train Epoch: 22	IoU: 0.744389
[2018-10-20 05:22:11.593463] Train Epoch: 23	IoU: 0.767207
Saved model at 23 (IoU: 0.7672069825436408)
[2018-10-20 05:23:52.587185] Train Epoch: 24	IoU: 0.762219
[2018-10-20 05:25:33.135949] Train Epoch: 25	IoU: 0.764090
[2018-10-20 05:27:13.683882] Train Epoch: 26	IoU: 0.764589
[2018-10-20 05:28:54.126266] Train Epoch: 27	IoU: 0.776185
Saved model at 27 (IoU: 0.7761845386533666)
[2018-10-20 05:30:35.040889] Train Epoch: 28	IoU: 0.772943
[2018-10-20 05:32:15.641815] Train Epoch: 29	IoU: 0.779052
Saved model at 29 (IoU: 0.7790523690773067)
[2018-10-20 05:33:56.519922] Train Epoch: 30	IoU: 0.774190
[2018-10-20 05:35:37.100452] Train Epoch: 31	IoU: 0.776808
[2018-10-20 05:37:17.738675] Train Epoch: 32	IoU: 0.751122
[2018-10-20 05:38:58.354472] Train Epoch: 

[2018-10-20 05:49:02.106537] Train Epoch: 39	IoU: 0.788279
[2018-10-20 05:50:43.451206] Train Epoch: 40	IoU: 0.759975
[2018-10-20 05:52:24.275493] Train Epoch: 41	IoU: 0.778554
[2018-10-20 05:54:04.882975] Train Epoch: 42	IoU: 0.784040
[2018-10-20 05:55:45.430652] Train Epoch: 43	IoU: 0.783915
[2018-10-20 05:57:25.937376] Train Epoch: 44	IoU: 0.764589
[2018-10-20 05:59:06.571860] Train Epoch: 45	IoU: 0.788778
Saved model at 45 (IoU: 0.7887780548628429)
[2018-10-20 06:00:47.411249] Train Epoch: 46	IoU: 0.778928
[2018-10-20 06:02:27.795344] Train Epoch: 47	IoU: 0.782544
[2018-10-20 06:04:07.145816] Train Epoch: 48	IoU: 0.773192
[2018-10-20 06:05:46.615399] Train Epoch: 49	IoU: 0.766833


In [8]:
def criterion_phase2(logit, target_pixel):
    """Phase-2 segmentation loss: ``lovasz_hinge`` on raw logits.

    Args:
        logit: tensor of unnormalised model outputs.
        target_pixel: tensor of target values.

    Returns:
        Whatever ``lovasz_hinge`` returns for the two tensors (the training
        loops treat it as a scalar tensor).
    """
    # Only a singleton axis 1 is removed (assumed to be the channel axis of an
    # (N, 1, H, W) tensor - TODO confirm). A bare squeeze() would also drop the
    # batch axis whenever a batch contains a single sample, handing the loss a
    # tensor of the wrong rank.
    return lovasz_hinge(logit.squeeze(1), target_pixel.squeeze(1))

In [12]:
def train_phase2(model, n_epoch, train_iter, val_iter,
                 early_stopping_limit=100, reduce_limit=10,
                 min_lr=0.000001, base_lr=0.005):
    """Train ``model`` with ``criterion_phase2``, halving the LR on plateaus.

    Uses the notebook-level globals ``optimizer``, ``device`` and ``CV`` as well
    as the helpers ``evaluate`` and ``save_model``.

    Args:
        model: network to optimise; its parameters are updated in place.
        n_epoch: maximum number of epochs to run.
        train_iter: loader yielding ``(data, target)`` batches; it must support
            ``len()`` and expose a ``dataset`` attribute (used for progress output).
        val_iter: loader handed to ``evaluate`` after every epoch.
        early_stopping_limit: stop once this many consecutive epochs have
            failed to improve on the best IoU.
        reduce_limit: halve the learning rate after this many epochs without
            improvement since the last improvement or reduction.
        min_lr: floor below which the learning rate is never reduced.
        base_lr: learning rate the halving starts from. It is not written to
            the optimizer up front, so it should match the optimizer's
            current learning rate.

    Returns:
        The same ``model`` object, holding the weights of the last epoch run.
        The best epoch is only persisted through ``save_model``.
    """
    best_iou = 0.0
    # Two independent plateau counters: resetting a single shared counter on
    # every LR reduction made the early-stopping limit unreachable whenever
    # reduce_limit < early_stopping_limit.
    epochs_since_best = 0
    epochs_since_lr_change = 0
    current_lr = base_lr

    for epoch in range(n_epoch):
        model.train()

        gc.collect()
        torch.cuda.empty_cache()

        total_loss = 0.0
        total_size = 0

        for batch_idx, (data, target) in enumerate(train_iter):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()

            # Forward
            logit = model(data)
            loss = criterion_phase2(logit, target)

            # One scalar per batch: weight it by the batch size so that
            # total_loss / total_size is a per-sample running average
            # (assumes the criterion returns a batch-level mean - TODO confirm).
            batch_size = data.size(0)
            total_loss += loss.item() * batch_size
            total_size += batch_size

            # Backward
            loss.backward()
            optimizer.step()

            if batch_idx % 50 == 0:
                print('[{}] Train Epoch: {} [{}/{} ({:.0f}%)]\tAverage loss: {:.6f}'.format(
                    datetime.datetime.now(),
                    epoch, batch_idx * len(data), len(train_iter.dataset),
                    100. * batch_idx / len(train_iter), total_loss / total_size))

        gc.collect()
        torch.cuda.empty_cache()

        # Raw logits are thresholded at 0.0 here (no sigmoid).
        with torch.no_grad():
            iou = evaluate(model, val_iter, device=device, use_sigmoid=False, threshold=0.0)
        # Fresh timestamp: the one from the last progress line was stale and
        # undefined for an empty loader.
        print('[{}] Train Epoch: {}\tIoU: {:.6f}'.format(datetime.datetime.now(), epoch, iou))

        if best_iou < iou:
            best_iou = iou
            save_model(model, f'unet_res34_bilinear_hcscse_v5_kfold_cv{CV}_phase2')
            print('Saved model at {} (IoU: {})'.format(epoch, iou))
            epochs_since_best = 0
            epochs_since_lr_change = 0
        else:
            epochs_since_best += 1
            epochs_since_lr_change += 1

        if epochs_since_best >= early_stopping_limit:
            print('Early stopping at {} (Best IoU: {})'.format(epoch, best_iou))
            break

        if epochs_since_lr_change >= reduce_limit:
            epochs_since_lr_change = 0
            # Halve the learning rate but never go below min_lr.
            new_lr = max(min_lr, 0.5 * current_lr)
            if new_lr < current_lr:
                current_lr = new_lr
                for g in optimizer.param_groups:
                    g['lr'] = current_lr
                print('Reduce lr at {} (to: {})'.format(epoch, current_lr))

    return model

In [13]:
# Phase 2: up to 80 epochs with criterion_phase2; the best epoch is written
# to the "..._phase2" checkpoint by train_phase2.
model = train_phase2(model, 80, train_loader, val_loader)



[2018-10-20 06:07:33.407687] Train Epoch: 0	IoU: 0.732918
Saved model at 0 (IoU: 0.732917705735661)
[2018-10-20 06:09:22.918983] Train Epoch: 1	IoU: 0.736409
Saved model at 1 (IoU: 0.7364089775561098)
[2018-10-20 06:11:12.001606] Train Epoch: 2	IoU: 0.738030
Saved model at 2 (IoU: 0.7380299251870326)
[2018-10-20 06:13:00.862326] Train Epoch: 3	IoU: 0.774813
Saved model at 3 (IoU: 0.7748129675810475)
[2018-10-20 06:14:49.665612] Train Epoch: 4	IoU: 0.787781
Saved model at 4 (IoU: 0.7877805486284288)
[2018-10-20 06:16:38.678767] Train Epoch: 5	IoU: 0.784788
[2018-10-20 06:18:27.001297] Train Epoch: 6	IoU: 0.787656
[2018-10-20 06:20:15.347973] Train Epoch: 7	IoU: 0.784165
[2018-10-20 06:22:03.648918] Train Epoch: 8	IoU: 0.777307
[2018-10-20 06:23:52.001895] Train Epoch: 9	IoU: 0.777681
[2018-10-20 06:25:40.362975] Train Epoch: 10	IoU: 0.790898
Saved model at 10 (IoU: 0.7908977556109725)
[2018-10-20 06:27:29.259247] Train Epoch: 11	IoU: 0.788279
[2018-10-20 06:29:17.518728] Train Epoch: 12

[2018-10-20 06:41:56.512987] Train Epoch: 19	IoU: 0.803990
Saved model at 19 (IoU: 0.8039900249376559)
[2018-10-20 06:43:45.108958] Train Epoch: 20	IoU: 0.798005
[2018-10-20 06:45:33.448689] Train Epoch: 21	IoU: 0.792145
[2018-10-20 06:47:21.740719] Train Epoch: 22	IoU: 0.798753
[2018-10-20 06:49:10.089958] Train Epoch: 23	IoU: 0.797257
[2018-10-20 06:50:59.773679] Train Epoch: 24	IoU: 0.797631
[2018-10-20 06:52:48.635774] Train Epoch: 25	IoU: 0.798753
[2018-10-20 06:54:37.262645] Train Epoch: 26	IoU: 0.773192
[2018-10-20 06:56:25.530206] Train Epoch: 27	IoU: 0.803367
[2018-10-20 06:58:13.870702] Train Epoch: 28	IoU: 0.801247
[2018-10-20 07:00:02.141582] Train Epoch: 29	IoU: 0.810474
Saved model at 29 (IoU: 0.8104738154613467)
[2018-10-20 07:01:50.601337] Train Epoch: 30	IoU: 0.795761
[2018-10-20 07:03:38.940789] Train Epoch: 31	IoU: 0.812095
Saved model at 31 (IoU: 0.8120947630922692)
[2018-10-20 07:05:27.440931] Train Epoch: 32	IoU: 0.816584
Saved model at 32 (IoU: 0.8165835411471323

[2018-10-20 07:18:04.942216] Train Epoch: 39	IoU: 0.810224
[2018-10-20 07:19:53.099757] Train Epoch: 40	IoU: 0.813716
[2018-10-20 07:21:41.347350] Train Epoch: 41	IoU: 0.817332
Saved model at 41 (IoU: 0.8173316708229427)
[2018-10-20 07:23:30.524654] Train Epoch: 42	IoU: 0.794514
[2018-10-20 07:25:19.670628] Train Epoch: 43	IoU: 0.818080
Saved model at 43 (IoU: 0.8180798004987532)
[2018-10-20 07:27:08.617801] Train Epoch: 44	IoU: 0.792394
[2018-10-20 07:28:57.121749] Train Epoch: 45	IoU: 0.818703
Saved model at 45 (IoU: 0.8187032418952618)
[2018-10-20 07:30:46.232518] Train Epoch: 46	IoU: 0.806484
[2018-10-20 07:32:35.056316] Train Epoch: 47	IoU: 0.810848


KeyboardInterrupt: 

In [9]:
def train_phase3(model, n_epoch, train_iter, val_iter,
                 initial_best_iou=0.818703,
                 early_stopping_limit=100, reduce_limit=10,
                 min_lr=0.000001, base_lr=0.005):
    """Continue phase-2 training after resetting the learning rate to ``base_lr``.

    Identical in spirit to ``train_phase2`` (same criterion, same checkpoint
    name) except that the optimizer's learning rate is first reset and the
    best-IoU threshold starts from a previously reached value, so only epochs
    that beat it overwrite the phase-2 checkpoint.

    Uses the notebook-level globals ``optimizer``, ``device`` and ``CV`` as well
    as the helpers ``evaluate`` and ``save_model``.

    Args:
        model: network to optimise; its parameters are updated in place.
        n_epoch: maximum number of epochs to run.
        train_iter: loader yielding ``(data, target)`` batches; it must support
            ``len()`` and expose a ``dataset`` attribute (used for progress output).
        val_iter: loader handed to ``evaluate`` after every epoch.
        initial_best_iou: IoU an epoch must exceed before the first checkpoint
            is written. The default matches the value that was hard-coded here.
        early_stopping_limit: stop once this many consecutive epochs have
            failed to improve on the best IoU.
        reduce_limit: halve the learning rate after this many epochs without
            improvement since the last improvement or reduction.
        min_lr: floor below which the learning rate is never reduced.
        base_lr: learning rate written to every optimizer parameter group
            before training starts.

    Returns:
        The same ``model`` object, holding the weights of the last epoch run.
        The best epoch is only persisted through ``save_model``.
    """
    best_iou = initial_best_iou
    # Two independent plateau counters: resetting a single shared counter on
    # every LR reduction made the early-stopping limit unreachable whenever
    # reduce_limit < early_stopping_limit.
    epochs_since_best = 0
    epochs_since_lr_change = 0
    current_lr = base_lr

    # Reset the optimizer to the base learning rate (the message text is kept
    # as it was, although this is a reset rather than a reduction).
    for g in optimizer.param_groups:
        g['lr'] = current_lr
    print('Reduce lr at 0 (to: {})'.format(current_lr))

    for epoch in range(n_epoch):
        model.train()

        gc.collect()
        torch.cuda.empty_cache()

        total_loss = 0.0
        total_size = 0

        for batch_idx, (data, target) in enumerate(train_iter):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()

            # Forward
            logit = model(data)
            loss = criterion_phase2(logit, target)

            # One scalar per batch: weight it by the batch size so that
            # total_loss / total_size is a per-sample running average
            # (assumes the criterion returns a batch-level mean - TODO confirm).
            batch_size = data.size(0)
            total_loss += loss.item() * batch_size
            total_size += batch_size

            # Backward
            loss.backward()
            optimizer.step()

            if batch_idx % 50 == 0:
                print('[{}] Train Epoch: {} [{}/{} ({:.0f}%)]\tAverage loss: {:.6f}'.format(
                    datetime.datetime.now(),
                    epoch, batch_idx * len(data), len(train_iter.dataset),
                    100. * batch_idx / len(train_iter), total_loss / total_size))

        gc.collect()
        torch.cuda.empty_cache()

        # Raw logits are thresholded at 0.0 here (no sigmoid).
        with torch.no_grad():
            iou = evaluate(model, val_iter, device=device, use_sigmoid=False, threshold=0.0)
        # Fresh timestamp: the one from the last progress line was stale and
        # undefined for an empty loader.
        print('[{}] Train Epoch: {}\tIoU: {:.6f}'.format(datetime.datetime.now(), epoch, iou))

        if best_iou < iou:
            best_iou = iou
            # Deliberately the same file name as phase 2: this phase continues it.
            save_model(model, f'unet_res34_bilinear_hcscse_v5_kfold_cv{CV}_phase2')
            print('Saved model at {} (IoU: {})'.format(epoch, iou))
            epochs_since_best = 0
            epochs_since_lr_change = 0
        else:
            epochs_since_best += 1
            epochs_since_lr_change += 1

        if epochs_since_best >= early_stopping_limit:
            print('Early stopping at {} (Best IoU: {})'.format(epoch, best_iou))
            break

        if epochs_since_lr_change >= reduce_limit:
            epochs_since_lr_change = 0
            # Halve the learning rate but never go below min_lr.
            new_lr = max(min_lr, 0.5 * current_lr)
            if new_lr < current_lr:
                current_lr = new_lr
                for g in optimizer.param_groups:
                    g['lr'] = current_lr
                print('Reduce lr at {} (to: {})'.format(epoch, current_lr))

    return model

In [10]:
# Phase 3: reset the learning rate and keep training with criterion_phase2 for
# up to 80 more epochs; improvements overwrite the "..._phase2" checkpoint.
model = train_phase3(model, 80, train_loader, val_loader)

Reduce lr at 0 (to: 0.005)




[2018-10-20 07:41:22.862354] Train Epoch: 0	IoU: 0.823192
Saved model at 0 (IoU: 0.8231920199501247)
[2018-10-20 07:43:20.339146] Train Epoch: 1	IoU: 0.830050
Saved model at 1 (IoU: 0.8300498753117208)
[2018-10-20 07:45:10.914224] Train Epoch: 2	IoU: 0.818703
[2018-10-20 07:47:00.248338] Train Epoch: 3	IoU: 0.828304
[2018-10-20 07:48:49.785591] Train Epoch: 4	IoU: 0.822569
[2018-10-20 07:50:39.187589] Train Epoch: 5	IoU: 0.821322
[2018-10-20 07:52:28.814519] Train Epoch: 6	IoU: 0.823815
[2018-10-20 07:54:18.447882] Train Epoch: 7	IoU: 0.821072
[2018-10-20 07:56:08.495435] Train Epoch: 8	IoU: 0.826933
[2018-10-20 07:57:58.982561] Train Epoch: 9	IoU: 0.817830
[2018-10-20 07:59:48.993950] Train Epoch: 10	IoU: 0.820823
[2018-10-20 08:01:38.102898] Train Epoch: 11	IoU: 0.824065
Reduce lr at 11 (to: 0.0025)
[2018-10-20 08:03:26.822970] Train Epoch: 12	IoU: 0.826559
[2018-10-20 08:05:15.617364] Train Epoch: 13	IoU: 0.825436


KeyboardInterrupt: 

In [None]:
# Validation data for visual spot checks: like val_dataset, but each item also
# carries the raw input (with_raw_input=True).
# NOTE(review): mean_sub was True here while the datasets used for training and
# validation are built with mean_sub=False; it is aligned with them so the
# model is fed the same kind of input it was trained on - confirm.
val_check_dataset = SegmentationInferenceDataset(val_df, input_size=(128, 128),
                                                 with_gt=True, with_raw_input=True, use_depth_channels=True,
                                                 mean_sub=False)
val_check_loader = torch.utils.data.DataLoader(val_check_dataset, batch_size=8, shuffle=False)
# A single shared iterator: every spot-check cell below pulls the next batch from it.
val_check_iter = iter(val_check_loader)

In [None]:
# Spot check: predict the next validation batch with test-time augmentation and
# hand raw input, prediction and ground truth to show_prediction.
# NOTE(review): this cell thresholds sigmoid outputs at 0.5, whereas the
# following cells and the phase-2/3 validation threshold raw logits at 0.0 -
# confirm which one is intended.
model.eval()
im, x, t = next(val_check_iter)
pred = predict(model, x, device, with_tta=True, use_sigmoid=True, threshold=0.5)
show_prediction(im, pred, t)

In [None]:
# Spot check on the next validation batch: raw logits thresholded at 0.0, with
# test-time augmentation. Each run of this cell advances val_check_iter.
model.eval()
im, x, t = next(val_check_iter)
pred = predict(model, x, device, with_tta=True, use_sigmoid=False, threshold=0.0)
show_prediction(im, pred, t)

In [None]:
# Spot check on the next validation batch: raw logits thresholded at 0.0, with
# test-time augmentation. Each run of this cell advances val_check_iter.
model.eval()
im, x, t = next(val_check_iter)
pred = predict(model, x, device, with_tta=True, use_sigmoid=False, threshold=0.0)
show_prediction(im, pred, t)

In [None]:
# Spot check on the next validation batch: raw logits thresholded at 0.0, with
# test-time augmentation. Each run of this cell advances val_check_iter.
model.eval()
im, x, t = next(val_check_iter)
pred = predict(model, x, device, with_tta=True, use_sigmoid=False, threshold=0.0)
show_prediction(im, pred, t)