In [3]:
import os
import cv2
import collections
import time 
import tqdm
from PIL import Image
from functools import partial
train_on_gpu = True

import gc
from radam import RAdam
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn.model_selection import StratifiedKFold,train_test_split
from sklearn.metrics import roc_auc_score, classification_report

import torchvision
import torchvision.transforms as transforms
import torch
from torch.utils.data import TensorDataset, DataLoader,Dataset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data.sampler import SubsetRandomSampler
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau, CosineAnnealingLR
from model import metric
import albumentations as albu

from train import *
from inference import *
from dataset import *

# Restrict the GPUs visible to this process to the listed device indices.
# NOTE(review): this is set after `import torch` and the project star-imports above; it only
# takes effect if none of them has initialised CUDA yet — confirm, or move it above the imports.
os.environ['CUDA_VISIBLE_DEVICES'] = '8,9,10,11,12,13,14,15'

## Data overview

Let's have a look at the data first.

In [4]:
# All inputs are read relative to this directory.
path = '.'
os.listdir(path)  # return value is not used


def _add_label_columns(frame):
    """Split `Image_Label` on '_' and store the pieces as new columns.

    Adds `label` (second piece) and `im_id` (first piece) to `frame` in place.
    """
    frame['label'] = frame['Image_Label'].map(lambda image_label: image_label.split('_')[1])
    frame['im_id'] = frame['Image_Label'].map(lambda image_label: image_label.split('_')[0])


train = pd.read_csv(f'{path}/train.csv')[:]
sub = pd.read_csv(f'{path}/sample_submission.csv')[:]

# Number of image files found on disk for each split.
n_train = len(os.listdir(f'{path}/train_images'))
n_test = len(os.listdir(f'{path}/test_images'))
print(f'There are {n_train} images in train dataset')
print(f'There are {n_test} images in test dataset')

_add_label_columns(train)
_add_label_columns(sub)

There are 5546 images in train dataset
There are 3699 images in test dataset


## Preparing data for modelling

First, let's build a table of unique image ids together with the number of non-empty masks each image has. This lets us stratify the train/validation split on that count.

In [5]:
# One row per training image that has at least one non-null EncodedPixels entry:
#   img_id - the part of Image_Label before the first '_'
#   count  - how many rows of that image have a non-null EncodedPixels
# rename_axis(...).reset_index(name=...) produces the columns ['img_id', 'count'] on every
# pandas version. Renaming the default 'index' / 'Image_Label' columns after reset_index()
# only works before pandas 2.0; from 2.0 on value_counts() names its result 'count' and keeps
# 'Image_Label' as the index name, so that rename would leave no 'img_id' column at all.
id_mask_count = (
    train.loc[train['EncodedPixels'].notnull(), 'Image_Label']
    .apply(lambda x: x.split('_')[0])
    .value_counts()
    .rename_axis('img_id')
    .reset_index(name='count')
)
# Single hold-out alternative to the k-fold split (kept for reference, not used):
# train_ids, valid_ids = train_test_split(id_mask_count['img_id'].values, random_state=42, stratify=id_mask_count['count'], test_size=0.1)

# Unique image ids of the submission file, in order of first appearance.
test_ids = sub['Image_Label'].apply(lambda x: x.split('_')[0]).drop_duplicates().values

In [6]:
# pipeline

# Shared training settings. pipeline() reads the encoder entries and "num_workers" from
# here, and adds "loaders", "net" and "k" before handing the dict to train_model().
train_parameters = dict(
    ENCODER='resnet34',            # alternative that was tried: 'efficientnet-b2'
    ENCODER_WEIGHTS='imagenet',
    ACTIVATION=None,
    num_epochs=30,
    num_workers=32,
)
def _make_fold_loaders(train_names, valid_names, rsize, batch_size, preprocessing_fn):
    """Build the {"train": ..., "valid": ...} DataLoaders for one cross-validation fold.

    Both datasets are CloudDataset views of the global `train` frame, restricted to the
    given image ids. The train loader shuffles and drops the last incomplete batch; the
    valid loader does neither.
    """
    train_dataset = CloudDataset(df=train,
                                 datatype='train',
                                 img_ids=train_names,
                                 transforms=get_training_augmentation(rsize),
                                 preprocessing=get_preprocessing(preprocessing_fn))
    valid_dataset = CloudDataset(df=train,
                                 datatype='valid',
                                 img_ids=valid_names,
                                 transforms=get_validation_augmentation(rsize, 'normal'),
                                 preprocessing=get_preprocessing(preprocessing_fn))
    return {
        "train": DataLoader(train_dataset,
                            batch_size=batch_size,
                            shuffle=True,
                            num_workers=train_parameters["num_workers"],
                            pin_memory=True,
                            drop_last=True),
        "valid": DataLoader(valid_dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=train_parameters["num_workers"]),
    }


def _ensemble_rle(config, num_folds, shared_parameters):
    """Run inference with each fold's `config` checkpoint and majority-vote the masks.

    Args:
        config: checkpoint selector forwarded to `infer_pipeline` ("best" or "last").
        num_folds: number of trained folds to ensemble (must be >= 1).
        shared_parameters: the `infer_pipeline` settings that are the same for every fold;
            "k", "config" and "avg_dice" are added per fold.
            NOTE(review): assumes infer_pipeline looks keys up by name, so key order
            does not matter - confirm.

    Returns:
        One entry per predicted mask: the `mask2rle` encoding of the voted mask, or ''
        when the voted mask has no positive pixel.
    """
    votes = 0  # same start value as the built-in sum(), so dtype promotion is unchanged
    avg_dice = 0
    for k in range(num_folds):
        # The running dice value goes in and infer_pipeline returns the new one
        # (presumably with this fold's dice added - it is divided by num_folds below).
        parameters = dict(shared_parameters, k=k, config=config, avg_dice=avg_dice)
        pixels_tta, avg_dice = infer_pipeline(parameters)
        # Accumulate immediately instead of keeping every fold's array alive until the end.
        votes = votes + np.array(pixels_tta)
        del pixels_tta
    print(f"{num_folds} Fold Average Dice: ", avg_dice/num_folds)

    encoded = []
    for mask_votes in votes:
        # A pixel is kept when at least half of the folds predicted it (ties count as positive).
        mask = np.where(mask_votes >= num_folds/2, 1, 0)
        encoded.append('' if mask.sum() == 0 else mask2rle(mask))
    return encoded


def pipeline(net="Unet",rsize=640, KFold=1, batch_size=16*5, test_set=sub, n_splits=20):
    """Train `KFold` stratified folds, ensemble their predictions and write a submission CSV.

    The image ids in the global `id_mask_count` are split into `n_splits` stratified folds
    (stratified on its "count" column); only the first `KFold` of them are trained. After
    training, the "best" and the "last" checkpoints of every trained fold are each ensembled
    by majority vote, and both results are written next to each other.

    Args:
        net: architecture name forwarded to `train_model` / `infer_pipeline`.
        rsize: size argument forwarded to the augmentation builders and to `infer_pipeline`.
        KFold: number of folds to actually train and ensemble, 1 <= KFold <= n_splits.
        batch_size: batch size of both the train and the valid loader.
        test_set: submission DataFrame with an "Image_Label" column. It is modified in
            place: "EncodedPixels_best" and "EncodedPixels_last" columns are added.
        n_splits: number of splits of the StratifiedKFold (validation share = 1/n_splits).

    Raises:
        ValueError: if KFold is outside 1..n_splits. Checked before any training starts,
            because the ensemble step needs one trained model per requested fold.

    Side effects:
        Updates the global `train_parameters` with "loaders", "net" and "k" for each fold,
        prints progress, and writes '<net>effib2_<rsize>.csv' to the working directory.
    """
    if not 1 <= KFold <= n_splits:
        raise ValueError(f"KFold must be between 1 and n_splits={n_splits}, got {KFold}")

    preprocessing_fn = smp.encoders.get_preprocessing_fn(train_parameters["ENCODER"], train_parameters["ENCODER_WEIGHTS"])

    # ---- training -------------------------------------------------------------------
    img_ids = id_mask_count['img_id'].values
    loader_fold = []
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=1212)
    for k, (train_ids, valid_ids) in enumerate(skf.split(img_ids, id_mask_count["count"])):

        # Only the first KFold splits are trained.
        if k == KFold:
            break

        print(); print('#'*10,'FOLD',k,'#'*10)
        print('Train on',len(train_ids),'Validate on',len(valid_ids))

        loaders = _make_fold_loaders(img_ids[train_ids], img_ids[valid_ids],
                                     rsize, batch_size, preprocessing_fn)
        loader_fold.append(loaders)

        train_parameters["loaders"] = loaders
        train_parameters["net"] = net
        train_parameters["k"] = k

        train_model(train_parameters)

    # ---- inference ------------------------------------------------------------------
    shared_parameters = {
        "sub": test_set,
        "net": net,
        "loader_fold": loader_fold,
        "test_ids": test_ids,
        "rsize": rsize,
        "preprocessing_fn": preprocessing_fn,
        "ENCODER": train_parameters["ENCODER"],
        "ENCODER_WEIGHTS": train_parameters["ENCODER_WEIGHTS"],
        "ACTIVATION": train_parameters["ACTIVATION"],
    }
    encoded_pixels_best = _ensemble_rle("best", KFold, shared_parameters)
    encoded_pixels_last = _ensemble_rle("last", KFold, shared_parameters)

    # ---- submission -----------------------------------------------------------------
    test_set['EncodedPixels_best'] = encoded_pixels_best
    test_set['EncodedPixels_last'] = encoded_pixels_last
    # NOTE(review): the "effib2" tag in the file name is fixed and does not follow
    # train_parameters["ENCODER"]; kept unchanged so existing consumers still find the file.
    test_set.to_csv('{}effib2_{}.csv'.format(net,rsize), columns=['Image_Label',
                                                                   'EncodedPixels_best',
                                                                   'EncodedPixels_last'], index=False)
    print("save success!")

## Model training

In [None]:
# Run training and inference with the settings for this experiment.
pipeline(
    net="FPN",
    rsize=320,
    KFold=3,
    batch_size=8 * 16,
    test_set=sub,
)


########## FOLD 0 ##########
Train on 5267 Validate on 279



Using lambda is incompatible with multiprocessing. Consider using regular functions or partial().



Let's use 8 GPUs!


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

1/30 * Epoch (train): 100% 41/41 [02:13<00:00,  3.26s/it, dice=0.479, loss=0.858]
1/30 * Epoch (valid): 100% 3/3 [00:27<00:00,  9.07s/it, dice=0.491, loss=0.752]
[2019-11-18 20:40:51,332] 
1/30 * Epoch 1 (train): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=525.8852 | _timers/batch_time=2.3048 | _timers/data_time=1.1014 | _timers/model_time=1.2033 | dice=0.3738 | loss=1.0782
1/30 * Epoch 1 (valid): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=282.1065 | _timers/batch_time=8.6314 | _timers/data_time=8.4849 | _timers/model_time=0.1462 | dice=0.4846 | loss=0.8298
2/30 * Epoch (train): 100% 41/41 [01:33<00:00,  2.29s/it, dice=0.526, loss=0.770]
2/30 * Epoch (valid): 100% 3/3 [00:26<00:00,  8.75s/it, dice=0.529, loss=0.707]
[2019-11-18 20:43:13,201] 
2/30 * Epoch 2 (train): _base/lr=0.0100 | _base/momentum=0.9000 | _timers/_fps=526.9155 | _timers/batch_time=1.3132 | _timers/data_time=1.0441 | _timers/model_time=0.2690 | dice=0.5000 | loss=0.8147
2/30 * Epoch 2 (valid): _

15/30 * Epoch (train): 100% 41/41 [01:31<00:00,  2.22s/it, dice=0.583, loss=0.680]
15/30 * Epoch (valid): 100% 3/3 [00:29<00:00,  9.83s/it, dice=0.559, loss=0.648]
[2019-11-18 21:12:48,558] 
15/30 * Epoch 15 (train): _base/lr=3.375e-05 | _base/momentum=0.9000 | _timers/_fps=506.6615 | _timers/batch_time=1.2448 | _timers/data_time=0.9292 | _timers/model_time=0.3155 | dice=0.5870 | loss=0.6659
15/30 * Epoch 15 (valid): _base/lr=3.375e-05 | _base/momentum=0.9000 | _timers/_fps=296.4589 | _timers/batch_time=9.4018 | _timers/data_time=9.2613 | _timers/model_time=0.1403 | dice=0.5576 | loss=0.7226
16/30 * Epoch (train): 100% 41/41 [01:33<00:00,  2.28s/it, dice=0.607, loss=0.637]
16/30 * Epoch (valid): 100% 3/3 [00:28<00:00,  9.63s/it, dice=0.566, loss=0.633]
[2019-11-18 21:15:05,350] 
16/30 * Epoch 16 (train): _base/lr=3.375e-05 | _base/momentum=0.9000 | _timers/_fps=513.3586 | _timers/batch_time=1.3261 | _timers/data_time=1.0260 | _timers/model_time=0.2999 | dice=0.5840 | loss=0.6705
16/30 


Using lambda is incompatible with multiprocessing. Consider using regular functions or partial().



Let's use 8 GPUs!


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

1/30 * Epoch (train): 100% 41/41 [01:37<00:00,  2.38s/it, dice=0.492, loss=0.835]
1/30 * Epoch (valid): 100% 3/3 [00:25<00:00,  8.46s/it, dice=0.485, loss=0.805]
[2019-11-18 21:22:27,318] 
1/30 * Epoch 1 (train): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=533.7822 | _timers/batch_time=1.4163 | _timers/data_time=1.1369 | _timers/model_time=0.2793 | dice=0.3703 | loss=1.0990
1/30 * Epoch 1 (valid): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=305.6825 | _timers/batch_time=8.0287 | _timers/data_time=7.8854 | _timers/model_time=0.1431 | dice=0.4961 | loss=0.8621
2/30 * Epoch (train): 100% 41/41 [01:34<00:00,  2.31s/it, dice=0.515, loss=0.807]
2/30 * Epoch (valid): 100% 3/3 [00:29<00:00,  9.75s/it, dice=0.572, loss=0.636]
[2019-11-18 21:24:57,694] 
2/30 * Epoch 2 (train): _base/lr=0.0100 | _base/momentum=0.9000 | _timers/_fps=533.8383 | _timers/batch_time=1.3517 | _timers/data_time=1.0956 | _timers/model_time=0.2559 | dice=0.5013 | loss=0.8124
2/30 * Epoch 2 (valid): _

15/30 * Epoch (train): 100% 41/41 [01:32<00:00,  2.26s/it, dice=0.598, loss=0.643]
15/30 * Epoch (valid): 100% 3/3 [00:25<00:00,  8.55s/it, dice=0.591, loss=0.603]
[2019-11-18 21:56:11,905] 
15/30 * Epoch 15 (train): _base/lr=0.0015 | _base/momentum=0.9000 | _timers/_fps=523.3195 | _timers/batch_time=1.2797 | _timers/data_time=1.0064 | _timers/model_time=0.2732 | dice=0.5812 | loss=0.6735
15/30 * Epoch 15 (valid): _base/lr=0.0015 | _base/momentum=0.9000 | _timers/_fps=291.0086 | _timers/batch_time=8.1191 | _timers/data_time=7.9688 | _timers/model_time=0.1501 | dice=0.5817 | loss=0.6875
16/30 * Epoch (train): 100% 41/41 [01:35<00:00,  2.32s/it, dice=0.580, loss=0.678]
16/30 * Epoch (valid): 100% 3/3 [00:25<00:00,  8.55s/it, dice=0.597, loss=0.592]
[2019-11-18 21:58:45,016] 
16/30 * Epoch 16 (train): _base/lr=0.0015 | _base/momentum=0.9000 | _timers/_fps=493.6854 | _timers/batch_time=1.3535 | _timers/data_time=1.0663 | _timers/model_time=0.2870 | dice=0.5890 | loss=0.6610
16/30 * Epoch 1