## Imports

In [12]:
import sys
import os
import cv2
import pdb
import time
import warnings
import random
import math
import glob
import operator

import numpy as np
import pandas as pd

from tqdm import tqdm_notebook as tqdm
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader, Dataset, sampler
from torchcontrib.optim import SWA

from matplotlib import pyplot as plt
import albumentations as albu
from albumentations.torch import ToTensor
warnings.filterwarnings("ignore")

import nvidia_smi
from apex import amp

In [25]:
from model import SegmentationModel
from scheduler import CosineAnnealingLR_with_Restart
from losses import ComboLoss, compute_iou_batch, soft_bce_loss, soft_focal_loss, soft_dice_loss

## RLE-Mask utility functions

In [14]:
#https://www.kaggle.com/paulorzp/rle-functions-run-lenght-encode-decode
def mask2rle(img):
    '''
    Run-length encode a binary mask.

    img: numpy array, 1 -> mask, 0 -> background
    Returns the encoding as a space-separated string "start length start length ...",
    where pixels are enumerated column by column and starts are 1-indexed.
    '''
    # Column-major flattening is the same traversal as `img.T.flatten()`.
    column_major = img.flatten(order='F')
    # Pad with background on both sides so runs touching the borders are closed.
    padded = np.concatenate([[0], column_major, [0]])
    # 1-indexed positions at which the value changes: start, end, start, end, ...
    change_points = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    starts = change_points[0::2]
    lengths = change_points[1::2] - starts
    interleaved = np.column_stack((starts, lengths)).ravel()
    return ' '.join(map(str, interleaved))

def make_mask(row_id, df):
    '''Given a row index, return image_id and mask (256, 1600, 4) from the dataframe `df`.

    row_id: positional index of the row in `df`.
    df: dataframe indexed by image file name whose first four columns hold the
        run-length encoded masks of classes 1-4 (missing value -> no defect).

    Returns (fname, masks) where `masks` is a float32 array of shape
    (256, 1600, 4); channel `c` holds the mask of class `c + 1`.

    The RLE is decoded with 1-indexed, column-major start positions, i.e. the
    exact inverse of `mask2rle` in this notebook.
    '''
    row = df.iloc[row_id]
    fname = row.name
    labels = row.iloc[:4]  # explicit positional slice: the first four columns
    masks = np.zeros((256, 1600, 4), dtype=np.float32)  # float32 is V.Imp
    # 4: class 1-4 (ch: 0-3)

    for idx, label in enumerate(labels.values):
        # Anything that is not a string (NaN of any flavour, None) means "no defect".
        # An identity check against `np.nan` misses NaNs that are not that singleton.
        if not isinstance(label, str):
            continue
        tokens = label.split()
        positions = map(int, tokens[0::2])
        lengths = map(int, tokens[1::2])
        mask = np.zeros(256 * 1600, dtype=np.uint8)
        for pos, le in zip(positions, lengths):
            # RLE starts are 1-indexed; shift to the 0-indexed flat buffer.
            begin = pos - 1
            mask[begin:begin + le] = 1
        # Pixels are numbered top-to-bottom, then left-to-right -> column-major.
        masks[:, :, idx] = mask.reshape(256, 1600, order='F')
    return fname, masks

## Dataloader + Augmentation

In [15]:
def get_transforms(phase, mean, std):
    '''Build the albumentations pipeline for the given phase.

    phase: "train" enables the random augmentations; any other value skips them.
    mean, std: per-channel statistics handed to `albu.Normalize`.

    Returns an `albu.Compose` that always ends with normalisation followed by
    `ToTensor`.
    '''
    # Random photometric jitter and horizontal flip are applied during training only.
    augmentations = [
        albu.RandomBrightness(limit=(-0.25, 0.25), p=0.5),
        albu.RandomContrast(limit=(-0.15, 0.4), p=0.5),
        albu.RGBShift(r_shift_limit=10, g_shift_limit=10, b_shift_limit=10, p=0.5),
        albu.HorizontalFlip(p=0.5),
    ] if phase == "train" else []

    # Every phase is normalised and converted to a tensor.
    finalisers = [
        albu.Normalize(mean=mean, std=std, p=1),
        ToTensor(),
    ]
    return albu.Compose(augmentations + finalisers)

class SteelDataset(Dataset):
    '''Dataset yielding (image, mask) pairs for the rows of `df`.

    df: dataframe indexed by image file name, in the layout `make_mask` expects.
    data_folder: directory containing the `train_images` sub-folder.
    mean, std: normalisation statistics forwarded to `get_transforms`.
    phase: "train" turns on the random augmentations in `get_transforms`.
    '''
    def __init__(self, df, data_folder, mean, std, phase):
        self.df = df
        self.root = data_folder
        self.mean = mean
        self.std = std
        self.phase = phase
        self.transforms = get_transforms(phase, mean, std)
        self.fnames = self.df.index.tolist()

    def __getitem__(self, idx):
        '''Load image `idx`, build its mask and run both through the transforms.

        Raises FileNotFoundError when the image file cannot be read.
        '''
        image_id, mask = make_mask(idx, self.df)
        image_path = os.path.join(self.root, "train_images", image_id)
        img = cv2.imread(image_path)
        # cv2.imread signals a missing/unreadable file by returning None instead
        # of raising; fail here with the path rather than deep inside the transforms.
        if img is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")
        augmented = self.transforms(image=img, mask=mask)
        img = augmented['image']
        # Per the original author's note the transformed mask is 1x256x1600x4
        # -- TODO confirm against the installed albumentations version.
        mask = augmented['mask']
        # Drop the leading axis and move the class channels first.
        mask = mask[0].permute(2, 0, 1)
        return img, mask

    def __len__(self):
        return len(self.fnames)
    
def _init_fn(worker_id):
    '''DataLoader `worker_init_fn`: seed NumPy deterministically per worker.

    worker_id: index of the worker process, supplied by the DataLoader.

    Each worker gets `SEED + worker_id`. Seeding all workers with the same value
    would make them draw identical NumPy random streams, while the offset keeps
    runs reproducible and the streams distinct.
    '''
    np.random.seed(int(SEED) + worker_id)

def provider(
    data_folder,
    train_df,
    valid_df,
    phase,
    mean=None,
    std=None,
    batch_size=8,
    num_workers=0,
):
    '''Returns dataloader for the model training

    data_folder: directory handed to `SteelDataset`.
    train_df, valid_df: dataframes for the two phases; `train_df` is used when
        `phase == "train"`, `valid_df` otherwise.
    phase: phase name, also forwarded to `SteelDataset`.
    mean, std: normalisation statistics forwarded to `SteelDataset`.
    batch_size, num_workers: forwarded to `DataLoader`.
    '''
    is_train = phase == "train"
    df = train_df if is_train else valid_df
    image_dataset = SteelDataset(df, data_folder, mean, std, phase)

    dataloader = DataLoader(
        image_dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=True,
        # Only training needs a random order; a fixed order keeps validation
        # batches (and the per-batch metrics averaged over them) reproducible.
        shuffle=is_train,
        worker_init_fn=_init_fn,
    )

    return dataloader

## Metric scheduler + Trainer

In [16]:
def predict(X, threshold):
    '''Binarise `X` (sigmoid output of the model) at `threshold`.

    Returns a uint8 numpy array that is 1 where `X > threshold` and 0 elsewhere;
    `X` itself is left untouched because the comparison runs on a copy.
    '''
    probabilities = np.array(X, copy=True)
    return np.greater(probabilities, threshold).astype('uint8')

class Meter:
    '''Collects per-batch IoU, BCE, focal and dice values over one epoch.'''
    def __init__(self, phase, epoch):
        # `phase` and `epoch` are accepted for the caller's convenience but not stored.
        self.base_threshold = 0.5
        self.iou_scores = []
        self.base_bce_scores = []
        self.base_focal_scores = []
        self.base_dice2_scores = []

    def update(self, targets, outputs):
        '''Record the metrics of one batch; `outputs` are raw model outputs (sigmoid is applied here).'''
        probs = torch.sigmoid(outputs)

        # IoU is computed on the thresholded predictions ...
        self.iou_scores.append(
            compute_iou_batch(predict(probs, self.base_threshold), targets, classes=[1])
        )
        # ... while the loss-style metrics use the probabilities directly.
        self.base_bce_scores.append(soft_bce_loss(probs, targets))
        self.base_focal_scores.append(soft_focal_loss(probs, targets))
        self.base_dice2_scores.append(soft_dice_loss(probs, targets))

    def get_metrics(self):
        '''Return the epoch averages as (bce, focal, iou, dice2); NaN IoU entries are ignored.'''
        return (
            np.mean(self.base_bce_scores),
            np.mean(self.base_focal_scores),
            np.nanmean(self.iou_scores),
            np.mean(self.base_dice2_scores),
        )

def epoch_log(phase, epoch, epoch_loss, meter, start):
    '''Print the end-of-epoch summary line and return (bce, focal, iou, dice2).

    Only `epoch_loss` and `meter` are used; `phase`, `epoch` and `start` are
    accepted but ignored.
    '''
    metrics = meter.get_metrics()
    bce, focal, iou, dice2 = metrics

    summary = "Loss: %0.4f | IoU: %0.4f | bce: %0.4f | focal: %0.4f | dice2: %0.4f" % (epoch_loss, iou, bce, focal, dice2)
    print(summary)

    return bce, focal, iou, dice2


In [17]:
class Trainer(object):
    '''This class takes care of training and validation of our model

    One instance trains one cross-validation fold: it wraps Adam in torchcontrib's
    SWA optimizer, initialises apex amp (opt level O2), builds the train/val
    dataloaders through `provider`, and keeps per-phase histories of the epoch
    loss and of the IoU / BCE / focal / dice values reported by `Meter`.

    Note: `__init__` reads the module-level global `data_folder` (it is not a
    constructor argument), so that name must be defined before instantiation.
    '''
    def __init__(self, model, train_df, valid_df, fold):
        '''Set up optimizer, amp, scheduler, dataloaders and history containers.

        model: network to train; it is moved to 'cuda:0'.
        train_df, valid_df: dataframes handed to `provider` for the two phases.
        fold: fold number, used in the scheduler's `out_dir` and in the
            checkpoint file name written by `start`.
        '''
         
        self.lr = 5e-4
        
        optimizer = SWA(optim.Adam(model.parameters(), lr=self.lr))
        
        # amp returns the patched model and optimizer; these are the objects used from here on.
        self.net, self.optimizer = amp.initialize(
            model.to('cuda:0'), optimizer, opt_level="O2",
            keep_batchnorm_fp32=True, loss_scale="dynamic")
        
        self.fold = fold
        self.num_workers = 0
        self.batch_size = {"train": 4, "val": 4}
        # Gradients are accumulated over this many batches (32 // 4 = 8) before each optimizer step.
        self.accumulation_steps = 32 // self.batch_size['train']
        self.num_epochs = 30
        self.best_loss = float("inf")
        self.phases = ["train", "val"]
        self.device = torch.device("cuda:0")
        # Global side effect: changes the default tensor type for the whole process.
        torch.set_default_tensor_type("torch.cuda.FloatTensor")
        self.criterion = ComboLoss(weights={'bce':2, 'dice':2,'focal':1})
        self.scheduler = CosineAnnealingLR_with_Restart(
            self.optimizer,
            T_max=6,
            T_mult=1, 
            model=self.net, 
            out_dir='segmentation_fold_' + str(fold), 
            take_snapshot=True, 
            eta_min=1e-6)
        self.net = self.net.to(self.device)
        # NOTE(review): the notebook's seeding cell sets cudnn.benchmark = False;
        # this line switches it back on -- confirm which behaviour is intended.
        cudnn.benchmark = True
        self.dataloaders = {
            phase: provider(
                data_folder=data_folder,  # module-level global, see class docstring
                train_df=train_df,
                valid_df=valid_df,
                phase=phase,
                mean=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225),
                batch_size=self.batch_size[phase],
                num_workers=self.num_workers,
            )
            for phase in self.phases
        }
        # Per-phase histories, one entry appended per epoch by `iterate`.
        self.losses = {phase: [] for phase in self.phases}
        self.iou_scores = {phase: [] for phase in self.phases}
        self.bce_scores = {phase: [] for phase in self.phases}
        self.focal_scores = {phase: [] for phase in self.phases}
        self.dice_scores = {phase: [] for phase in self.phases}
        
    def forward(self, images, targets):
        '''Move the batch to the device, run the network and return (loss, outputs).'''
        images = images.to(self.device)
        masks = targets.to(self.device)
        outputs = self.net(images)
        loss = self.criterion(outputs, masks)
        return loss, outputs

    def iterate(self, epoch, phase):
        '''Run one full pass over the dataloader of `phase` and return the epoch loss.

        In the "train" phase the amp-scaled loss is back-propagated for every
        batch and the optimizer steps every `accumulation_steps` batches. The
        metrics of the epoch are printed and appended to the history dicts.
        '''
        meter = Meter(phase, epoch)
        
        start = time.strftime("%H:%M:%S")
        print(f"Starting epoch: {epoch} | phase: {phase} | ⏰: {start}")
        
        batch_size = self.batch_size[phase]  # not used below
        self.net.train(phase == "train")
        dataloader = self.dataloaders[phase]
        running_loss = 0.0
        total_batches = len(dataloader)
        # Also clears gradients left over from a previous pass whose batch count
        # was not a multiple of `accumulation_steps` (those never reach a step).
        self.optimizer.zero_grad()
    
        for itr, batch in enumerate(dataloader):
            images, targets = batch
            loss, outputs = self.forward(images, targets)
            # Scaled in both phases; undone again when `epoch_loss` is computed below.
            loss = loss / self.accumulation_steps
            if phase == "train":
                with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                    scaled_loss.backward()
                # (the amp-scaled backward above is used instead of a plain loss.backward())
                if (itr + 1 ) % self.accumulation_steps == 0:
                    self.optimizer.step()
                    self.optimizer.zero_grad()
            running_loss += loss.item()
            outputs = outputs.detach().cpu()
            meter.update(targets, outputs)

        # Multiply back by accumulation_steps to report the mean un-scaled batch loss.
        epoch_loss = (running_loss * self.accumulation_steps) / total_batches
        
        bce, focal, iou, dice = epoch_log(phase, epoch, epoch_loss, meter, start)   
        self.losses[phase].append(epoch_loss)
        self.bce_scores[phase].append(bce)
        self.focal_scores[phase].append(focal)
        self.iou_scores[phase].append(iou)
        self.dice_scores[phase].append(dice)
        
        # Report GPU utilisation; NVML is (re-)initialised on every call.
        nvidia_smi.nvmlInit()
        handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
        # card id 0 hardcoded here, there is also a call to get all available card ids, so we could iterate

        res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
        print(f'gpu: {res.gpu}%, gpu-mem: {res.memory}%')
        
        torch.cuda.empty_cache()
        
        return epoch_loss

    def start(self):
        '''Train for `num_epochs` epochs, validating after each one.

        The checkpoint `./model{fold}.pth` is written whenever the validation
        loss improves. SWA averages are updated on selected epochs and swapped
        into the model once after the last epoch.
        '''
        for epoch in range(self.num_epochs):
            self.iterate(epoch, "train")
            # Assembled right after the training pass, before validation runs.
            state = {
                "epoch": epoch,
                "best_loss": self.best_loss,
                "state_dict": self.net.state_dict(),
                "optimizer": self.optimizer.state_dict(),
                'amp': amp.state_dict()
            }
            
            #to prevent GPU memory from overflowing on validation
            with torch.no_grad():
                val_loss = self.iterate(epoch, "val")
                self.scheduler.step(val_loss)
                
            if val_loss < self.best_loss:
                print("******** New optimal found, saving state ********")
                state["best_loss"] = self.best_loss = val_loss
                torch.save(state, f'./model{self.fold}.pth' )
                
            # `and` binds tighter than `or`: update on epochs 0 and 9, and on
            # every multiple of 3 after epoch 9.
            if epoch % 9 == 0 or epoch > 9 and epoch % 3 == 0:
                self.optimizer.update_swa()
            print()
        # NOTE(review): nothing is saved in this method after the swap, so the
        # SWA weights only live in `self.net` -- confirm whether they should be
        # written to disk.
        self.optimizer.swap_swa_sgd()


### Training

In [18]:
# Global seed shared by every random number generator used in this notebook.
SEED = 69
os.environ["PYTHONHASHSEED"] = str(SEED)

# Seed Python, NumPy and PyTorch (CPU and CUDA) with the same value.
for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    seed_fn(SEED)

# Ask cuDNN for deterministic kernels and disable its auto-tuner.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [19]:
# Locations of the competition data, all relative to the notebook's working directory.
data_folder = "./data/"  # handed to the dataloaders by Trainer
train_df_path = './data/train.csv'  # annotations read with pd.read_csv
sample_submission_path = './data/sample_submission.csv'
traint_data_folder = './data/train_images'
test_data_folder = "./data/test_images"

In [20]:
# Raw annotations exactly as stored in the CSV.
train_raw = pd.read_csv(train_df_path)

# Split the combined 'ImageId_ClassId' key on '_' into its two parts.
image_ids, class_ids = zip(*train_raw['ImageId_ClassId'].str.split('_'))

# Reshape to one row per image and one column per class id holding that class's
# 'EncodedPixels', then count the non-missing entries of each row as `defects`.
df = (
    train_raw
    .assign(ImageId=image_ids, ClassId=class_ids)
    .astype({'ClassId': int})
    .pivot(index='ImageId', columns='ClassId', values='EncodedPixels')
    .assign(defects=lambda wide: wide.count(axis=1))
)

In [21]:
import math

In [26]:
from sklearn.model_selection import KFold

# 5-fold cross-validation: one freshly initialised model and Trainer per fold.
cv = KFold(n_splits=5, random_state=SEED, shuffle=True)
model = model_trainer = None
for i, (train_index, validation_index) in enumerate(cv.split(df)):
    # Drop the previous fold's model and trainer *before* building the next
    # ones, so the references to two networks are never held at the same time.
    # Unlike a `del` at the end of the loop body, this keeps the last fold's
    # `model_trainer` alive afterwards -- the plotting cell below reads its
    # histories and would otherwise fail with a NameError.
    model = model_trainer = None
    model = SegmentationModel("efficientnet-b3", encoder_weights="imagenet", classes=4, activation='sigmoid')
    model_trainer = Trainer(model, df.iloc[train_index], df.iloc[validation_index], i+1)
    model_trainer.start()

Selected optimization level O2:  FP16 training with FP32 batchnorm and FP32 master weights.

Defaults for this optimization level are:
enabled                : True
opt_level              : O2
cast_model_type        : torch.float16
patch_torch_functions  : False
keep_batchnorm_fp32    : True
master_weights         : True
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O2
cast_model_type        : torch.float16
patch_torch_functions  : False
keep_batchnorm_fp32    : True
master_weights         : True
loss_scale             : dynamic
Starting epoch: 0 | phase: train | ⏰: 21:57:55
Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0
Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


KeyboardInterrupt: 

### Plotting losses

In [None]:
# PLOT TRAINING
# Pull the per-phase metric histories out of the trainer in one go.
losses, iou_scores, bce_scores, focal_scores, dice_scores = (
    model_trainer.losses,
    model_trainer.iou_scores,
    model_trainer.bce_scores,
    model_trainer.focal_scores,
    model_trainer.dice_scores,
)

def plot(scores, name):
    '''Plot the per-epoch train and validation curves of one metric.

    scores: dict with "train" and "val" keys, each a sequence of per-epoch values.
    name: metric name used for the title, the y-axis label and the legend.

    Each curve gets an x-axis of its own length, so histories of different
    lengths (e.g. a run interrupted between the train and val pass) still plot.
    '''
    plt.figure(figsize=(15,5))
    for phase in ("train", "val"):
        values = scores[phase]
        plt.plot(range(len(values)), values, label=f'{phase} {name}')
    plt.title(f'{name} plot'); plt.xlabel('Epoch'); plt.ylabel(f'{name}');
    plt.legend()
    plt.show()

# One figure per tracked quantity, in the same order as before.
for history, title in (
    (losses, "Combo loss"),
    (iou_scores, "IoU score"),
    (bce_scores, "BCE loss"),
    (dice_scores, "Dice loss"),
    (focal_scores, "Focal loss"),
):
    plot(history, title)