This notebook is the GPU version of my previous [notebook](https://www.kaggle.com/vineeth1999/hubmap-eda-pytorch-efficientunet-offline-training/notebook). Since the EDA filled my GPU memory completely, I am running the training process separately.

<img src='https://www.ml.cmu.edu/news/news-archive/2018/september/research-scientists-will-help-build-3d-cellular-map-of-human-body-machine-learning.jpg'>
<h1><center>HuBMAP: Hacking the Kidney - Training and Inference</center></h1>

# Pytorch Modelling GPU Offline

We use the **PyTorch** implementation of the **UNet** model from **https://github.com/qubvel/segmentation_models.pytorch**, which is installed offline in the cell below.

In [None]:
# Stage the vendored packages in a local directory so pip can install them without network access.
!mkdir -p /tmp/pip/cache/
# The source archives are stored with an .xyz extension in the input dataset and are copied
# under .tar.gz names (presumably so pip recognises them as source distributions - TODO confirm).
!cp ../input/segmentationmodelspytorch/segmentation_models/efficientnet_pytorch-0.6.3.xyz /tmp/pip/cache/efficientnet_pytorch-0.6.3.tar.gz
!cp ../input/segmentationmodelspytorch/segmentation_models/pretrainedmodels-0.7.4.xyz /tmp/pip/cache/pretrainedmodels-0.7.4.tar.gz
!cp ../input/segmentationmodelspytorch/segmentation_models/segmentation-models-pytorch-0.1.2.xyz /tmp/pip/cache/segmentation_models_pytorch-0.1.2.tar.gz
# Two timm wheels (0.1.20 and 0.2.1) are staged; which one gets installed is left to pip.
!cp ../input/segmentationmodelspytorch/segmentation_models/timm-0.1.20-py3-none-any.whl /tmp/pip/cache/
!cp ../input/segmentationmodelspytorch/segmentation_models/timm-0.2.1-py3-none-any.whl /tmp/pip/cache/
# --no-index disables package-index lookups; --find-links resolves packages from the staged directory only.
!pip install --no-index --find-links /tmp/pip/cache/ efficientnet-pytorch
!pip install --no-index --find-links /tmp/pip/cache/ segmentation-models-pytorch

## Necessary Imports

In [None]:
from sklearn.model_selection import GroupKFold
import torch
from torch import nn
import torchvision
import cv2
import os
import numpy as np
import pandas as pd
from torchvision import transforms
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
from scipy.ndimage.interpolation import zoom
import albumentations as A
from torch.nn import functional as F
import matplotlib.pyplot as plt
from PIL import Image
import tifffile as tiff
import cv2
import zipfile
import time
import random
from albumentations.pytorch import ToTensorV2
from segmentation_models_pytorch.unet import Unet
from tqdm.notebook import tqdm

In [None]:
# Copy the EfficientNet b0-b7 weight files from the input dataset into the torch hub checkpoint directory.
# NOTE(review): presumably this lets the pretrained encoder weights load without a download -
# confirm that this is the cache path the installed library actually reads.
!mkdir -p /root/.cache/torch/hub/checkpoints/
!cp ../input/efficientnet-pytorch-b0-b7/efficientnet-b0-355c32eb.pth /root/.cache/torch/hub/checkpoints/
!cp ../input/efficientnet-pytorch-b0-b7/efficientnet-b1-f1951068.pth /root/.cache/torch/hub/checkpoints/
!cp ../input/efficientnet-pytorch-b0-b7/efficientnet-b2-8bb594d6.pth /root/.cache/torch/hub/checkpoints/
!cp ../input/efficientnet-pytorch-b0-b7/efficientnet-b3-5fb5a3c3.pth /root/.cache/torch/hub/checkpoints/
!cp ../input/efficientnet-pytorch-b0-b7/efficientnet-b4-6ed6700e.pth /root/.cache/torch/hub/checkpoints/
!cp ../input/efficientnet-pytorch-b0-b7/efficientnet-b5-b6417697.pth /root/.cache/torch/hub/checkpoints/
!cp ../input/efficientnet-pytorch-b0-b7/efficientnet-b6-c76e70fd.pth /root/.cache/torch/hub/checkpoints/
!cp ../input/efficientnet-pytorch-b0-b7/efficientnet-b7-dcc49843.pth /root/.cache/torch/hub/checkpoints/

In [None]:
def seed_everything(seed):
    """Seed every random number generator used by the notebook.

    Seeds Python's `random`, NumPy and PyTorch, exports PYTHONHASHSEED, and
    asks cuDNN to use deterministic kernels.

    Args:
        seed: Integer seed applied to each generator.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    torch.backends.cudnn.deterministic = True


seed_everything(42)

# Module-level settings. They are not referenced by the cells visible in this section
# (presumably tile size, downscale factor and prediction threshold - TODO confirm).
sz = 256
reduce = 4
TH = 0.39

## Dataset

In [None]:
def enc2mask(encs, shape):
    """Decode run-length encodings into a single label mask.

    Args:
        encs: Iterable of RLE strings of the form "start length start length ...",
            where each start is a 1-based index into the flattened mask. Float NaN
            entries (missing encodings) are skipped.
        shape: Two-element shape of the flat buffer before it is transposed.

    Returns:
        np.ndarray of dtype uint8 with shape (shape[1], shape[0]). Pixels covered by
        the m-th encoding (0-based) hold the value m + 1; all other pixels are 0.
        Because the buffer is uint8, label values above 255 do not fit.
    """
    img = np.zeros(shape[0]*shape[1], dtype=np.uint8)
    for m, enc in enumerate(encs):
        # The `np.float` alias no longer exists in current NumPy; it was the builtin
        # float, which np.float64 also subclasses, so this check covers the same values.
        if isinstance(enc, float) and np.isnan(enc):
            continue
        tokens = enc.split()
        # Tokens alternate start/length; a trailing unpaired token is ignored.
        for start, length in zip(tokens[0::2], tokens[1::2]):
            begin = int(start) - 1  # RLE starts are 1-based
            img[begin:begin + int(length)] = 1 + m
    return img.reshape(shape).T

def mask2enc(mask, n=1):
    """Run-length encode each label 1..n of a label mask.

    The mask is transposed and flattened before encoding, and run starts are
    1-based.

    Args:
        mask: 2-D array of integer labels.
        n: Highest label to encode; labels 1 through n are processed in order.

    Returns:
        List of length n. Entry i-1 is the "start length ..." string for label i,
        or np.nan when the label does not occur in the mask.
    """
    flat = mask.T.flatten()
    encodings = []
    for label in range(1, n + 1):
        hits = (flat == label).astype(np.int8)
        if hits.sum() == 0:
            encodings.append(np.nan)
            continue
        # Pad with zeros so runs touching either end still produce a start and an end edge.
        padded = np.concatenate([[0], hits, [0]])
        edges = np.where(padded[1:] != padded[:-1])[0] + 1
        # Turn every (start, end) pair into (start, length).
        edges[1::2] -= edges[::2]
        encodings.append(' '.join(map(str, edges)))
    return encodings

# Source: https://www.kaggle.com/bguberfain/memory-aware-rle-encoding
# (with bug fix)
def rle_encode_less_memory(img):
    """Run-length encode a binary mask as a "start length ..." string.

    The mask is transposed and flattened (a copy, so `img` itself is not
    modified), and run starts are 1-based.

    Args:
        img: 2-D array; runs are detected wherever neighbouring flattened
            values differ.

    Returns:
        Space-separated string of alternating run starts and run lengths;
        an empty string when no run is found.
    """
    # watch out for the bug (warning kept from the original; the specific issue is not described here)
    flat = img.T.flatten()

    # This simplified method requires the first and last pixel to be zero,
    # so both are overwritten on the flattened copy.
    flat[0] = flat[-1] = 0
    change_points = np.where(flat[1:] != flat[:-1])[0] + 2
    # Turn every (start, end) pair into (start, length).
    change_points[1::2] -= change_points[::2]

    return ' '.join(map(str, change_points))

In [None]:
class HuBMAPDataset(Dataset):
    """Dataset of image tiles and masks read from the 256256-hubmap input folders.

    Each item is an (image, mask) pair of channel-first float32 NumPy arrays.
    The image is converted from OpenCV's BGR channel order to RGB and scaled
    by 1/255; the mask is the first channel of the mask file, with its values
    passed through unchanged (assumes the mask files already hold the target
    labels the loss expects - TODO confirm).

    Args:
        ids: Sequence of file names; the same name is looked up in both the
            image folder and the mask folder.
        phase: 'train' selects the training augmentation pipeline; any other
            value selects the validation pipeline.
    """

    IMAGE_DIR = "../input/256256-hubmap/train"
    MASK_DIR = "../input/256256-hubmap/masks"

    def __init__(self, ids, phase):
        self.ids = ids
        if phase == 'train':
            self.transform = get_train_transform()
        else:
            self.transform = get_val_transform()

    @staticmethod
    def _read(path):
        """Read an image with OpenCV, raising instead of returning None on failure."""
        data = cv2.imread(path)
        if data is None:
            # cv2.imread signals a missing or unreadable file by returning None.
            raise FileNotFoundError(f"Could not read image file: {path}")
        return data

    def __getitem__(self, idx):
        """Load, augment and return the (image, mask) pair at position `idx`.

        Raises:
            FileNotFoundError: If the image or the mask file cannot be read.
        """
        name = self.ids[idx]
        # Reverse the channel axis (BGR -> RGB) and scale to [0, 1].
        img = self._read(f"{self.IMAGE_DIR}/{name}").astype("float32")[:, :, ::-1]
        img /= 255.
        # Keep a trailing channel axis so image and mask are both H x W x C.
        mask = self._read(f"{self.MASK_DIR}/{name}")[:, :, 0:1]

        transformed = self.transform(image=img, mask=mask)
        # H x W x C -> C x H x W.
        img = transformed['image'].transpose(2, 0, 1).astype('float32')
        mask = transformed['mask'].transpose(2, 0, 1).astype('float32')
        return img, mask

    def __len__(self):
        """Return the number of file names in the dataset."""
        return len(self.ids)

        
def get_train_transform():
    """Build the albumentations pipeline used for training samples.

    The pipeline applies, in order: a horizontal flip, one photometric change
    (contrast, gamma or brightness) with probability 0.3, one geometric
    distortion (elastic, grid or optical) with probability 0.3, a
    shift/scale/rotate with probability 0.2, and a final resize to 256x256.

    Returns:
        An `A.Compose` object, called as `transform(image=..., mask=...)`.
    """
    photometric = A.OneOf([
        A.RandomContrast(),
        A.RandomGamma(),
        A.RandomBrightness(),
    ], p=0.3)
    geometric = A.OneOf([
        A.ElasticTransform(alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03),
        A.GridDistortion(),
        A.OpticalDistortion(distort_limit=2, shift_limit=0.5),
    ], p=0.3)
    steps = [
        A.HorizontalFlip(),
        photometric,
        geometric,
        A.ShiftScaleRotate(p=0.2),
        A.Resize(256, 256, always_apply=True),
    ]
    return A.Compose(steps, p=1.)

def get_val_transform():
    """Build the albumentations pipeline used for validation samples.

    Returns:
        An `A.Compose` object whose only step is a resize to 256x256.
    """
    resize = A.Resize(256, 256, always_apply=True)
    return A.Compose([resize], p=1.)

## DataLoader

In [None]:
# os.listdir returns entries in arbitrary order; sorting keeps the row order, and
# everything derived from it (such as seeded shuffling), reproducible across runs.
directory_list = sorted(os.listdir('../input/256256-hubmap/train'))
dir_df = pd.DataFrame(directory_list, columns=['Image_Paths'])
# Preview the first rows only instead of rendering the whole frame.
dir_df.head()

In [None]:
def prepare_train_valid_dataloader(df, fold, train_batch_size=16, valid_batch_size=4, num_workers=4):
    """Build the training and validation DataLoaders for a fold split.

    Args:
        df: DataFrame with an "Image_Paths" column (file names) and a "Folds"
            column (fold id of each row).
        fold: Fold id, or iterable of fold ids, held out for validation. Rows
            whose fold is not listed are used for training.
        train_batch_size: Batch size of the (shuffled) training loader.
        valid_batch_size: Batch size of the (unshuffled) validation loader.
        num_workers: Number of worker processes for each loader.

    Returns:
        Tuple `(train_loader, val_loader)`.
    """
    # Series.isin needs a list-like, so wrap a bare fold id.
    if isinstance(fold, (int, np.integer)):
        fold = [fold]
    in_valid = df.Folds.isin(fold)
    train_ids = df.loc[~in_valid, "Image_Paths"].values
    val_ids = df.loc[in_valid, "Image_Paths"].values
    train_ds = HuBMAPDataset(train_ids, "train")
    val_ds = HuBMAPDataset(val_ids, "val")
    train_loader = DataLoader(train_ds, batch_size=train_batch_size, pin_memory=True,
                              shuffle=True, num_workers=num_workers)
    val_loader = DataLoader(val_ds, batch_size=valid_batch_size, pin_memory=True,
                            shuffle=False, num_workers=num_workers)
    return train_loader, val_loader

## Model

In [None]:
class HuBMAP(nn.Module):
    """Segmentation network: a `Unet` with an 'efficientnet-b5' encoder.

    The Unet is built with ImageNet encoder weights, one output class and no
    final activation, so `forward` returns whatever the Unet produces
    (raw scores; callers apply the sigmoid themselves).
    """

    def __init__(self):
        super().__init__()
        self.cnn_model = Unet(
            'efficientnet-b5',
            encoder_weights="imagenet",
            classes=1,
            activation=None,
        )

    def forward(self, imgs):
        """Run the wrapped Unet on a batch of images and return its output."""
        return self.cnn_model(imgs)

## Loss Function

<img src = 'https://wikimedia.org/api/rest_v1/media/math/render/svg/80f87a71d3a616a0939f5360cec24d702d2593a2'>

In [None]:
class DiceLoss(nn.Module):
    """Soft Dice coefficient between sigmoid(inputs) and targets.

    NOTE(review): despite the class name, `forward` returns the Dice
    coefficient itself (larger means more overlap), not `1 - dice`. Confirm
    the intended use before minimising this value as a loss.
    """

    def __init__(self, weight=None, size_average=True):
        # `weight` and `size_average` are accepted but not used.
        super().__init__()

    def forward(self, inputs, targets, smooth=1):
        """Compute the smoothed Dice coefficient over all elements.

        Args:
            inputs: Raw scores; a sigmoid is applied here, so remove it if the
                model already ends in a sigmoid or equivalent activation.
            targets: Tensor with the same number of elements as `inputs`.
            smooth: Constant added to numerator and denominator.

        Returns:
            Scalar tensor holding the Dice coefficient.
        """
        probs = F.sigmoid(inputs).view(-1)
        truth = targets.view(-1)
        overlap = (probs * truth).sum()
        return (2. * overlap + smooth) / (probs.sum() + truth.sum() + smooth)
    
    
    
class DiceBCELoss(nn.Module):
    """Binary cross-entropy plus soft Dice loss, computed from raw scores.

    The Dice term follows the formula shown above this cell:
    `1 - (2 * |X n Y| + smooth) / (|X| + |Y| + smooth)`, with the set sizes
    taken as sums over all elements.
    """

    def __init__(self, weight=None, size_average=True):
        # `weight` and `size_average` are accepted but not used.
        super(DiceBCELoss, self).__init__()

    def forward(self, inputs, targets, smooth=1):
        """Compute BCE + Dice loss over all elements.

        Args:
            inputs: Raw scores; a sigmoid is applied here, so remove it if the
                model already ends in a sigmoid or equivalent activation.
            targets: Float tensor of values in [0, 1] with the same number of
                elements as `inputs`.
            smooth: Constant added to the Dice numerator and denominator to
                avoid division by zero on empty masks.

        Returns:
            Scalar tensor: mean binary cross-entropy plus the Dice loss.
        """
        # Flatten label and prediction tensors.
        probs = F.sigmoid(inputs).view(-1)
        truth = targets.view(-1)

        # Sums (not means) keep `smooth` small relative to the pixel counts; with
        # means every term is at most 1 and the smoothing constant dominates the ratio.
        intersection = (probs * truth).sum()
        dice_loss = 1 - (2. * intersection + smooth) / (probs.sum() + truth.sum() + smooth)
        bce = F.binary_cross_entropy(probs, truth, reduction='mean')

        return bce + dice_loss

## Train Function

In [None]:
def HuBMAPLoss(images, targets, model, device):
    """Run the model on a batch and compute its DiceBCELoss.

    Args:
        images: Input batch; moved to `device` before the forward pass.
        targets: Target batch; moved to `device` before the loss is computed.
        model: Module to evaluate; it is moved to `device` on every call.
        device: Device used for the model and both tensors.

    Returns:
        Tuple `(loss, outputs)`: the loss tensor and the model outputs.
    """
    model.to(device)
    outputs = model(images.to(device))
    loss = DiceBCELoss()(outputs, targets.to(device))
    return loss, outputs

In [None]:
def train_one_epoch(epoch, model, device, optimizer, scheduler, trainloader,
                    accumulation_steps=4, log_every=10):
    """Train `model` for one pass over `trainloader` with gradient accumulation.

    Args:
        epoch: Epoch index, used only in the progress message.
        model: Module to train; switched to train mode.
        device: Device passed on to `HuBMAPLoss`.
        optimizer: Optimizer stepped once every `accumulation_steps` batches
            and on the final batch.
        scheduler: Kept for interface compatibility and not stepped here.
            Stepping a per-epoch scheduler on every optimizer step would shrink
            the learning rate many times within a single epoch.
        trainloader: Sized iterable of `(images, targets)` batches.
        accumulation_steps: Number of batches whose gradients are accumulated
            before each optimizer step.
        log_every: Print a progress line every this many batches.

    Returns:
        Mean training loss over the epoch as a float (NaN for an empty loader).
    """
    model.train()
    t = time.time()
    n_batches = len(trainloader)
    total_loss = 0.0
    # Start from clean gradients in case anything accumulated before this call.
    optimizer.zero_grad()
    for step, (images, targets) in enumerate(trainloader, start=1):
        loss, _ = HuBMAPLoss(images, targets, model, device)
        # backward() adds to the existing gradients, so the accumulated batches are
        # summed (not averaged) until the next optimizer step.
        loss.backward()
        is_last = step == n_batches
        if step % accumulation_steps == 0 or is_last:
            optimizer.step()
            optimizer.zero_grad()
        total_loss += loss.detach().item()
        if step % log_every == 0 or is_last:
            # Report the running mean over the batches seen so far.
            print(
                f'epoch {epoch} train step {step}/{n_batches}, '
                f'loss: {total_loss/step:.4f}, '
                f'time: {(time.time() - t):.4f}',
                end='\n' if is_last else '\r',
            )
    return total_loss / n_batches if n_batches else float('nan')

            
        
def valid_one_epoch(epoch, model, device, optimizer, scheduler, validloader, log_every=10):
    """Evaluate `model` on `validloader` and step the scheduler once.

    This function does not disable autograd itself; call it inside
    `torch.no_grad()`.

    Args:
        epoch: Epoch index, used only in the progress message.
        model: Module to evaluate; switched to eval mode.
        device: Device passed on to `HuBMAPLoss`.
        optimizer: Unused; kept for interface compatibility.
        scheduler: Learning-rate scheduler stepped once after the whole pass.
            A `ReduceLROnPlateau` receives the mean validation loss; any other
            scheduler is stepped without arguments. May be None.
        validloader: Sized iterable of `(images, targets)` batches.
        log_every: Print a progress line every this many batches.

    Returns:
        Mean validation loss as a float (NaN for an empty loader, in which case
        the scheduler is not stepped).
    """
    model.eval()
    t = time.time()
    n_batches = len(validloader)
    total_loss = 0.0
    for step, (images, targets) in enumerate(validloader, start=1):
        loss, _ = HuBMAPLoss(images, targets, model, device)
        total_loss += loss.detach().item()
        is_last = step == n_batches
        if step % log_every == 0 or is_last:
            # Report the running mean over the batches seen so far.
            print(
                f'epoch {epoch} valid step {step}/{n_batches}, '
                f'loss: {total_loss/step:.4f}, '
                f'time: {(time.time() - t):.4f}',
                end='\n' if is_last else '\r',
            )
    if n_batches == 0:
        return float('nan')

    mean_loss = total_loss / n_batches
    if scheduler is not None:
        # Step exactly once per epoch, with the complete validation loss. Only
        # ReduceLROnPlateau takes a metric; for other schedulers the positional
        # argument of step() is an epoch index, not a loss.
        if isinstance(scheduler, ReduceLROnPlateau):
            scheduler.step(mean_loss)
        else:
            scheduler.step()
    return mean_loss

## Creating Folds Column

In [None]:
# Assign every row of dir_df to one of FOLDS validation folds, stored in a new 'Folds' column.
FOLDS = 5
gkf = GroupKFold(FOLDS)
dir_df['Folds'] = 0
# NOTE(review): the groups are the values of dir_df's first column (the file names), so each
# row appears to form its own group - confirm whether tiles should instead be grouped by
# their source image to avoid related tiles landing in different folds.
for fold, (tr_idx, val_idx) in enumerate(gkf.split(dir_df, groups=dir_df[dir_df.columns[0]].values)):
    # Rows held out in this iteration receive the iteration index as their fold id.
    dir_df.loc[val_idx, 'Folds'] = fold

## The Real Training

In [None]:
# Train one model per fold. Only folds 0 and 1 are trained: the loop breaks once fold > 1.
# tr_idx / val_idx are not used below; the split is taken from the 'Folds' column instead.
for fold, (tr_idx, val_idx) in enumerate(gkf.split(dir_df, groups=dir_df[dir_df.columns[0]].values)):
    if fold>1:
        break
    # Rows whose 'Folds' value equals `fold` are validated on; all other rows are trained on.
    trainloader, validloader = prepare_train_valid_dataloader(dir_df, [fold])
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # A fresh model and optimizer are created for every fold.
    model = HuBMAP().to(device)
    optimizer = Adam(model.parameters(), lr=5e-4)
    # StepLR with step_size=1 multiplies the learning rate by gamma (0.1) on every
    # scheduler.step() call; when that call happens is decided inside
    # train_one_epoch / valid_one_epoch.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, gamma=0.1, step_size=1)
    #num_epochs = 15
    num_epochs = 2
    for epoch in range(num_epochs):
        train_one_epoch(epoch, model, device, optimizer, scheduler, trainloader)
        # Gradients are disabled for the validation pass.
        with torch.no_grad():
            valid_one_epoch(epoch, model, device, optimizer, scheduler, validloader)
    # Save the weights as they are after the last epoch of this fold.
    torch.save(model.state_dict(),f'FOLD-{fold}-model.pth')