In [None]:
!pip install /kaggle/input/efficientnet-pytorch/efficientnet_pytorch-0.7.0-py3-none-any.whl

In [None]:
# !pip install torch-lr-finder

In [None]:
!mkdir /root/.cache/torch /root/.cache/torch/hub /root/.cache/torch/hub/checkpoints/
!cp /kaggle/input/efficient-b0-b7-from-angtk/*.pth /root/.cache/torch/hub/checkpoints/

In [None]:
import rasterio
from rasterio.windows import Window

In [None]:
import numpy as np
import pandas as pd
import pathlib, sys, os, random, time
import numba, cv2, gc
from efficientnet_pytorch import EfficientNet

import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

from tqdm.notebook import tqdm

import albumentations as A

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as D

import torchvision
from torchvision import transforms as T

In [None]:
def set_seeds(seed=17):
    """Seed every random number generator this notebook touches.

    Seeds Python's ``random``, NumPy and PyTorch (CPU, the current CUDA device
    and all CUDA devices), exports ``PYTHONHASHSEED`` and switches cuDNN to
    deterministic mode.

    Args:
        seed: integer seed shared by all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_fn in seeders:
        seed_fn(seed)
    torch.backends.cudnn.deterministic = True

set_seeds();

In [None]:
# Root folder of the competition data; HubDataset reads `train.csv` and
# `train/<id>.tiff` beneath it.
DATA_PATH = '../input/hubmap-kidney-segmentation'
# Use the GPU when torch can see one, otherwise fall back to the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
# used for converting the decoded image to rle mask
def rle_encode(im):
    '''
    Run-length encode a binary mask, scanning it in column-major order.

    im: numpy array, 1 - mask, 0 - background
    Returns the runs as a space-separated string of 1-based
    "start length" pairs (empty string when the mask has no foreground).
    '''
    flat = im.flatten(order='F')
    # Pad with background on both sides so a run touching either end of the
    # array still produces a transition.
    padded = np.concatenate([[0], flat, [0]])
    edges = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Even slots hold run starts, odd slots hold run ends: turn ends into lengths.
    edges[1::2] -= edges[::2]
    return ' '.join(map(str, edges))

def rle_decode(mask_rle, shape=(256, 256)):
    '''
    Rebuild a binary mask from its run-length string.

    mask_rle: run-length string of 1-based "start length" pairs,
              counted in column-major order
    shape: (height, width) of the array to return
    Returns a uint8 numpy array, 1 - mask, 0 - background
    '''
    tokens = mask_rle.split()
    # Tokens alternate start, length, start, length, ...; starts become 0-based.
    starts = np.asarray(tokens[0::2], dtype=int) - 1
    lengths = np.asarray(tokens[1::2], dtype=int)
    ends = starts + lengths
    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, stop in zip(starts, ends):
        flat[begin:stop] = 1
    # Runs were counted column by column, so restore that memory order.
    return flat.reshape(shape, order='F')

@numba.njit()
def rle_numba(pixels):
    """Run-length encode a flat 0/1 array into [start, length, start, length, ...].

    Starts are 1-based, matching ``rle_encode``.

    Args:
        pixels: 1-D array of 0/1 values.

    Returns:
        List of ints alternating run start and run length; empty when there is
        no foreground or when ``pixels`` is empty.
    """
    size = len(pixels)
    points = []
    if size == 0:
        # Nothing to scan; also avoids indexing pixels[0] on an empty array.
        return points

    # `in_run` is True while the scan is inside a foreground run, i.e. the last
    # value appended to `points` is a start that still needs its length.
    in_run = pixels[0] == 1
    if in_run:
        # A run that opens on the very first pixel starts at 1 (1-based). The
        # previous code stored 0 here and left the start/length toggle
        # untouched, which swapped every later start and length.
        points.append(1)

    for i in range(1, size):
        if pixels[i] != pixels[i-1]:
            if in_run:
                # Run ended just before 1-based position i+1: store its length.
                points.append(i + 1 - points[-1])
            else:
                # New run opens at 1-based position i+1.
                points.append(i + 1)
            in_run = not in_run

    if in_run:
        # The last run reaches the end of the array: close it.
        points.append(size - points[-1] + 1)
    return points

def rle_numba_encode(image):
    """Run-length encode `image` with the numba-compiled scanner.

    The array is flattened in column-major order and handed to ``rle_numba``;
    the resulting integers are joined with single spaces.
    """
    column_major = image.flatten(order='F')
    return ' '.join(map(str, rle_numba(column_major)))

def _tile_bounds(length, window, min_overlap):
    """Return (starts, stops) of the tiles covering one axis of size `length`."""
    count = length // (window - min_overlap) + 1
    starts = np.linspace(0, length, num=count, endpoint=False, dtype=np.int64)
    # Anchor the last tile to the far edge. Clamp at 0 so an axis shorter than
    # `window` yields one in-bounds tile instead of a negative start.
    starts[-1] = max(length - window, 0)
    stops = (starts + window).clip(0, length)
    return starts, stops


def make_grid(shape, window=256, min_overlap=32):
    """
    Tile a 2-D extent with overlapping windows.

    shape: (x, y) size of the area to cover
    window: side length of each tile
    min_overlap: minimum overlap between neighbouring tiles

    Returns an int64 array of shape (N, 4), where N is the number of tiles and
    the second axis holds the slice bounds x1, x2, y1, y2. Tiles are ordered
    with the second-axis (y) index varying fastest. When an axis is shorter
    than `window`, it gets a single tile spanning the whole axis.
    """
    x, y = shape
    x1, x2 = _tile_bounds(x, window, min_overlap)
    y1, y2 = _tile_bounds(y, window, min_overlap)
    nx, ny = len(x1), len(y1)

    slices = np.zeros((nx, ny, 4), dtype=np.int64)
    # Broadcast the per-axis bounds over the (nx, ny) grid.
    slices[:, :, 0] = x1[:, None]
    slices[:, :, 1] = x2[:, None]
    slices[:, :, 2] = y1[None, :]
    slices[:, :, 3] = y2[None, :]
    return slices.reshape(nx * ny, 4)

In [None]:
# Identity affine transform, passed as `transform=` to rasterio.open() in
# HubDataset.build_slices.
# NOTE(review): presumably supplied because the TIFFs carry no georeferencing -- confirm.
identity = rasterio.Affine(1, 0, 0, 0, 1, 0)

class HubDataset(D.Dataset):
    """In-memory dataset of image/mask tiles cut from the training TIFFs.

    Every image listed in ``train.csv`` is tiled with ``make_grid``. Tiles
    whose mask contains more than ``threshold`` positive pixels are read from
    disk once and cached in ``self.x`` (images) and ``self.y`` (masks).
    """

    def __init__(self, root_dir, transform,
                 window=256, overlap=32, threshold = 100):
        """
        root_dir: folder holding ``train.csv`` and the ``train/`` TIFF directory
        transform: callable invoked as ``transform(image=..., mask=...)`` that
                   returns a mapping with 'image' and 'mask' entries
        window: tile side length handed to ``make_grid``
        overlap: minimum tile overlap handed to ``make_grid``
        threshold: a tile is kept only when its mask sum exceeds this value
        """
        self.path = pathlib.Path(root_dir)
        self.window = window
        self.overlap = overlap
        self.threshold = threshold
        self.transform = transform
        csv_file = self.path / 'train.csv'
        self.csv = pd.read_csv(csv_file.as_posix(), index_col=[0])

        self.x, self.y = [], []
        self.build_slices()
        self.len = len(self.x)
        # Converts an augmented image to a tensor and normalises each channel
        # with fixed mean / std constants.
        to_tensor = T.ToTensor()
        normalize = T.Normalize([0.6134, 0.4129, 0.6603],
                                [0.1211, 0.1630, 0.0948])
        self.as_tensor = T.Compose([to_tensor, normalize])

    def build_slices(self):
        """Decode every mask, tile every image and cache the tiles that are kept.

        Fills ``self.files``, ``self.masks`` and ``self.slices`` (one
        ``[file_index, x1, x2, y1, y2]`` entry per kept tile) and appends the
        tile pixels / mask crops to ``self.x`` / ``self.y``.
        """
        self.masks, self.files, self.slices = [], [], []
        for file_idx, filename in enumerate(self.csv.index.values):
            filepath = (self.path / 'train' / (filename + '.tiff')).as_posix()
            self.files.append(filepath)

            print('Transform', filename)
            with rasterio.open(filepath, transform = identity) as dataset:
                full_mask = rle_decode(self.csv.loc[filename, 'encoding'], dataset.shape)
                self.masks.append(full_mask)
                grid = make_grid(dataset.shape, window=self.window, min_overlap=self.overlap)

                for x1, x2, y1, y2 in tqdm(grid):
                    mask_tile = full_mask[x1:x2, y1:y2]
                    # NOTE(review): randint(100) is at most 99, so the second
                    # clause can never be True. It is kept because it still
                    # draws from NumPy's global RNG for every rejected tile.
                    keep = mask_tile.sum() > self.threshold or np.random.randint(100) > 120
                    if not keep:
                        continue

                    self.slices.append([file_idx, x1, x2, y1, y2])
                    bands = dataset.read([1, 2, 3],
                                         window=Window.from_slices((x1, x2), (y1, y2)))
                    # rasterio returns (band, row, col); store as (row, col, band).
                    self.x.append(np.moveaxis(bands, 0, -1))
                    self.y.append(mask_tile)

    # get data operation
    def __getitem__(self, index):
        """Return ``(image_tensor, mask)`` for one cached tile after augmentation.

        The mask gets a new leading axis of length 1.
        """
        augmented = self.transform(image=self.x[index], mask=self.y[index])
        image_tensor = self.as_tensor(augmented['image'])
        mask = augmented['mask'][None]
        return image_tensor, mask

    def __len__(self):
        """
        Total number of samples in the dataset
        """
        return self.len


In [None]:
# WINDOW / MIN_OVERLAP: tile size and minimum tile overlap used when cutting the TIFFs.
# NEW_SIZE: side length of the random crop taken from each tile by the transform below.
WINDOW=1024
MIN_OVERLAP=32
NEW_SIZE=512

# Augmentation pipeline; HubDataset.__getitem__ calls it with image= and mask=.
trfm = A.Compose([
            # Geometric augmentations.
            A.RandomCrop(NEW_SIZE,NEW_SIZE),
            A.HorizontalFlip(),
            A.VerticalFlip(),
            A.RandomRotate90(),
            A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.05, rotate_limit=15, p=0.5, 
                             border_mode=cv2.BORDER_REFLECT),
            # With probability 0.2, apply one of: blur or noise.
            A.OneOf([
                A.GaussianBlur(),
                A.GaussNoise()
            ], p=0.2),
    
            # With probability 0.3, apply one colour / contrast perturbation.
            A.OneOf([
                A.HueSaturationValue(5,5,5),
                A.CLAHE(clip_limit=2),
                A.RandomBrightnessContrast(),
                A.ColorJitter(brightness=0.07, contrast=0.07,
                   saturation=0.1, hue=0.1, always_apply=False, p=0.1),
            ], p=0.3),
        ])

# Building the dataset reads and caches every kept tile (see HubDataset.build_slices).
ds = HubDataset(DATA_PATH, window=WINDOW, overlap=MIN_OVERLAP, transform=trfm)

In [None]:
# # tensor(0.6134) tensor(0.4129) tensor(0.6603)
# # tensor(0.1211) tensor(0.1630) tensor(0.0948)

# r_m = 0.0
# g_m = 0.0
# b_m = 0.0
# r_s = 0.0
# g_s = 0.0
# b_s = 0.0
# for i,(image, mask) in tqdm(enumerate(ds), total=len(ds)):
#     r_m+=torch.mean(image[0,:,:],)
#     g_m+=torch.mean(image[1,:,:],)
#     b_m+=torch.mean(image[2,:,:],)
#     r_s+=torch.std(image[0,:,:],)
#     g_s+=torch.std(image[1,:,:],)
#     b_s+=torch.std(image[2,:,:],)
# print(r_m/len(ds),g_m/len(ds),b_m/len(ds))
# print(r_s/len(ds),g_s/len(ds),b_s/len(ds))

In [None]:
# Preview samples 100..119 after augmentation: mask on the left, first
# (normalised) image channel on the right.
for sample_idx in range(100, 120):
    image, mask = ds[sample_idx]
    fig, (mask_ax, image_ax) = plt.subplots(1, 2, figsize=(16, 8))
    mask_ax.imshow(mask[0], cmap='gray')
    image_ax.imshow(image[0]);

# _ = rle_numba_encode(mask[0]) # compile function with numba

In [None]:
# 75 % / 25 % random split of the tiles into training and validation subsets.
# No generator is passed to random_split, so the split depends on torch's global RNG state.
# NOTE(review): both subsets wrap the same `ds`, so validation samples also go
# through the random augmentations in `trfm`.
train_size = int(0.75 * len(ds))
valid_size = len(ds) - train_size
train_ds, valid_ds = torch.utils.data.random_split(ds, [train_size, valid_size])

In [None]:
import torch
import torch.nn as nn
import torchvision.transforms as T


# Utility Functions for the model
def double_conv(in_,out_,drop):
    """Build one decoder stage: two 3x3 conv + ReLU pairs followed by dropout.

    Both convolutions use padding 1, so the spatial size is preserved.

    Args:
        in_: input channels of the first convolution.
        out_: output channels of both convolutions.
        drop: dropout probability.

    Returns:
        An ``nn.Sequential`` holding the five layers in order.
    """
    layers = [
        nn.Conv2d(in_, out_, kernel_size=3, padding=(1, 1)),
        nn.ReLU(inplace=True),
        nn.Conv2d(out_, out_, kernel_size=3, padding=(1, 1)),
        nn.ReLU(inplace=True),
        nn.Dropout(drop),
    ]
    return nn.Sequential(*layers)

def crop(tensor,target_tensor):
    """Center-crop `tensor` to the spatial size of `target_tensor`.

    Height and width are read from dims 2 and 3 and handled independently.
    When both tensors already have the same spatial size, `tensor` is returned
    unchanged in content.

    Args:
        tensor: 4-D tensor to crop.
        target_tensor: 4-D tensor whose dims 2 and 3 give the output size.

    Returns:
        A view of `tensor` with the spatial size of `target_tensor`.

    Raises:
        ValueError: if `tensor` is smaller than `target_tensor` along a
            spatial dim, since cropping cannot enlarge it.
    """
    target_h, target_w = target_tensor.shape[2], target_tensor.shape[3]
    height, width = tensor.shape[2], tensor.shape[3]
    if height < target_h or width < target_w:
        raise ValueError(
            f'cannot crop {height}x{width} to larger size {target_h}x{target_w}')
    # Offsets are source minus target. The previous code subtracted the other
    # way round, giving negative offsets whenever cropping was needed.
    top = (height - target_h) // 2
    left = (width - target_w) // 2
    return tensor[:, :, top:top + target_h, left:left + target_w]


# Hook functions to get values of intermediate layers for cross connection
# Outputs captured by `hook` during the most recent encoder forward pass.
hook_values = []
def hook(_, input, output):
    """Forward hook: append the hooked module's output to the global `hook_values`."""
    global hook_values
    hook_values.append(output) # stores values of each layers in hook_values

# `indices`: positions in `model._blocks` whose output feeds the decoder.
# `shapes`: output shapes at those positions, deepest block first.
indices = []
shapes = []
def init_hook(model,device):
    """Register forward hooks on every encoder block and record skip-layer shapes.

    One random 1x3x576x576 image is pushed through `model` to learn each
    block's output shape. The blocks kept are the last one before every change
    of spatial size (dim 2), plus the final block. Results are stored in the
    globals `indices` and `shapes`.

    The probe runs in eval mode under ``torch.no_grad()`` so the random image
    does not update BatchNorm running statistics or build an autograd graph.
    The model's previous train/eval flag is restored afterwards.

    Args:
        model: module exposing an iterable ``_blocks`` of sub-modules.
        device: device the probe image is moved to.
    """
    global shapes, indices, hook_values
    shapes = []
    indices = []
    hook_values = []

    for block in model._blocks:
        block.register_forward_hook(hook) #register hooks

    image = torch.rand([1,3,576,576])
    image = image.to(device)

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            model(image) # generate hook values to get shapes
    finally:
        model.train(was_training)

    shape = [value.shape for value in hook_values] # get shape of all layers
    hook_values = [] # the probe activations are no longer needed

    for i in range(len(shape)-1):
        if shape[i][2]!=shape[i+1][2]: # get indices of layers only where output dimension change
            indices.append(i)
    indices.append(len(shape)-1) # get last layer index

    shapes = [shape[i] for i in indices] # get shapes of required layers
    shapes = shapes[::-1]

# Skip-connection tensors of the latest forward pass, shallowest first.
encoder_out = []
def epoch_hook(model, image):
    """Run `model` on `image` and collect the hooked outputs the decoder needs.

    Resets `hook_values`, runs one forward pass, then stores the outputs at
    the positions listed in `indices` in the global `encoder_out`.
    """
    global encoder_out, indices, hook_values
    hook_values = []

    model(image) # generate layer outputs with current image
    encoder_out = [hook_values[i] for i in indices] # get layer outputs for selected indices


class EffUNet(nn.Module):
    """U-Net style segmentation network with an EfficientNet encoder.

    Encoder block outputs are captured through forward hooks (``init_hook`` /
    ``epoch_hook``). The decoder upsamples from the deepest captured output,
    concatenating one captured output at every stage.
    """

    def __init__(self,model='b0',out_channels=2,dropout=0.1,freeze_backbone=True,pretrained=True,device='cuda'):
        """
        Args:
            model: EfficientNet variant, one of 'b0' .. 'b7'.
            out_channels: channels produced by the final 1x1 convolution.
            dropout: dropout probability used in every decoder stage.
            freeze_backbone: if True, encoder parameters get requires_grad=False.
            pretrained: load pretrained encoder weights instead of random init.
            device: device name or ``torch.device`` the layers are moved to.

        Raises:
            Exception: if `model` is not a supported variant.
        """
        super().__init__()

        if model not in {'b0', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'b7'}:
            raise Exception(f'{model} unavailable.')

        backbone = f'efficientnet-{model}'
        if pretrained:
            self.encoder = EfficientNet.from_pretrained(backbone)
        else:
            self.encoder = EfficientNet.from_name(backbone)

        # Disable non required layers by replacing them with identity to save time and memory
        for head_layer in ('_conv_head', '_bn1', '_avg_pooling', '_dropout', '_fc', '_swish'):
            setattr(self.encoder, head_layer, torch.nn.Identity())

        self.device = torch.device(device) if isinstance(device, str) else device
        self.encoder.to(self.device)
        # can't replace the stem with identity, so modify it
        # such that it doesn't affect the output shape
        self.encoder._conv_stem.stride=1
        self.encoder._conv_stem.kernel_size=(1,1)

        # freeze encoder
        if freeze_backbone:
            for weight in self.encoder.parameters():
                weight.requires_grad = False

        # register hooks & get shapes (fills the module-level `shapes`)
        init_hook(self.encoder,self.device)

        # Building decoder: one [upsample, double_conv] pair per transition
        # between consecutive captured shapes, deepest first.
        self.decoder = nn.ModuleList()
        for deep, shallow in zip(shapes[:-1], shapes[1:]):
            stage = nn.ModuleList()
            # The transposed conv emits deep - shallow channels so that the
            # concatenation with the skip tensor has `deep` channels again.
            stage.append(nn.ConvTranspose2d(deep[1], deep[1]-shallow[1], kernel_size=2, stride=2).to(self.device))
            stage.append(double_conv(deep[1], shallow[1], dropout).to(self.device))
            self.decoder.append(stage)

        #output layer
        self.out = nn.Conv2d(shapes[-1][1], out_channels, kernel_size=1).to(self.device)

    def forward(self, image):
        """Encode `image`, decode with skip connections and return the 1x1-conv output."""
        # Encoder: required outputs accumulate in the global "encoder_out"
        epoch_hook(self.encoder, image)

        # Decoder: start from the deepest feature map and work upwards.
        x = encoder_out.pop()
        for upsample, fuse in self.decoder:
            x = upsample(x) # conv transpose
            skip = crop(encoder_out.pop(), x) # croping for cross connection
            x = fuse(torch.cat([x, skip], dim=1)) # concatenate, then double conv

        return self.out(x)

In [None]:
# Define training and validation data loaders.
# The training loader reshuffles every epoch and uses 8 worker processes;
# the validation loader keeps a fixed order and loads in the main process.
BATCH_SIZE = 2
loader = D.DataLoader(
    train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=8)

vloader = D.DataLoader(
    valid_ds, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
@torch.no_grad()
def validation(model, loader, loss_fn):
    """Return the mean of the per-batch losses of `model` over `loader`.

    Switches `model` to eval mode (and leaves it there) and runs without
    gradient tracking.

    Args:
        model: network to evaluate.
        loader: iterable yielding ``(image, target)`` batches.
        loss_fn: callable ``loss_fn(output, target)`` returning a scalar tensor.
    """
    model.eval()
    batch_losses = []
    for image, target in loader:
        image = image.to(DEVICE)
        target = target.float().to(DEVICE)
        batch_losses.append(loss_fn(model(image), target).item())

    return np.array(batch_losses).mean()

In [None]:
# del model
# gc.collect()

In [None]:
# EfficientNet-B5 encoder with pretrained weights, left trainable
# (freeze_backbone=False); one output channel of logits for the binary mask.
model = EffUNet('b5',out_channels=1,dropout=0.15,freeze_backbone=False,pretrained=True,device=DEVICE)

In [None]:
# model.load_state_dict(torch.load('../input/effunet-hubmap-weights/model_best_vloss144.pth'))

In [None]:
class SoftDiceLoss(nn.Module):
    """Soft Dice loss: ``1 - mean(dice)`` with additive smoothing.

    Dice is computed separately over the axes in `dims` and then averaged
    over the remaining axes.
    """

    def __init__(self, smooth=1., dims=(-2,-1)):
        """
        Args:
            smooth: constant added to numerator and denominator.
            dims: axes summed over when counting overlaps.
        """
        super().__init__()
        self.smooth = smooth
        self.dims = dims

    def forward(self, x, y):
        """Return the loss for predictions `x` (probabilities) and targets `y`."""
        true_pos = (x * y).sum(self.dims)
        false_pos = (x * (1 - y)).sum(self.dims)
        false_neg = ((1 - x) * y).sum(self.dims)

        numerator = 2 * true_pos + self.smooth
        denominator = 2 * true_pos + false_pos + false_neg + self.smooth
        return 1 - (numerator / denominator).mean()
    
# Shared loss modules; BCE works on raw logits, Dice on sigmoid probabilities.
bce_fn = nn.BCEWithLogitsLoss()
dice_fn = SoftDiceLoss()

def loss_fn(y_pred, y_true):
    """Return the equally weighted sum of BCE-with-logits and soft Dice loss.

    Args:
        y_pred: raw logits from the network.
        y_true: target mask; cast to the dtype of `y_pred` for the BCE term.
    """
    dice_term = dice_fn(y_pred.sigmoid(), y_true)
    bce_term = bce_fn(y_pred, y_true.type_as(y_pred))
    # Alternative weighting tried earlier: 0.3*bce + 0.7*dice
    return 0.5*bce_term + 0.5*dice_term

In [None]:
# Ran this Once and found the learning rate around 3e-4
# from torch_lr_finder import LRFinder

# optimizer = torch.optim.Adam(model.parameters(),lr=1e-6, weight_decay=1e-3)
# lr_finder = LRFinder(model, optimizer, loss_fn, device=DEVICE)
# lr_finder.range_test(loader, end_lr=1e-3, num_iter=100)
# lr_finder.plot() # to inspect the loss-learning rate graph
# lr_finder.reset() # to reset the model and optimizer to their initial state

In [None]:
# torch.cuda.empty_cache()

In [None]:
### Table for results
# Header printed once before training; one formatted `raw_line` is printed per epoch.
header = r'''
        Train | Valid
Epoch |  Loss |  Loss | Time (s)
'''
# Row template: epoch number, train loss, valid loss, elapsed seconds,
# separated by U+2502 box-drawing bars.
#          Epoch         metrics            time
raw_line = '{:6d}' + '\u2502{:7.3f}'*2 + '\u2502{:6.2f}'

In [None]:
learning_rate = 3e-4
optimizer = torch.optim.AdamW(model.parameters(),lr=learning_rate, weight_decay=1e-3)
# Cut the learning rate tenfold after 3 epochs without validation improvement.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1, patience=3,
                                                       verbose=True,min_lr=3e-7)
early_stop_count = 0
# Gradients of this many consecutive batches are summed before each optimizer step.
accumulation_steps = 32

print(header)

best_loss = np.inf
# best_loss = 0.144
EPOCHES = 50
for epoch in range(1, EPOCHES+1):
    losses = []
    start_time = time.time()
    model.train()
    # Start every epoch with clean gradients.
    optimizer.zero_grad()
    num_batches = len(loader)
    for i,(image, target) in enumerate(loader):

        image, target = image.to(DEVICE), target.float().to(DEVICE)
        output = model(image)
        loss = loss_fn(output, target)
        losses.append(loss.item())
        # Scale so the accumulated gradient is the mean over the group.
        loss = loss / accumulation_steps
        loss.backward()
        # Step after every full group, and also on the last batch so a
        # trailing partial group is applied instead of leaking into the next
        # epoch. That partial group is still divided by accumulation_steps, so
        # its update is proportionally smaller.
        if (i+1) % accumulation_steps == 0 or (i+1) == num_batches:
            optimizer.step()
            optimizer.zero_grad()
    vloss = validation(model, vloader, loss_fn)
    print(raw_line.format(epoch, np.array(losses).mean(), vloss, (time.time()-start_time)))

    if vloss < best_loss:
        # New best validation loss: checkpoint and reset the patience counter.
        best_loss = vloss
        torch.save(model.state_dict(), 'model_best.pth')
        print('Loss decreased, model saved.')
        early_stop_count = 0
    else:
        early_stop_count += 1
    # Give up after 10 consecutive epochs without improvement.
    if early_stop_count == 10:
        print('Early Stopping!')
        break
    scheduler.step(vloss)

In [None]:
# Restore the weights saved at the lowest validation loss during training.
model.load_state_dict(torch.load('model_best.pth'))

In [None]:
@torch.no_grad()
def evaluate(model, loader, loss_fn):
    """Print the mean Dice score of `model` over `loader`.

    The score is ``1 - mean(per-batch soft Dice loss)``, computed on sigmoid
    probabilities. `loss_fn` is accepted for signature parity with
    ``validation`` but is not used.
    """
    model.eval()
    dice_losses = []
    for image, target in loader:
        image = image.to(DEVICE)
        target = target.float().to(DEVICE)
        probabilities = model(image).sigmoid()
        dice_losses.append(dice_fn(probabilities, target).item())

    print(f'Mean Dice Score: {1 - np.array(dice_losses).mean():.4f}')

evaluate(model, vloader, loss_fn)