**Description**

Starter Keras code: simple end-to-end solution to start with.


Training data: pre-processed Dataset - https://www.kaggle.com/iafoss/256x256-images/.

Model: U-Net from the segmentation_models library with a ResNet-34 backbone.

Works fine with both the public and the private dataset when run on CPU instead of GPU.

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import cv2
import tifffile
import matplotlib.pyplot as plt
import gc

%env SM_FRAMEWORK=tf.keras
import keras
from keras.losses import binary_crossentropy
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.optimizers import Adam

from tqdm.notebook import tqdm

In [None]:
# Root folder of the competition data and its training-image subfolder.
BASE_PATH = "../input/hubmap-kidney-segmentation/"
TRAIN_PATH = os.path.join(BASE_PATH, "train")

# The sample submission lists the ids of the test images; load it and
# preview the first rows.
sample_submission_csv = os.path.join(BASE_PATH, 'sample_submission.csv')
submission_df = pd.read_csv(sample_submission_csv)
submission_df.head()

In [None]:
#functions to convert encoding to mask and mask to encoding
# taken from @iafoss notebook
def enc2mask(encs, shape):
    """Decode run-length encodings into a labelled mask.

    Args:
        encs: iterable of RLE strings of the form
            ``"start length start length ..."`` with 1-based starts into the
            flattened image. NaN entries (missing encodings) are skipped.
        shape: two-element size used to reshape the flat buffer before the
            final transpose; the result has shape ``(shape[1], shape[0])``.

    Returns:
        ``np.uint8`` array where pixels of the m-th encoding hold ``m + 1``
        and background is 0. Labels are stored as uint8, so more than 255
        encodings cannot be represented.
    """
    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for m, enc in enumerate(encs):
        # `np.float` was removed in NumPy 1.24; the builtin float (which
        # np.float64 subclasses) plus np.floating covers every float NaN.
        if isinstance(enc, (float, np.floating)) and np.isnan(enc):
            continue
        tokens = enc.split()
        # Tokens alternate start, length; a dangling odd token is ignored.
        for start, length in zip(tokens[0::2], tokens[1::2]):
            begin = int(start) - 1  # RLE starts are 1-based
            img[begin:begin + int(length)] = 1 + m
    return img.reshape(shape).T

#https://www.kaggle.com/bguberfain/memory-aware-rle-encoding
#with bug fix from @iafoss
def rle_encode_less_memory(pixels):
    """Run-length encode a flattened binary mask.

    Args:
        pixels: 1-D array of 0/1 values, already flattened in the order the
            encoding should follow (the caller is responsible for any
            transpose before flattening).

    Returns:
        Space-separated ``"start length start length ..."`` string with
        1-based starts; an empty string when there is no foreground.

    Unlike the simplified original, the input is left untouched and runs
    touching the first or last pixel are encoded instead of being clipped.
    No full-size copy of ``pixels`` is made.
    """
    pixels = np.asarray(pixels)
    if pixels.size == 0:
        return ''

    # Positions (1-based) where the value changes between neighbours.
    runs = np.flatnonzero(pixels[1:] != pixels[:-1]) + 2
    # A run that begins at the very first pixel has no preceding change.
    if pixels[0] != 0:
        runs = np.concatenate(([1], runs))
    # A run that reaches the last pixel has no trailing change; close it.
    if pixels[-1] != 0:
        runs = np.concatenate((runs, [pixels.size + 1]))
    # Turn (start, end) pairs into (start, length) pairs.
    runs[1::2] -= runs[::2]

    return ' '.join(str(x) for x in runs)

**Model**

In [None]:
# Encoder used for the U-Net and the batch size used by the data generators.
BACKBONE = 'resnet34'
BATCHSIZE = 32

# Masks are single-channel 256x256; the network input adds 3 colour channels.
mask_shape = (256, 256)
unet_in_shape = (*mask_shape, 3)

# False: load a previously trained model instead of training a new one.
train_model = False

In [None]:
if train_model:
    !pip install segmentation-models --quiet
    import segmentation_models as sm

    model = sm.Unet(
        BACKBONE, 
        encoder_weights='imagenet',
        classes=1,
        input_shape=unet_in_shape,
        activation='sigmoid',
        encoder_freeze=False
    )

    model.compile(optimizer=Adam(lr=0.0001), loss=binary_crossentropy)

    rlrop = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, verbose=1, mode='min', min_delta=0.0001)
    es = EarlyStopping(monitor='val_loss', min_delta=0.0001, patience=5, verbose=1, mode='min', restore_best_weights=True)

**Datagenerator**

In [None]:
class DataGenerator_Train_256(keras.utils.Sequence):
    """Keras ``Sequence`` yielding batches of image tiles and their masks.

    Tiles are read from ``<base_path>/train/<name>`` and masks from
    ``<base_path>/masks/<name>`` with ``cv2.imread``. Images are returned as
    float32 scaled to [0, 1]; masks are returned as uint8 taken from the
    first channel of the mask file.

    Args:
        names: file names shared by the ``train`` and ``masks`` folders.
        base_path: folder containing the ``train`` and ``masks`` subfolders.
        dim_in: (height, width) of every tile.
        batch_size: number of samples per batch.
        n_channels: number of image channels.
        random_state: seed of the generator's private shuffling RNG.
        shuffle: whether to reshuffle the names at construction and after
            every epoch.
    """

    def __init__(self, names, base_path='../input/train_images',
                 dim_in=(256,256), batch_size=32, n_channels=3, random_state=12, shuffle=True):
        super().__init__()
        # Work on a copy so shuffling never reorders the caller's list.
        self.names = list(names)
        self.dim_in = dim_in
        self.batch_size = batch_size
        self.base_path  = base_path
        self.n_channels = n_channels
        self.shuffle = shuffle
        self.random_state = random_state
        # Private RNG: reproducible shuffling without reseeding the global
        # NumPy generator (which would affect unrelated random code).
        self._rng = np.random.RandomState(random_state)
        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches per epoch (a trailing partial batch is dropped)."""
        return int(np.floor(len(self.names) / self.batch_size))

    def __getitem__(self, index):
        """Return the ``(X, y)`` pair for batch number ``index``."""
        names = self.names[index*self.batch_size:(index+1)*self.batch_size]
        X, y = self.__data_generation(names)
        return X, y

    def on_epoch_end(self):
        """Reshuffle the sample order after each epoch when shuffling is enabled."""
        if self.shuffle:
            self._rng.shuffle(self.names)

    @staticmethod
    def _read_image(path):
        """Read an image with OpenCV, failing loudly when it cannot be loaded."""
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {path}")
        return img

    def __data_generation(self, names):
        """Load the tiles and masks for ``names`` into batch arrays."""
        # Size the arrays by the names actually given so a short batch never
        # contains uninitialised rows from np.empty.
        n_samples = len(names)
        X = np.empty((n_samples, *self.dim_in, self.n_channels), dtype=np.uint8)
        y = np.empty((n_samples, *self.dim_in, 1), dtype=np.uint8)

        for i, name in enumerate(names):
            X[i,] = self._read_image(os.path.join(self.base_path, 'train', name))
            # Only the first channel of the mask file is used.
            y[i,:,:,0] = self._read_image(os.path.join(self.base_path, 'masks', name))[:,:,0]

        # Scale pixel values to [0, 1].
        X = X.astype(np.float32)/255

        return X, y

In [None]:
from sklearn.model_selection import train_test_split

# os.listdir returns entries in arbitrary order; sorting makes the seeded
# train/validation split reproducible across runs and machines.
names = sorted(os.listdir('../input/hubmap-256-original/train/'))
print(len(names), names[0])

# Hold out 20% of the tiles for validation.
train_names, val_names = train_test_split(
        names, random_state=42, test_size=0.2)

# Find the first tile whose mask contains foreground, for a visual check.
# If no mask has foreground, the last tile read is shown.
for name in names:
    image = cv2.imread(os.path.join('../input/hubmap-256-original/train/', name))
    mask = cv2.imread(os.path.join('../input/hubmap-256-original/masks/', name))
    if np.sum(mask):
        break

plt.figure()
plt.subplot(1,2,1)
# cv2.imread returns BGR while matplotlib expects RGB; convert for display only.
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.subplot(1,2,2)
# Scale the mask so foreground pixels are clearly visible.
plt.imshow(mask*255)

**Training**

In [None]:
# Batch generators over the training and validation tile names.
train_generator = DataGenerator_Train_256(
    train_names,
    base_path = '../input/hubmap-256-original/',
    batch_size=BATCHSIZE,
    dim_in = unet_in_shape[:2],
    shuffle=True,
)
    
val_generator = DataGenerator_Train_256(
    val_names,
    base_path = '../input/hubmap-256-original/',
    batch_size=BATCHSIZE,
    dim_in = unet_in_shape[:2],
    shuffle=True,
)


if train_model:
    # Model.fit accepts keras.utils.Sequence objects directly;
    # fit_generator is deprecated and absent from newer Keras releases.
    history = model.fit(
        train_generator,
        validation_data=val_generator,
        callbacks=[rlrop, es],
        epochs=20)

    # Plot the training and validation loss curves.
    history_df = pd.DataFrame(history.history)
    history_df[['loss', 'val_loss']].plot()

    model.save('hubmap_modelresnet34_2211.h5')
else:
    # Reuse a previously trained network instead of training a new one.
    from keras.models import load_model
    model = load_model('../input/hubmap-nets/hubmap_modelresnet34_2211.h5')

In [None]:
def predict_on_batch(test_in_image, test_in_id, mask_out, size, predictor=None):
    """Predict masks for a batch of tiles and paste them into ``mask_out``.

    Args:
        test_in_image: sequence of uint8 tiles of shape (size, size, channels).
        test_in_id: row-major index of each tile within the tile grid of
            ``mask_out``, in the same order as ``test_in_image``.
        mask_out: 2-D array that receives the predictions; modified in place.
            Its width is expected to be a multiple of ``size``.
        size: edge length of a tile in pixels.
        predictor: object with a ``predict(batch)`` method returning an array
            of shape (n, size, size, 1). Defaults to the module-level
            ``model``.

    Returns:
        None. An empty batch is a no-op.
    """
    if len(test_in_image) == 0:
        return
    net = model if predictor is None else predictor

    # Same preprocessing as during training: float32 scaled to [0, 1].
    batch = np.array(test_in_image).astype(np.float32)/255
    pred_mask = net.predict(batch)

    # Integer arithmetic avoids the float division/modulo of the tile index.
    tiles_per_row = mask_out.shape[1] // size
    for pred, tile_id in zip(pred_mask, test_in_id):
        row, col = divmod(tile_id, tiles_per_row)
        mask_out[row*size:(row+1)*size, col*size:(col+1)*size] = pred[:,:,0]

**Prediction**

In [None]:
%%time
# image pre-processing taken from https://www.kaggle.com/iafoss/256x256-images
sz = 256   # tile size in pixels (after downscaling)
reduce = 4 # downscale factor applied to the original images
s_th = 40  # saturation blanking threshold
p_th = 200*sz//256 # tiles with at most this many saturated pixels are skipped

PREDBATCHSIZE = 128

for index, row in tqdm(submission_df.iterrows(),total=len(submission_df)):
    test_in_image, test_in_id = [], []
    # read the full-resolution test image
    img = tifffile.imread(os.path.join(BASE_PATH, 'test', row.id +'.tiff'))
    # 5-D arrays: drop the singleton axes and move channels last
    if len(img.shape) == 5:img = np.transpose(img.squeeze(), (1,2,0))

    # INPUT IMAGES PRE-PROCESSING
    # pad so that both sides are multiples of reduce*sz (whole tiles after downscaling)
    shape = img.shape
    pad0 = (reduce*sz - shape[0]%(reduce*sz))%(reduce*sz)
    pad1 = (reduce*sz - shape[1]%(reduce*sz))%(reduce*sz)
    img = np.pad(img,[[pad0//2,pad0-pad0//2],[pad1//2,pad1-pad1//2],[0,0]],
                constant_values=0)
    # downscale by `reduce`
    img = cv2.resize(img,(img.shape[1]//reduce,img.shape[0]//reduce),
                         interpolation = cv2.INTER_AREA)
    
    # create array for mask gathering (same size as the downscaled image)
    mask_out = np.zeros((img.shape[0], img.shape[1]), dtype=np.float32)
    
    # split the image into tiles using the reshape+transpose trick;
    # tiles end up in row-major order over the tile grid
    img = img.reshape(img.shape[0]//sz,sz,img.shape[1]//sz,sz,3)
    img = img.transpose(0,2,1,3,4).reshape(-1,sz,sz,3)

    # NOTE(review): tiles go to the model in the channel order delivered by
    # tifffile, while DataGenerator_Train_256 reads training tiles with
    # cv2.imread (BGR) - confirm both orders match what the model expects.
    for i, im in enumerate(img):
        #remove black or gray images based on saturation check
        hsv = cv2.cvtColor(im, cv2.COLOR_BGR2HSV)
        h, s, v = cv2.split(hsv)
        if (s>s_th).sum() <= p_th or im.sum() <= p_th: continue
        
        test_in_image.append(im)
        test_in_id.append(i)
        #predict for test batch
        if len(test_in_image) == PREDBATCHSIZE:
            predict_on_batch(test_in_image, test_in_id, mask_out, sz)
            test_in_image, test_in_id = [], []
    # predict for tail
    if len(test_in_image) > 0:
        predict_on_batch(test_in_image, test_in_id, mask_out, sz)
    
    del img, test_in_image, test_in_id

    # zoom back to the padded full resolution
    mask_out = cv2.resize(mask_out, (mask_out.shape[1]*reduce, mask_out.shape[0]*reduce),
                          interpolation = cv2.INTER_LINEAR)
    # crop the padding with explicit end indices: a negative end index of
    # -(pad - pad//2) becomes -0 when no padding was needed, which would
    # produce an empty mask
    top, left = pad0//2, pad1//2
    mask_out = mask_out[top:top+shape[0], left:left+shape[1]]
    
    # threshold the probabilities and flatten in the transposed order
    # the encoder is fed with
    mask_out = (mask_out > 0.5).astype(np.int8)
    mask_out = mask_out.T.flatten()
    
    # encode mask
    enc_mask = rle_encode_less_memory(mask_out)
    print(np.sum(mask_out), len(enc_mask))
    submission_df.loc[index, 'predicted'] = enc_mask
    # free the large per-image buffers before the next image is loaded
    del enc_mask, mask_out
    gc.collect()

In [None]:
# Preview the first rows of the submission table.
submission_df.head()

In [None]:
# Write the submission table to disk without the DataFrame index column.
filename = 'submission.csv'
submission_df.to_csv(filename,index=False)