# Setup

In [None]:
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
from skimage import io
import glob, os, json, cv2, gc, shutil
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import backend as K

# Edge length in pixels of the square tiles cut from every image; also the
# height/width of the U-Net input layer.
IMG_SIZE = 1024
# Root directory of the competition data (train.csv, the dataset information
# CSV and train/<id>.tiff are read relative to it).
INPUT_PATH = "../input/hubmap-kidney-segmentation"
# When True, mask overlays are plotted/saved and a diagram of the model is written.
PRINT_PLOTS = False
# When False only the first row of train.csv is sliced (quick debug run);
# when True the rows from index 1 onwards are sliced.
FULL_RUN = True

def create_folder(folder):
    """Create ``folder`` (and any missing parent directories) if it is absent.

    Calling this for a directory that already exists is a no-op.

    ``exist_ok=True`` replaces the former "check, then create" sequence, which
    could fail if the directory appeared between the check and the creation.

    Args:
        folder: Path of the directory to create.

    Raises:
        FileExistsError: If ``folder`` exists but is not a directory.
    """
    os.makedirs(folder, exist_ok=True)
        
# Working directories for saved plots and for the sliced train/test tiles.
for working_dir in ("./plots", "./train", "./test"):
    create_folder(working_dir)

# Training table; its 'id' and 'encoding' columns are consumed when slicing.
train_csv_path = os.path.join(INPUT_PATH, 'train.csv')
df_train = pd.read_csv(train_csv_path)
display(df_train)

# Additional per-image information shipped with the dataset.
image_info_csv_path = os.path.join(INPUT_PATH, 'HuBMAP-20-dataset_information.csv')
df_image_info = pd.read_csv(image_info_csv_path)
display(df_image_info)

# Image Functions

In [None]:
def read_tiff(image_path):
    """Load a TIFF file and normalise its axis layout.

    Axes of length 1 are dropped. If the first remaining axis has length 3 it
    is treated as the colour axis and moved to position 2, so a (3, H, W)
    array comes back as (H, W, 3).

    Args:
        image_path: Path of the TIFF file to read.

    Returns:
        The image as a numpy array.
    """
    pixels = np.squeeze(io.imread(image_path))  # drop superfluous length-1 axes
    colour_axis_first = pixels.shape[0] == 3
    if colour_axis_first:
        # Same result as swapping axes (0, 1) and then (1, 2).
        pixels = np.moveaxis(pixels, 0, 2)
    return pixels

def read_mask(image, encoded_mask):
    """Decode the run-length mask belonging to ``image``.

    The mask is decoded with the two image axes swapped and then swapped
    back, and a trailing axis of length 1 is appended.

    Args:
        image: Array whose first two axes give the mask's size.
        encoded_mask: Run-length encoded mask string passed to ``rle_decode``.

    Returns:
        Mask array of shape (image.shape[0], image.shape[1], 1).
    """
    height, width = image.shape[0], image.shape[1]
    swapped = rle_decode(encoded_mask, (width, height))  # decoded with inverted axes
    return swapped.swapaxes(0, 1)[..., np.newaxis]

def delete_directory_contents(dir):
    """Remove everything inside ``dir`` while keeping ``dir`` itself.

    Files and symlinks are unlinked; sub-directories are removed recursively
    (previously a sub-directory made ``os.remove`` raise). The directory
    listing is materialised and the scandir handle closed before anything is
    deleted.

    Args:
        dir: Path of the directory to empty.

    Raises:
        OSError: If ``dir`` cannot be listed or an entry cannot be removed.
    """
    with os.scandir(dir) as entries:
        children = list(entries)

    for child in children:
        if child.is_dir(follow_symlinks=False):
            shutil.rmtree(child.path)
        else:
            os.remove(child.path)
        
def plot_masked_image(image, mask, name):
    """Show ``image`` with ``mask`` overlaid and save it to ./plots/<name>.png.

    A dedicated figure is created for every call and closed afterwards, so
    repeated calls neither draw on top of an earlier figure nor accumulate
    open figures.

    Args:
        image: Image array accepted by ``imshow``.
        mask: Mask array drawn on top with 30% opacity and the 'jet' colormap.
        name: File name (without extension) used inside ./plots.
    """
    fig, ax = plt.subplots()
    ax.imshow(image, interpolation='none')
    ax.imshow(mask, cmap='jet', alpha=0.3, interpolation='none')

    fig.savefig(f"./plots/{name}.png", dpi = 1000)
    plt.show()
    plt.close(fig)
    
def slice_images(image_id, image, mask=None, folder=""):
    """Cut ``image`` (and optionally ``mask``) into IMG_SIZE x IMG_SIZE tiles on disk.

    Tiles are addressed by ``x`` (index along axis 0) and ``y`` (index along
    axis 1). Only complete tiles are produced; the remainder at the bottom
    and right edges is discarded by the floor division.

    With a mask, a tile is written only if its mask tile contains the value
    255 after scaling by 255, and the mask tile is stored next to it as PNG.
    Without a mask (``None`` or an empty sequence) every image tile is written.

    Files are named ``./<folder>/<image_id>-imgslice.<x>.<y>.jpg`` and
    ``./<folder>/<image_id>-maskslice.<x>.<y>.png``.

    Args:
        image_id: Identifier used in the log line and in the file names.
        image: Image array; its first two axes are tiled.
        mask: Optional mask array covering the same first two axes.
        folder: Target directory name, relative to the working directory.
    """
    # NOTE(review): cv2.imwrite treats 3-channel data as BGR; if `image` is
    # RGB the stored JPEGs have red and blue swapped -- confirm that
    # inference slices tiles the same way before changing this.
    print('Slicing Image ' + image_id + ' ...')

    has_mask = mask is not None and len(mask) != 0
    tiles_along_axis0 = image.shape[0] // IMG_SIZE
    tiles_along_axis1 = image.shape[1] // IMG_SIZE

    for x in range(tiles_along_axis0):
        span0 = slice(x * IMG_SIZE, (x + 1) * IMG_SIZE)
        for y in range(tiles_along_axis1):
            span1 = slice(y * IMG_SIZE, (y + 1) * IMG_SIZE)
            image_tile_path = f"./{folder}/{image_id}-imgslice.{x}.{y}.jpg"

            if not has_mask:
                cv2.imwrite(image_tile_path, image[span0, span1])
                continue

            mask_tile = mask[span0, span1] * 255
            if 255 in mask_tile:
                cv2.imwrite(image_tile_path, image[span0, span1])
                # Values are 0/255, so 8 bits suffice; avoids an int64 copy.
                cv2.imwrite(f"./{folder}/{image_id}-maskslice.{x}.{y}.png",
                            mask_tile.astype(np.uint8))

# ref.: https://www.kaggle.com/stainsby/fast-tested-rle
def rle_decode(mask_rle, shape):
    '''
    Decode a run-length encoded string into a binary mask.

    mask_rle: whitespace separated "start length" pairs; starts are 1-based
              positions in the flattened mask
    shape: 2-tuple; the flat mask has shape[0]*shape[1] entries and is
           reshaped to this shape
    Returns a uint8 numpy array with 1 inside the runs and 0 elsewhere
    '''
    tokens = np.asarray(mask_rle.split(), dtype=int)
    starts = tokens[0::2] - 1  # convert 1-based starts to 0-based offsets
    lengths = tokens[1::2]
    ends = starts + lengths

    flat_mask = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, end in zip(starts, ends):
        flat_mask[begin:end] = 1
    return flat_mask.reshape(shape)

## ref.: https://www.kaggle.com/bguberfain/memory-aware-rle-encoding
def rle_encode_less_memory(img):
    """Run-length encode ``img`` as a string of "start length" pairs.

    The array is transposed and flattened into a copy, so ``img`` itself is
    not modified. Starts are 1-based positions in that flattened copy.
    The first and last flattened pixels are forced to zero, so a run touching
    either end of the flattened array is shortened by that pixel.

    Returns:
        Space separated string; empty when no run is found.
    """
    flat = img.T.flatten()

    # The pairing of change points below relies on zeros at both ends.
    flat[0] = 0
    flat[-1] = 0

    # 1-based positions where the value differs from the previous pixel:
    # even slots hold run starts, odd slots hold the position after a run.
    change_points = np.flatnonzero(flat[1:] != flat[:-1]) + 2
    change_points[1::2] -= change_points[::2]  # turn run ends into run lengths

    return ' '.join(map(str, change_points))

# Slice Training Images to Uniform Size

In [None]:
def slice_training_images(df_train):
    """Read every selected training image with its mask and slice both into ./train.

    Args:
        df_train: DataFrame with an 'id' column (image file stem) and an
            'encoding' column (run-length encoded mask).
    """
    # Debug runs use only the first row; full runs use rows 1..end.
    # NOTE(review): a full run therefore skips the first training image --
    # confirm this is intentional (e.g. a hold-out) and not an off-by-one.
    selected_rows = df_train.iloc[1:, :] if FULL_RUN else df_train.iloc[0:1, :]

    for _, row in selected_rows.iterrows():
        image_id, encoded_mask = row['id'], row['encoding']

        image = read_tiff(os.path.join(INPUT_PATH, f"train/{image_id}.tiff"))
        mask = read_mask(image, encoded_mask)

        if PRINT_PLOTS:
            # Plot at a quarter of the resolution.
            small_image = cv2.resize(image, (0, 0), fx=0.25, fy=0.25)
            small_mask = cv2.resize((mask * 255).astype('float32'), (0, 0), fx=0.25, fy=0.25)
            plot_masked_image(small_image, small_mask, image_id)

        slice_images(image_id, image, mask, "train")

# Loss and Metrics

In [None]:
# ref.: https://gist.github.com/CarloSegat/1a2816676c48607dac9dda38afe4f3d9
def weighted_binary_crossentropy(y_true, y_pred, weight1=5, weight0=1):
    """Binary cross-entropy with separate weights for the two classes.

    Both ``y_true`` and ``y_pred`` are clipped to [epsilon, 1 - epsilon]
    before the logarithms are taken.

    Args:
        y_true: Target tensor.
        y_pred: Predicted probabilities, same shape as ``y_true``.
        weight1: Factor applied to the ``y_true * log(y_pred)`` term.
        weight0: Factor applied to the ``(1 - y_true) * log(1 - y_pred)`` term.

    Returns:
        The weighted loss averaged over the last axis.
    """
    eps = K.epsilon()
    lower, upper = eps, 1 - eps
    targets = K.clip(y_true, lower, upper)
    probabilities = K.clip(y_pred, lower, upper)

    foreground_term = targets * K.log(probabilities) * weight1
    background_term = (1 - targets) * K.log(1 - probabilities) * weight0
    return K.mean(-(foreground_term + background_term), axis=-1)

In [None]:
# ref.: https://github.com/keras-team/keras/issues/3611
def dice_coef(y_true, y_pred, smooth=1):
    """Smoothed Dice coefficient averaged over the batch.

    For each sample the sums are taken over axes 1, 2 and 3, so both inputs
    must be 4-dimensional with the batch on axis 0.

    Args:
        y_true: Target masks.
        y_pred: Predicted masks, same shape as ``y_true``.
        smooth: Constant added to numerator and denominator.

    Returns:
        Mean over axis 0 of (2 * overlap + smooth) / (sum_true + sum_pred + smooth).
    """
    per_sample_axes = [1, 2, 3]
    overlap = K.sum(y_true * y_pred, axis=per_sample_axes)
    # Sum of both mask sums (not a set union); this is the Dice denominator.
    mask_total = K.sum(y_true, axis=per_sample_axes) + K.sum(y_pred, axis=per_sample_axes)
    dice_per_sample = (2. * overlap + smooth) / (mask_total + smooth)
    return K.mean(dice_per_sample, axis=0)

# U-Net Model for our Kidney Images
Reference: https://arxiv.org/abs/1505.04597

In [None]:
def unet():
    """Build and compile the U-Net used for mask prediction.

    Encoder: four stages of two 3x3 ReLU convolutions (32, 64, 128, 256
    filters) each followed by 2x2 max pooling. Bottleneck: convolutions with
    512 and 256 filters. Decoder: four stages that upsample by 2, concatenate
    the matching encoder output on the channel axis and apply two 3x3 ReLU
    convolutions. A 1x1 sigmoid convolution named 'output' yields one channel.

    The model is compiled with Adam (learning rate 5e-4), the weighted binary
    cross-entropy loss and the accuracy / Dice metrics; its summary is printed.

    Returns:
        The compiled ``keras.Model``.
    """
    def conv_pair(tensor, first_filters, second_filters):
        # Two consecutive 3x3 'same' ReLU convolutions.
        tensor = layers.Conv2D(first_filters, 3, activation='relu', padding='same')(tensor)
        return layers.Conv2D(second_filters, 3, activation='relu', padding='same')(tensor)

    model_input = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3 ))

    # Contracting path: remember each stage's output for the skip connections.
    skip_connections = []
    features = model_input
    for filters in (32, 64, 128, 256):
        features = conv_pair(features, filters, filters)
        skip_connections.append(features)
        features = layers.MaxPooling2D(2)(features)

    # Bottleneck.
    features = conv_pair(features, 512, 256)

    # Expanding path: deepest skip connection first.
    decoder_filters = ((512, 128), (256, 64), (128, 32), (64, 32))
    for (first_filters, second_filters), skip in zip(decoder_filters, reversed(skip_connections)):
        features = layers.UpSampling2D(2)(features)
        features = layers.concatenate([features, skip], axis=-1)
        features = conv_pair(features, first_filters, second_filters)

    output = layers.Conv2D(1, 1, activation='sigmoid', name='output', padding='same')(features)

    optimizer = tf.keras.optimizers.Adam(5e-4)
    model = keras.Model(inputs=[model_input], outputs=[output])
    model.compile(
        optimizer=optimizer,
        loss={'output': weighted_binary_crossentropy},
        metrics={'output': ["acc", dice_coef]},
    )
    model.summary()

    if PRINT_PLOTS:
        keras.utils.plot_model(model, to_file="./model.png", show_shapes=True)

    return model

# Mapping Functions to Read Sliced Images and Convert them to Tensors

In [None]:
def image_to_tensor(image_path, is_png):
    """Read an image file and return it as a float32 tensor.

    Args:
        image_path: Path of the file to read.
        is_png: True to decode the bytes as PNG, False to decode them as JPEG.

    Returns:
        The decoded image after ``tf.image.convert_image_dtype(..., tf.float32)``.
    """
    raw_bytes = tf.io.read_file(image_path)
    decode = tf.image.decode_png if is_png else tf.image.decode_jpeg
    return tf.image.convert_image_dtype(decode(raw_bytes), tf.float32)


def prepare_images_for_unet(image_path, mask_path):
    """Load one training pair: the JPEG image tile and its PNG mask tile.

    Returns:
        Tuple ``(image, mask)`` of tensors produced by ``image_to_tensor``.
    """
    return (
        image_to_tensor(image_path, is_png=False),
        image_to_tensor(mask_path, is_png=True),
    )


def prepare_image_for_unet(image_path):
    """Load a single JPEG image tile (no mask) as a tensor via ``image_to_tensor``."""
    return image_to_tensor(image_path, is_png=False)

# Model Training

In [None]:
def train_model():
    """Train the U-Net on the sliced tiles in ./train and save it to ./model.h5.

    Image tiles (*.jpg) and mask tiles (*.png) are paired by position after
    sorting both file lists, which relies on the two naming schemes sorting
    identically. The pairs are shuffled, decoded and fed to the model one
    pair per batch for 30 epochs without progress output.

    Changes compared with the earlier version:
      * the slices are globbed from ./train, the folder they are written to,
        instead of ../working/train, which only matched when the working
        directory happened to be named "working";
      * a first dataset with batch size 16 that was built and immediately
        replaced has been dropped;
      * the preview plots are limited to indices that actually exist;
      * missing or unpaired slices raise a descriptive error up front.

    Raises:
        ValueError: If no image slices are found, or if the numbers of image
            and mask slices differ.
    """
    image_slice_paths = sorted(glob.glob('./train/*.jpg'))
    mask_slice_paths = sorted(glob.glob('./train/*.png'))
    print(len(image_slice_paths))

    if not image_slice_paths:
        raise ValueError("No image slices found in ./train - run slice_training_images first.")
    if len(image_slice_paths) != len(mask_slice_paths):
        raise ValueError(
            f"Found {len(image_slice_paths)} image slices but "
            f"{len(mask_slice_paths)} mask slices in ./train."
        )

    if PRINT_PLOTS:
        # Preview up to 20 pairs starting at index 110.
        for image_index in range(110, min(130, len(image_slice_paths))):
            image = cv2.imread(image_slice_paths[image_index])
            mask = cv2.imread(mask_slice_paths[image_index])

            plot_masked_image(image, mask, f"slice-{image_index}")
            del image
            del mask

    train_dataset = (
        tf.data.Dataset.from_tensor_slices((image_slice_paths, mask_slice_paths))
        .shuffle(len(image_slice_paths))
        .map(prepare_images_for_unet)
        .batch(1)
        .prefetch(tf.data.experimental.AUTOTUNE)
    )

    epochs = 30
    verbose = 0
    model = unet()
    model.fit(train_dataset,
              epochs=epochs,
              verbose=verbose)
    model.save('./model.h5')

In [None]:
# Cut the selected training images and their masks into IMG_SIZE x IMG_SIZE
# tiles stored in ./train (only tiles whose mask is non-empty are written).
slice_training_images(df_train)

In [None]:
# Train the U-Net on the sliced tiles and save the result as ./model.h5.
train_model()

# Cleanup

In [None]:
# Remove the working directories (sliced tiles and saved plots) created during setup.
for working_dir in ('./train', './test', './plots'):
    shutil.rmtree(working_dir)