# Introduction

This is a notebook exploring [U-Net: Convolutional Networks for Biomedical Image Segmentation](https://arxiv.org/pdf/1505.04597.pdf) for the 2018 Data Science Bowl.

Thanks to [Kjetil Åmdal-Sævik's Keras U-Net starter - LB 0.277](https://www.kaggle.com/keegil/keras-u-net-starter-lb-0-277?scriptVersionId=2164855) for the inspiration.


In [26]:
import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

from skimage.io import imread 
from skimage.transform import resize
from skimage.morphology import label

from keras.layers import Input, Conv2D, Lambda, MaxPooling2D, Conv2DTranspose, concatenate
from keras.models import Model, load_model
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator

In [27]:
# Input directories for the two stage-1 data sets.
TRAIN_PATH = '../input/stage1_train/'
TEST_PATH = '../input/stage1_test/'

# Target size that the loading cells below resize every image to.
IMG_HEIGHT = 256
IMG_WIDTH = IMG_HEIGHT
IMG_CHANNELS = 3

As stated in [U-Net: Convolutional Networks for Biomedical Image Segmentation](https://arxiv.org/pdf/1505.04597.pdf):

> To minimize the overhead and make maximum use
of the GPU memory, we favor large input tiles over a large batch size and hence
reduce the batch to a single image.

So we'll adjust the epochs but stick to the batch size of 1.

In [28]:
# Training schedule. The batch size of 1 follows the U-Net paper quoted above;
# only the number of epochs / steps is meant to be tuned.
BATCH_SIZE = 1
STEPS_PER_EPOCH = 600  # there are 670 images
NUM_EPOCHS = 30

# Preprocessing

In [29]:
# The sub-directory names directly under TRAIN_PATH are the image ids.
# Keep only that list (not the whole os.walk tuple) so `train_ids` has the
# same type in every cell that reads or rebinds it.
train_ids = next(os.walk(TRAIN_PATH))[1]

# Count the mask files stored under <image_id>/masks/ for every image.
mask_count = 0
for train_id in train_ids:
    masks = next(os.walk(os.path.join(TRAIN_PATH, train_id, 'masks')))[2]
    mask_count += len(masks)

print('There are {} images.'.format(len(train_ids)))
print('There are {} masks.'.format(mask_count))
# Floor division: the average is reported as a whole number.
print('Average {} masks per image.'.format(mask_count // len(train_ids)))

In [30]:
# Expected layout on disk:
#
#   input/
#       stage1_train/
#           <image_id>/
#               images/
#               masks/
#           <image_id>/
#           ...
#
# The ids are the names of the directories directly below each root.
train_ids, test_ids = (next(os.walk(root))[1] for root in (TRAIN_PATH, TEST_PATH))

In [31]:
# Sanity check: peek at the first five training ids.
sample_ids = train_ids[:5]
print(sample_ids)

In [32]:
# Arrays are laid out as (sample, row, column, channel), i.e. height before
# width, which matches the model's Input((IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)).
train_X = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
# Builtin `bool` rather than the `np.bool` alias, which NumPy deprecated in
# 1.20 and removed in 1.24.
train_Y = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, 1), dtype=bool)

print('Preparing the training data...')

for index_, image_id in tqdm(enumerate(train_ids), total=len(train_ids)):
    # base path
    path = TRAIN_PATH + image_id
    # getting the image
    image_path = path + '/images/' + image_id + '.png'
    image = imread(image_path)
    if image.ndim == 3:
        # Drop any extra channel (e.g. alpha) BEFORE resizing; otherwise
        # resize() would interpolate along the channel axis to squeeze the
        # extra channel into IMG_CHANNELS and blend the colours.
        image = image[:, :, :IMG_CHANNELS]
    resized = resize(image, (IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), mode='constant', preserve_range=True)
    train_X[index_] = resized
    # getting the masks
    complete_mask = np.zeros((IMG_HEIGHT, IMG_WIDTH, 1), dtype=bool)
    for mask_id in next(os.walk(path + '/masks/'))[2]:
        mask_path = path + '/masks/' + mask_id
        mask = imread(mask_path)
        resized_mask = resize(mask, (IMG_HEIGHT, IMG_WIDTH, 1), mode='constant', preserve_range=True)
        # creating one mask for all the masks for this image (pixel-wise union)
        complete_mask = np.maximum(resized_mask, complete_mask)
    # Storing into the bool array turns every non-zero value into True.
    train_Y[index_] = complete_mask

# Summarise the arrays instead of dumping raw pixel values.
print('train_X:', train_X.shape, train_X.dtype)
print('train_Y:', train_Y.shape, train_Y.dtype)

From the same U-Net paper, on data augmentation:

> we primarily need shift and rotation invariance as well as
robustness to deformations and gray value variations

In [33]:
# Random rotations, shifts and flips used to augment the training images.
#
# There is deliberately no `datagen.fit(train_X)` call: fitting only computes
# data-set statistics for featurewise_center / featurewise_std_normalization /
# zca_whitening, none of which is enabled here, so it would just spend time
# (and memory for a float copy of train_X) without changing the generator.
datagen = ImageDataGenerator(rotation_range=45,
                             width_shift_range=0.25,
                             height_shift_range=0.25,
                             horizontal_flip=True,
                             vertical_flip=True)

In [None]:
# random.randint(a, b) includes b, so randint(0, len(...)) could return an
# index one past the end; randrange(n) draws from 0 .. n-1.
random_i = random.randrange(len(train_ids))

# Show the picked image and, below it, its combined mask.
plt.imshow(train_X[random_i])
plt.show()
plt.imshow(np.squeeze(train_Y[random_i]))
plt.show()

In [None]:
# The sub-directory names directly under TEST_PATH are the test image ids.
test_ids = next(os.walk(TEST_PATH))[1]
# Show the first few ids, mirroring the check done for the training ids.
# (`test_ids[1][0]` would print just the first character of the second id.)
print(test_ids[:5])

In [None]:
# Laid out as (sample, row, column, channel), i.e. height before width.
test_X = np.zeros((len(test_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)

test_image_sizes = [] # we are going to resize the predicted test images back to original size

for index_, test_id in tqdm(enumerate(test_ids), total=len(test_ids)):
    image_path = TEST_PATH + test_id + '/images/' + test_id + '.png'
    image = imread(image_path)
    # remember the original (rows, columns) of this image
    test_image_sizes.append((image.shape[0], image.shape[1]))
    if image.ndim == 3:
        # Drop any extra channel (e.g. alpha) BEFORE resizing; otherwise
        # resize() would interpolate along the channel axis to squeeze the
        # extra channel into IMG_CHANNELS and blend the colours.
        image = image[:, :, :IMG_CHANNELS]
    resized = resize(image, (IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), mode='constant', preserve_range=True)
    test_X[index_] = resized

In [None]:
# random.randint(a, b) includes b, so randint(0, len(...)) could return an
# index one past the end; randrange(n) draws from 0 .. n-1.
random_i = random.randrange(len(test_X))

plt.imshow(test_X[random_i])
plt.show()

# Training

In [None]:
# U-Net style encoder/decoder built with the Keras functional API.
# All 3x3 convolutions use padding='same', so feature maps only change size at
# the pooling / transposed-convolution steps and the skip concatenations line up.
inputs = Input((IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS))
# Scale the 0-255 pixel values to 0-1 inside the model, so raw uint8 images can
# be fed directly at training and prediction time.
s = Lambda(lambda x: x / 255) (inputs)

# --- Contracting path: two convolutions, then 2x2 max pooling, per level ---
# The first convolution must consume the scaled tensor `s`; feeding `inputs`
# here would leave the normalisation layer disconnected from the model.
conv_1 = Conv2D(16, (3, 3), activation='relu', padding='same')(s)
conv_1 = Conv2D(16, (3, 3), activation='relu', padding='same')(conv_1)
pool_1 = MaxPooling2D((2, 2))(conv_1)

conv_2 = Conv2D(32, (3, 3), activation='relu', padding='same')(pool_1)
conv_2 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv_2)
pool_2 = MaxPooling2D((2, 2))(conv_2)

conv_3 = Conv2D(64, (3, 3), activation='relu', padding='same')(pool_2)
conv_3 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv_3)
pool_3 = MaxPooling2D((2, 2))(conv_3)

conv_4 = Conv2D(128, (3, 3), activation='relu', padding='same')(pool_3)
conv_4 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv_4)
pool_4 = MaxPooling2D((2, 2))(conv_4)

# --- Bottleneck ---
conv_5 = Conv2D(256, (3, 3), activation='relu', padding='same')(pool_4)
conv_5 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv_5)

# --- Expanding path: upsample x2, concatenate the matching encoder output
# along the channel axis (skip connection), then two convolutions ---
up_6 = Conv2DTranspose(256, (2, 2), strides=(2, 2), padding='same')(conv_5)
up_6 = concatenate([up_6, conv_4], axis=3)
conv_6 = Conv2D(128, (3, 3), activation='relu', padding='same')(up_6)
conv_6 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv_6)

up_7 = Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(conv_6)
up_7 = concatenate([up_7, conv_3], axis=3)
conv_7 = Conv2D(64, (3, 3), activation='relu', padding='same')(up_7)
conv_7 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv_7)

up_8 = Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(conv_7)
up_8 = concatenate([up_8, conv_2], axis=3)
conv_8 = Conv2D(32, (3, 3), activation='relu', padding='same')(up_8)
conv_8 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv_8)

up_9 = Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same')(conv_8)
up_9 = concatenate([up_9, conv_1], axis=3)
conv_9 = Conv2D(16, (3, 3), activation='relu', padding='same')(up_9)
conv_9 = Conv2D(16, (3, 3), activation='relu', padding='same')(conv_9)

# TODO: Drop-out layers at the end of the contracting path perform further implicit data augmentation.

# 1x1 convolution + sigmoid: one foreground probability per pixel.
conv_10 = Conv2D(1, (1, 1), activation='sigmoid')(conv_9)

model = Model(inputs=[inputs], outputs=[conv_10])

optimizer = Adam(lr=1e-5)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=[])

model.summary()

In [None]:
# fit_generator() accepts neither `validation_split` nor `batch_size`, and
# both callbacks below monitor `val_loss`, so validation data has to be
# supplied explicitly. Hold out the last 20% of the samples for that.
VALIDATION_FRACTION = 0.2
AUGMENTATION_SEED = 42

split_at = len(train_X) - int(len(train_X) * VALIDATION_FRACTION)
fit_X, val_X = train_X[:split_at], train_X[split_at:]
fit_Y, val_Y = train_Y[:split_at], train_Y[split_at:]

# Images and masks must receive the SAME random rotation / shift / flip.
# flow(X, Y) only transforms X, so use two flows that share a seed and pair
# their batches (the image + mask pattern from the ImageDataGenerator docs).
# The batch size belongs to flow(), not to fit_generator().
image_flow = datagen.flow(fit_X, batch_size=BATCH_SIZE, seed=AUGMENTATION_SEED)
mask_flow = datagen.flow(fit_Y, batch_size=BATCH_SIZE, seed=AUGMENTATION_SEED)
# Wrapped in a generator expression so Keras receives a real generator object.
train_generator = (pair for pair in zip(image_flow, mask_flow))

# Stop after 3 epochs without improvement and keep only the best weights.
early = EarlyStopping(patience=3, verbose=1)
checkpoint = ModelCheckpoint('keras_unet_01.h', verbose=1, save_best_only=True)

result = model.fit_generator(train_generator,
                             validation_data=(val_X, val_Y.astype(np.float32)),
                             epochs=NUM_EPOCHS,
                             steps_per_epoch=STEPS_PER_EPOCH,
                             callbacks=[early, checkpoint])

# Prediction

In [None]:
# Reload the checkpoint written by the training cell and run it on the
# resized test images.
checkpoint_path = 'keras_unet_01.h'
model = load_model(checkpoint_path)
predictions = model.predict(test_X, verbose=1)

In [None]:
# Raw network output for the first test image.
first_prediction = predictions[0]
print(first_prediction)

In [None]:
# Drop only the trailing single-channel axis. A bare np.squeeze() would also
# remove the batch axis when there is exactly one test image, after which
# preds[i] would be a row of pixels instead of a whole prediction.
preds = np.squeeze(predictions, axis=-1)

In [None]:
# First squeezed prediction, followed by the shape of the whole stack.
for summary in (preds[0], preds.shape):
    print(summary)

In [None]:
# random.randint(a, b) includes b, so randint(0, len(...)) could return an
# index one past the end; randrange(n) draws from 0 .. n-1.
index_ = random.randrange(len(test_X))
test_image = test_X[index_]

# Input image first ...
plt.imshow(test_image)
plt.show()

# ... then the predicted map for the same image.
prediction = preds[index_]

plt.imshow(prediction)
plt.show()

# Submission

[Using rakhlin's Fast Run-Length Encoding (Python)](https://www.kaggle.com/rakhlin/fast-run-length-encoding-python)

In [None]:
def rle_encoding(x):
    '''
    Run-length encode a binary mask.

    x: numpy array of shape (height, width), 1 - mask, 0 - background
    Returns a flat list [start, length, start, length, ...] where each start
    is a 1-based pixel position counted down-then-right (column-major order).
    '''
    # Transposing before flattening walks the mask column by column.
    column_major = x.T.flatten()
    mask_positions = np.where(column_major == 1)[0]

    encoded = []
    previous = -2  # sentinel: never adjacent to position 0
    for position in mask_positions:
        if position - previous > 1:
            # Gap since the last mask pixel: open a new run (1-based start).
            encoded += [position + 1, 0]
        encoded[-1] = encoded[-1] + 1
        previous = position

    return encoded

def prob_to_rles(x, cutoff=0.5):
    '''
    Yield one run-length encoding per labelled region of a thresholded map.

    x: array of values compared against `cutoff`
    cutoff: values strictly greater than this count as foreground
    '''
    labelled = label(x > cutoff)
    region_count = labelled.max()
    # Region labels start at 1; label 0 is skipped.
    yield from (rle_encoding(labelled == region_id)
                for region_id in range(1, region_count + 1))

In [None]:
# Resize every prediction back to the original size of its test image.
preds_resized = [
    resize(preds[position], test_image_sizes[position], mode='constant', preserve_range=True)
    for position in range(len(preds))
]

# One submission row per detected region: the image id is repeated once for
# each run-length encoding produced from that image's prediction.
new_test_ids, rles = [], []
for position, test_id in enumerate(test_ids):
    image_rles = list(prob_to_rles(preds_resized[position]))
    rles += image_rles
    new_test_ids += [test_id] * len(image_rles)

In [None]:
# Each run-length list becomes one space-separated string per row.
encoded_pixels = pd.Series(rles).apply(lambda runs: ' '.join(map(str, runs)))
submission = pd.DataFrame({'ImageId': new_test_ids, 'EncodedPixels': encoded_pixels})
submission.to_csv('keras_unet_01.csv', index=False)