# Segmentation with U-Net

In this notebook, we will be working with the 2018 Data Science Bowl competition dataset for cell nuclei segmentation. The original dataset can be found at [2018 Data Science Bowl](https://www.kaggle.com/c/data-science-bowl-2018). In the original dataset, the masks of the individual nuclei in an image are saved as separate PNG files. Since we will be performing binary segmentation, for convenience the masks of the different nuclei have been merged into a single mask per image, and the new dataset is available at [Nuclei Images Masks from DSB 2018](https://www.kaggle.com/sinjoysaha/nucleiimagesmasksfromdsb2018).
The new folder structure is as follows:
- nucleiimagesmasksfromdsb2018
    - data-science-bowl-2018
        - stage1_test
            - \<id_like_0dfg21dfg1...>
                - images
                    - \<same_id>.png
                    
        - stage1_train
            - same_id
                - images
                    - \<same_id>.png
                - masks
                    - \<same_id>.png
                    
        - stage1_train_combinedmasks
            - \<same_id>.png
           
For completeness, the code used to merge the masks is also provided in commented-out blocks. Understanding it is left to the reader.

In [None]:
# Importing necessary libraries
import os
import numpy as np
from tqdm import tqdm
from skimage.io import imread, imshow
from skimage.transform import resize
from PIL import Image
import matplotlib.pyplot as plt
from tensorflow.keras.layers import (Input, Lambda, Conv2D, Dropout, MaxPooling2D, 
                                    Conv2DTranspose, concatenate)

from tensorflow.keras import Model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard

In [None]:
# Dataset locations; all three folders share the same root inside the Kaggle input directory
DATA_ROOT = '../input/nucleiimagesmasksfromdsb2018/data-science-bowl-2018/'
TRAIN_PATH = DATA_ROOT + 'stage1_train/'                     # one sub-folder per training image id
TRAINMASKS_PATH = DATA_ROOT + 'stage1_train_combinedmasks/'  # one merged mask PNG per training image id
TEST_PATH = DATA_ROOT + 'stage1_test/'                       # one sub-folder per test image id

In [None]:
# Params
# Every image and mask is resized to IMG_HEIGHT x IMG_WIDTH, and only the
# first IMG_CHANNELS channels of each image are kept.
IMG_HEIGHT, IMG_WIDTH = 128, 128
IMG_CHANNELS = 3

# Data Preprocessing

In [None]:
# Each immediate sub-directory name of TRAIN_PATH / TEST_PATH is one image id.
# os.walk yields directory entries in arbitrary filesystem order, so the ids are
# sorted to make the sample order (and every split derived from it) reproducible.
train_ids = sorted(next(os.walk(TRAIN_PATH))[1])
test_ids = sorted(next(os.walk(TEST_PATH))[1])
print(len(train_ids), len(test_ids))

In [None]:
# Pre-allocated containers for the resized training images and their binary masks.
# The builtin `bool` is used instead of the `np.bool` alias, which was deprecated in
# NumPy 1.20 and removed in NumPy 1.24 (where it raises AttributeError).
X_train = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
y_train = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, 1), dtype=bool)

In [None]:
# Train images
print('Resizing training images and masks')
for n, id_ in tqdm(enumerate(train_ids), total=len(train_ids)):
    sample_dir = TRAIN_PATH + id_

    # Read the image, keep its first IMG_CHANNELS channels and store the resized copy in slot n
    image = imread(f'{sample_dir}/images/{id_}.png')[:, :, :IMG_CHANNELS]
    X_train[n] = resize(image, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)

    # Reference only (not executed): how the per-cell masks can be combined into one mask.
    #
    #     mask = np.zeros((IMG_HEIGHT, IMG_WIDTH, 1), dtype=bool)
    #     for mask_file in next(os.walk(sample_dir+'/masks/'))[2]:
    #         mask_ = imread(sample_dir+'/masks/'+mask_file)
    #         mask_ = np.expand_dims(resize(
    #                 mask_, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True),
    #                               axis=-1)
    #         mask = np.maximum(mask, mask_)
    #
    #     y_train[n] = mask

    # Read the already-combined mask, resize it and append a trailing channel axis
    combined_mask = imread(f'{TRAINMASKS_PATH}{id_}.png')
    combined_mask = resize(combined_mask, (IMG_HEIGHT, IMG_WIDTH),
                           mode='constant', preserve_range=True)
    y_train[n] = np.expand_dims(combined_mask, axis=-1)

In [None]:
# Disabled utility code kept for reference: the entire cell body is a single
# string literal, so none of the statements inside it are executed.
'''
# Code to Save the masks and zip it for downloading
os.makedirs('../outputs/train_masks')
def img_frombytes(data):
    size = (data.shape[0],data.shape[1])
    databytes = np.packbits(data, axis=1)
    return Image.frombytes(mode='1', size=size, data=databytes)

for n, id_ in tqdm(enumerate(train_ids), total=len(train_ids)):
    print(train_ids[n],end=' ')
    im = img_frombytes((y_train[n]*255).astype(np.uint8))
    im.save('../outputs/train_masks/'+train_ids[n]+'.png')
    print(' Saved to '+'../outputs/train_masks/'+train_ids[n]+'.png\n')
    
import shutil
OUTPUT_NAME = 'download_folder'
DIRECTORY_TO_ZIP = '../outputs/train_masks/'
shutil.make_archive(OUTPUT_NAME, 'zip', DIRECTORY_TO_ZIP)
'''

In [None]:
# Sanity check: show one training image side by side with its mask
i = 10
for position, picture, caption in ((121, X_train[i], 'Image'),
                                   (122, np.squeeze(y_train[i]), 'Mask')):
    plt.subplot(position)
    imshow(picture)
    plt.title(caption)
plt.show()

In [None]:
# Pre-allocated container for the resized test images, plus an (initially empty) list of sizes
test_shape = (len(test_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)
X_test = np.zeros(test_shape, dtype=np.uint8)
sizes_test = list()

In [None]:
# Test images
# The test folders contain images only (no masks), so the message mentions images only.
print('Resizing test images')
sizes_test.clear()  # keeps the list in sync with test_ids when this cell is re-run
for n, id_ in tqdm(enumerate(test_ids), total=len(test_ids)):
    path = TEST_PATH + id_
    img = imread(path+'/images/'+id_+'.png')[:,:,:IMG_CHANNELS]
    # Record the original (height, width) before resizing discards it
    sizes_test.append([img.shape[0], img.shape[1]])
    img = resize(img, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)
    X_test[n] = img

# Building the U-Net Model

In [None]:
def _double_conv(x, filters, dropout_rate):
    """Apply Conv2D -> Dropout -> Conv2D to ``x`` and return the result.

    Both convolutions use ``filters`` 3x3 kernels with ReLU activation,
    He-normal initialisation and 'same' padding; ``dropout_rate`` is the
    rate of the Dropout layer placed between them.
    """
    x = Conv2D(filters=filters, kernel_size=(3,3),
               activation='relu', kernel_initializer='he_normal',
               padding='same')(x)
    x = Dropout(dropout_rate)(x)
    x = Conv2D(filters=filters, kernel_size=(3,3),
               activation='relu', kernel_initializer='he_normal',
               padding='same')(x)
    return x


def _decoder_block(x, skip, filters, dropout_rate):
    """Upsample ``x`` by 2, concatenate it with ``skip`` and apply a double convolution.

    ``skip`` is the encoder output at the matching resolution.
    """
    x = Conv2DTranspose(filters=filters, kernel_size=(2,2), strides=(2,2), padding='same')(x)
    x = concatenate([x, skip])
    return _double_conv(x, filters, dropout_rate)


# Inputs
# Shape is (height, width, channels), the same axis order as X_train / X_test
inputs = Input((IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS))
# Change integer to float and also scale pixel values
s = Lambda(lambda x: x/255.0)(inputs)

# Contraction/Encoder path: filters double at each block, spatial size halves after each pooling
c1 = _double_conv(s, 16, 0.1)       # Block 1
p1 = MaxPooling2D(pool_size=(2,2))(c1)
c2 = _double_conv(p1, 32, 0.1)      # Block 2
p2 = MaxPooling2D(pool_size=(2,2))(c2)
c3 = _double_conv(p2, 64, 0.2)      # Block 3
p3 = MaxPooling2D(pool_size=(2,2))(c3)
c4 = _double_conv(p3, 128, 0.2)     # Block 4
p4 = MaxPooling2D(pool_size=(2,2))(c4)
c5 = _double_conv(p4, 256, 0.3)     # Block 5 (no pooling afterwards)

# Expansion/Decoder path: each block merges with the encoder output of the same resolution
c6 = _decoder_block(c5, c4, 128, 0.2)   # Block 6
c7 = _decoder_block(c6, c3, 64, 0.2)    # Block 7
c8 = _decoder_block(c7, c2, 32, 0.1)    # Block 8
c9 = _decoder_block(c8, c1, 16, 0.1)    # Block 9

# Outputs: one sigmoid value per pixel
outputs = Conv2D(filters=1, kernel_size=(1,1),
                 activation='sigmoid')(c9)

model = Model(inputs=[inputs], outputs=[outputs])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

# Model Training

In [None]:
# Callbacks
# Checkpoint the best model, stop early on a stalled val_loss, and write TensorBoard logs
checkpoint_cb = ModelCheckpoint(filepath='nuclei_model.h5', verbose=1, save_best_only=True)
early_stopping_cb = EarlyStopping(monitor='val_loss', patience=2)
tensorboard_cb = TensorBoard(log_dir='logs')
callbacks_list = [checkpoint_cb, early_stopping_cb, tensorboard_cb]

In [None]:
# Train the model, holding out 10% of the training arrays for validation
model_results = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=25,
    validation_split=0.1,
    callbacks=callbacks_list,
)

In [None]:
# Plot every curve recorded in the training history on one figure
plt.figure(figsize=(10, 6))
for metric_name, metric_values in model_results.history.items():
    plt.plot(metric_values, label=metric_name)

plt.legend()
plt.show()

Further visualizations can be made from the `logs` directory in the `output` folder; these logs were written by the `TensorBoard` callback.

# Model Evaluation

In [None]:
# Index that separates the first 90% of the training arrays from the remaining 10%
split_at = int(X_train.shape[0]*0.9)

preds_train = model.predict(X_train[:split_at], verbose=1)
y_true_train = y_train[:split_at]
preds_val = model.predict(X_train[split_at:], verbose=1)
y_true_val = y_train[split_at:]
preds_test = model.predict(X_test, verbose=1)

In [None]:
# Thresholding
# Predictions above the cut-off become 1, everything else 0 (stored as uint8)
THRESHOLD = 0.5
preds_train_t, preds_val_t, preds_test_t = (
    (probabilities > THRESHOLD).astype(np.uint8)
    for probabilities in (preds_train, preds_val, preds_test)
)

In [None]:
# Show images
def show_images(i, ti, orgimg, y_true, preds, preds_t):
    """Display a 2x2 figure comparing one image with its segmentations.

    Parameters
    ----------
    i : index into ``orgimg`` of the image to show.
    ti : index into ``y_true``, ``preds`` and ``preds_t`` for the same sample.
    orgimg : indexable collection of input images.
    y_true : indexable collection of ground-truth masks.
    preds : indexable collection of raw predictions.
    preds_t : indexable collection of thresholded predictions.
    """
    plt.figure(figsize=(8,8))
    panels = (
        (221, orgimg[i], 'Image to be Segmented'),
        (222, y_true[ti], 'Segmentation Ground Truth'),
        (223, preds[ti], 'Predicted Segmentation'),
        (224, preds_t[ti], 'Thresholded Segmentation'),
    )
    for position, picture, caption in panels:
        plt.subplot(position)
        imshow(picture)
        plt.title(caption)
    plt.show()

In [None]:
# On Train
# train max 602
i = 602
# Image index and prediction index coincide for the training slice
show_images(i=i, ti=i, orgimg=X_train, y_true=y_true_train,
            preds=preds_train, preds_t=preds_train_t)

In [None]:
# On Val
# The validation slice starts where the first 90% of X_train ends; deriving the
# offset from the data avoids a hard-coded 603 that only holds for 670 samples.
val_start = int(X_train.shape[0]*0.9)
# i must lie in [val_start, len(X_train) - 1]
i = 660
show_images(i, i-val_start,  X_train, y_true_val, preds_val, preds_val_t)

In [None]:
# On Test
# Ground Truths Not Available
i = 0
plt.figure(figsize=(8,8))
# A picture of None marks the panel that only gets a title (no ground truth to draw)
test_panels = (
    (221, X_test[i], 'Image to be Segmented'),
    (222, None, 'Segmentation Ground Truth NA'),
    (223, preds_test[i], 'Predicted Segmentation'),
    (224, preds_test_t[i], 'Thresholded Segmentation'),
)
for position, picture, caption in test_panels:
    plt.subplot(position)
    if picture is not None:
        imshow(picture)
    plt.title(caption)
plt.show()

In [None]:
print("Evaluate on val data")
# Evaluate on the last 10% of the training arrays (the validation slice)
val_start = int(X_train.shape[0]*0.9)
results = model.evaluate(X_train[val_start:], y_train[val_start:], batch_size=128)
# These are validation metrics, so they are labelled as such rather than as "Test"
print("Val Loss:", results[0])
print("Val Acc :", results[1]*100, "%")

It would have been much better to evaluate on a separate test set rather than on the validation set, but here the dataset is too small to split into train, validation and test sets. One can extend this work with data augmentation techniques, which may be considered in the next version of this notebook.