### Lung Segmentation from RANZCR Chest X-rays 

[raddar](https://www.kaggle.com/c/ranzcr-clip-catheter-line-classification/discussion/207183) kindly provided lung masks for the RANZCR training data. Lung masks are believed to be critical for successfully detecting intubation/catheter malpositions.
As suggested, I build my own U-Net model that maps the chest X-rays of the competition data to lung masks.

This notebook illustrates a simple custom Keras model to learn the lung mask and largely follows [Peter Grenholm's](https://www.kaggle.com/toregil/a-lung-u-net-in-keras) structure. The network is trained from scratch and does not use ImageNet weights. I am not quite satisfied with the performance, so next I will try qubvel's segmentation_models (Keras) library, which leverages pretrained U-Net models and works nicely with the image augmentation library albumentations.

Updated and improved notebook using transfer learning [here](https://www.kaggle.com/philippschwarz/ranzcr-lung-mask-transfer-learning)

In [None]:
import os
import numpy as np
import pandas as pd 
import cv2
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split
from pathlib import Path
import ast
from tqdm import tqdm_notebook, tqdm

In [None]:
from keras.models import Model
from keras.layers import *
from keras.optimizers import Adam
from keras.regularizers import l2
from keras.preprocessing.image import ImageDataGenerator
import keras.backend as K
from keras.callbacks import  CSVLogger, ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau
from keras.losses import binary_crossentropy

In [None]:
# --- Data locations ---------------------------------------------------------
TRAIN_PATH = '../input/ranzcr-clip-catheter-line-classification/train/'
IMAGE_LIB = TRAIN_PATH  # alias used by the image- and mask-loading code

# --- Model input resolution -------------------------------------------------
IMG_HEIGHT = 128
IMG_WIDTH = 128

# --- Run configuration ------------------------------------------------------
SEED = 42            # seed handed to the augmentation generators
NUM_SAMPLES = 4000   # number of training images loaded into memory
BATCH_SIZE = 32
EPOCHS = 20

# Lung contour annotations, looked up by StudyInstanceUID when building masks.
ctr = pd.read_csv('../input/ranzcr-clip-lung-contours/RANZCR_CLiP_lung_contours.csv')

### Helper Function to read masks

In [None]:
def load_mask(StudyInstanceUID):
    """Build a binary lung mask for one training image.

    The left and right lung contours stored in ``ctr`` for the given study
    are filled on a blank canvas that has the same height and width as the
    image ``IMAGE_LIB + StudyInstanceUID + '.jpg'``.

    Args:
        StudyInstanceUID: Image identifier (file stem and key in ``ctr``).

    Returns:
        2-D float array with 1.0 inside the lung contours and 0.0 elsewhere.

    Raises:
        FileNotFoundError: If the image cannot be read.
        IndexError: If ``ctr`` has no row for ``StudyInstanceUID``.
    """
    path = IMAGE_LIB + StudyInstanceUID + '.jpg'
    img = cv2.imread(path, -1)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError('Could not read image: ' + path)

    # Draw on an empty canvas, not on the X-ray itself: thresholding the
    # X-ray at 255 would also mark every saturated pixel outside the lungs
    # (e.g. burned-in markers) as mask.
    mask = np.zeros(img.shape[:2], dtype=np.uint8)

    row = ctr.loc[ctr.StudyInstanceUID == StudyInstanceUID]
    for column in ('left_lung_contour', 'right_lung_contour'):
        points = np.array(ast.literal_eval(row[column].values[0]), dtype=np.int32)
        if points.size == 0:
            continue  # nothing to draw for this lung
        # OpenCV expects each contour as an int32 array of shape (N, 1, 2).
        cv2.drawContours(mask, [points.reshape(-1, 1, 2)], 0, 255, -1)

    return np.where(mask >= 255, 1.0, 0.0)

raddar explained how to load the masks in this [notebook](https://www.kaggle.com/raddar/simple-lung-contour-visualization). I updated the script so that the background is encoded as 0 and the mask as 1.

In [None]:
# os.listdir returns entries in arbitrary order, so sort before truncating to
# make the NUM_SAMPLES subset reproducible. Only .jpg files are kept because
# the loaders below re-append '.jpg' to each stem.
all_images = sorted(
    Path(entry).stem for entry in os.listdir(TRAIN_PATH) if entry.endswith('.jpg')
)[:NUM_SAMPLES]

### Load images and masks into memory

In [None]:
# Images: read, resize to the model resolution, then min-max scale to [0, 1].
x_data = np.empty((len(all_images), IMG_HEIGHT, IMG_WIDTH), dtype='float32')
for i, name in enumerate(tqdm(all_images)):
    path = IMAGE_LIB + name + '.jpg'
    im = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    if im is None:
        # cv2.imread returns None on failure; fail with a readable message.
        raise FileNotFoundError('Could not read image: ' + path)
    im = cv2.resize(im.astype('float32'), dsize=(IMG_WIDTH, IMG_HEIGHT), interpolation=cv2.INTER_LANCZOS4)
    lo, hi = np.min(im), np.max(im)
    if hi > lo:
        im = (im - lo) / (hi - lo)
    else:
        # A constant image would give 0/0 = NaN and poison training.
        im = np.zeros_like(im)
    x_data[i] = im

# Masks: nearest-neighbour resizing keeps the values strictly binary.
y_data = np.empty((len(all_images), IMG_HEIGHT, IMG_WIDTH), dtype='float32')
for i, name in enumerate(tqdm(all_images)):
    im = load_mask(name)
    im = cv2.resize(im, dsize=(IMG_WIDTH, IMG_HEIGHT), interpolation=cv2.INTER_NEAREST)
    y_data[i] = im


### Verify visually that images and masks are correct

In [None]:
# Show the first image next to its mask as a quick sanity check.
fig, ax = plt.subplots(1, 2, figsize = (8,4))
for axis, array in zip(ax, (x_data[0], y_data[0])):
    axis.imshow(array, cmap='gray')
plt.show()

In [None]:
# Append a trailing channel axis: (N, H, W) -> (N, H, W, 1).
x_data = x_data[:,:,:,np.newaxis]
y_data = y_data[:,:,:,np.newaxis]
# Fix random_state so the 50/50 train/validation split is the same on every run.
x_train, x_val, y_train, y_val = train_test_split(x_data, y_data, test_size = 0.5, random_state = SEED)

In [None]:
def dice_coef(y_true, y_pred):
    """Dice coefficient between the target and the thresholded prediction.

    Predictions are binarised at 0.5 before comparison. Both tensors are
    flattened, so the score is computed over all elements at once.
    """
    truth = K.flatten(y_true)
    probabilities = K.flatten(K.cast(y_pred, 'float32'))
    hard_prediction = K.cast(K.greater(probabilities, 0.5), 'float32')
    overlap = K.sum(truth * hard_prediction)
    return 2. * overlap / (K.sum(truth) + K.sum(hard_prediction))

def dice_loss(y_true, y_pred):
    """Soft Dice loss: one minus the smoothed Dice score of the raw predictions.

    Unlike ``dice_coef`` the predictions are not thresholded. A smoothing
    constant of 1 is added to both numerator and denominator.
    """
    smooth = 1.
    truth = K.flatten(y_true)
    prediction = K.flatten(y_pred)
    overlap = K.sum(truth * prediction)
    total = K.sum(truth) + K.sum(prediction)
    return 1. - (2. * overlap + smooth) / (total + smooth)

def bce_dice_loss(y_true, y_pred):
    """Training loss: binary cross-entropy plus soft Dice loss."""
    bce_term = binary_crossentropy(y_true, y_pred)
    dice_term = dice_loss(y_true, y_pred)
    return bce_term + dice_term

In [None]:
# Small U-Net-style encoder/decoder built with the Keras functional API.
# The input shape is taken from the training array (everything but the batch axis).
input_layer = Input(shape=x_train.shape[1:])
# Encoder: 3x3 convolutions with 8 -> 16 -> 32 filters, each followed by a
# stride-2 max-pool. c1, c2 and c3 are kept for the skip connections below.
c1 = Conv2D(filters=8, kernel_size=(3,3), activation='relu', padding='same')(input_layer)
l = MaxPool2D(strides=(2,2))(c1)
c2 = Conv2D(filters=16, kernel_size=(3,3), activation='relu', padding='same')(l)
l = MaxPool2D(strides=(2,2))(c2)
c3 = Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same')(l)
l = MaxPool2D(strides=(2,2))(c3)
# Bottleneck: 1x1 convolution at the lowest resolution.
c4 = Conv2D(filters=32, kernel_size=(1,1), activation='relu', padding='same')(l)
# Decoder: upsample by 2, concatenate with the encoder output of the same
# level along the channel axis, then apply a 2x2 convolution.
l = concatenate([UpSampling2D(size=(2,2))(c4), c3], axis=-1)
l = Conv2D(filters=32, kernel_size=(2,2), activation='relu', padding='same')(l)
l = concatenate([UpSampling2D(size=(2,2))(l), c2], axis=-1)
l = Conv2D(filters=24, kernel_size=(2,2), activation='relu', padding='same')(l)
l = concatenate([UpSampling2D(size=(2,2))(l), c1], axis=-1)
l = Conv2D(filters=16, kernel_size=(2,2), activation='relu', padding='same')(l)
# Head: 1x1 convolution with 64 filters, dropout, then a single-channel
# sigmoid output giving a value in (0, 1) per pixel.
l = Conv2D(filters=64, kernel_size=(1,1), activation='relu')(l)
l = Dropout(0.5)(l)
output_layer = Conv2D(filters=1, kernel_size=(1,1), activation='sigmoid')(l)

model = Model(input_layer, output_layer)

In [None]:
def my_generator(x_train, y_train, batch_size):
    """Yield augmented (image_batch, mask_batch) pairs forever.

    Images and masks go through two separate ImageDataGenerator flows that
    share the same augmentation settings and the same seed (SEED), the usual
    way of keeping the image and mask transforms in step.
    """
    augmentation = dict(
        width_shift_range=0.1,
        height_shift_range=0.1,
        rotation_range=10,
        zoom_range=0.1,
    )
    image_flow = ImageDataGenerator(**augmentation).flow(x_train, x_train, batch_size, seed=SEED)
    mask_flow = ImageDataGenerator(**augmentation).flow(y_train, y_train, batch_size, seed=SEED)
    while True:
        # Each flow returns an (inputs, targets) pair; only the inputs are used.
        images = image_flow.next()[0]
        masks = mask_flow.next()[0]
        yield images, masks


In [None]:
# Draw one augmented batch of 8 and show each image beside its mask.
image_batch, mask_batch = next(my_generator(x_train, y_train, 8))
fix, ax = plt.subplots(8,2, figsize=(8,20))
for row, (image, mask) in zip(ax, zip(image_batch, mask_batch)):
    row[0].imshow(image[:,:,0])
    row[1].imshow(mask[:,:,0])
plt.show()


In [None]:
# Optimise BCE + Dice; report the thresholded Dice score and plain BCE as metrics.
model.compile(
    optimizer=Adam(2e-4),
    loss=bce_dice_loss,
    metrics=[dice_coef, binary_crossentropy],
)

In [None]:

# All callbacks watch the validation Dice score. Keras logs a metric under the
# name of its function, so `dice_coef` appears as 'val_dice_coef'. Monitoring a
# key that is not in the logs (the former 'val_dice_coeff') makes
# ReduceLROnPlateau and EarlyStopping skip their check instead of acting.
early_stopping = EarlyStopping(patience=10, verbose=1, monitor='val_dice_coef', mode='max')
model_checkpoint = ModelCheckpoint("unet_custom_128-128_{epoch:02d}-{val_loss:.3f}.hdf5", 
#                                    save_best_only=True, 
                                   save_weights_only=True, 
                                   monitor='val_dice_coef', verbose=1, mode='max', period=2)
reduce_lr = ReduceLROnPlateau(factor=0.5, patience=5, min_lr=0.000001, verbose=1, monitor='val_dice_coef', mode='max')

# NOTE(review): steps_per_epoch is derived from NUM_SAMPLES, not from
# len(x_train), so one "epoch" may cover the training split more than once.
# early_stopping is defined above but deliberately left out of the callbacks.
hist = model.fit_generator(my_generator(x_train, y_train, batch_size = BATCH_SIZE),
                           steps_per_epoch = NUM_SAMPLES//BATCH_SIZE,
                           validation_data = (x_val, y_val),
                           epochs=EPOCHS,  
                           callbacks=[ reduce_lr, model_checkpoint], # early_stopping
                           verbose=1)

In [None]:
import json 

class MyJsonEncoder(json.JSONEncoder):
    """JSON encoder that also serialises NumPy scalars and arrays.

    The standard encoder rejects NumPy types such as ``np.float32`` or
    ``np.int64``. This subclass converts them to their plain Python
    equivalents and defers to the base class for everything else.
    """

    def default(self, obj):
        """Return a JSON-serialisable stand-in for ``obj``.

        Raises:
            TypeError: From the base class, for types that are not handled here.
        """
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super(MyJsonEncoder, self).default(obj)


# Keep the training history as a DataFrame for the plots further down.
history_df = pd.DataFrame(hist.history)

# Persist the raw history; the custom encoder handles NumPy float values.
with open('history.json', 'w') as history_file:
    json.dump(hist.history, history_file, cls=MyJsonEncoder)

history_df.head(2)



In [None]:
# Validation curves per epoch. Each title names the series it is drawn from
# (the Dice and binary-crossentropy titles were previously swapped).
fig, ax = plt.subplots(1,3,figsize=(20,4))
history_df.val_loss.plot(ax=ax[0], color='red', title='Validation Loss',ylim=(0,5))
history_df.val_dice_coef.plot(ax=ax[1], color='blue', title='Validation Dice_Coef', )
history_df.val_binary_crossentropy.plot(ax=ax[2], color='green', title='Validation binary_crossentropy');

In [None]:
# Predicted mask for the third training image, fed as a batch of one.
sample = x_train[2].reshape(1,IMG_HEIGHT, IMG_WIDTH, 1)
plt.imshow(model.predict(sample)[0,:,:,0], cmap='gray');

In [None]:
# Compare input, ground-truth mask and predicted mask for one validation sample.
n = 10  # index of the validation sample to inspect
y_hat = model.predict(x_val)
fig, ax = plt.subplots(1,3,figsize=(12,6))
image_ax, truth_ax, prediction_ax = ax
image_ax.imshow(x_val[n,:,:,0], cmap='gray')
truth_ax.imshow(y_val[n,:,:,0])
prediction_ax.imshow(y_hat[n,:,:,0]);

### Test quality of masks on test dataset

In [None]:
# Run the model on one test image, using the same preprocessing as the
# training images (resize, then min-max scale).
TEST_PATH = '../input/ranzcr-clip-catheter-line-classification/test/'
name = '1.2.826.0.1.3680043.8.498.10023042737818625910026668901358652653'
im = cv2.imread(TEST_PATH + name +'.jpg', cv2.IMREAD_UNCHANGED).astype("int16").astype('float32')
im = cv2.resize(im, dsize=(IMG_WIDTH, IMG_HEIGHT), interpolation=cv2.INTER_LANCZOS4)
im = (im - np.min(im)) / (np.max(im) - np.min(im))
# cv2.resize returns an array of shape (IMG_HEIGHT, IMG_WIDTH), so the batch
# must be (1, IMG_HEIGHT, IMG_WIDTH, 1). The previous width-first order only
# worked because the size is square.
im = im.reshape(1, IMG_HEIGHT, IMG_WIDTH, 1)

y_hat = model.predict(im)
fig, ax = plt.subplots(1,2,figsize=(12,4))
ax[0].imshow(im[0,:,:,0], cmap='gray')
ax[1].imshow(y_hat[0,:,:,0]);