### Lung Segmentation from RANZCR Chest X-rays 

[radda](https://www.kaggle.com/c/ranzcr-clip-catheter-line-classification/discussion/207183) kindly provided lung masks for the RANZCR training data. Lung masks are believed to be critical in order to successfully detect intubation/catheter malpositions.
As suggested, I built my own UNet model that maps the competition's chest X-rays to lung masks.

This notebook illustrates a simple Keras model that learns the lung mask, built with [qubvel's segmentation models](https://github.com/qubvel/segmentation_models). It uses transfer learning for the image segmentation: the UNet encoder has been pretrained on ImageNet. The performance is much better than that of my earlier custom network, which was trained from scratch: [notebook](https://www.kaggle.com/philippschwarz/ranzcr-lung-mask-model-not-pretrained)

In [None]:
# First, installing the dependencies
!pip install -U ../input/kerasapplications/Keras_Applications-1.0.8-py3-none-any.whl
!pip install ../input/qubvel/efficientnet-1.0.0-py3-none-any.whl
!pip install ../input/qubvel/image_classifiers-1.0.0-py3-none-any.whl

# Now, installing segmentation_models (short for 'sm')
!pip install ../input/qubvel-segmentation-model-keras-v101/segmentation_models-master

# sm can work with both Keras and TensorFlow (tf.keras).
# By default, it looks for Keras.
# However, importing sm with Keras as the framework raises an error here,
# so we use tf.keras as the framework for sm.
%env SM_FRAMEWORK=tf.keras

In [None]:
import numpy as np
import pandas as pd 
import cv2
import os
import matplotlib.pyplot as plt

from keras import backend as K
from keras.callbacks import CSVLogger, ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau

import keras
import json
import tqdm
from segmentation_models.losses import bce_jaccard_loss
from segmentation_models.metrics import iou_score
import gc
from segmentation_models import Unet
import segmentation_models  as sm
from sklearn.model_selection import train_test_split
from keras.utils import Sequence
from keras.optimizers import Adam
import warnings
warnings.filterwarnings('ignore')
print(os.listdir('../input'))

In [None]:
# Lung contour annotations; load_mask() reads the StudyInstanceUID,
# left_lung_contour and right_lung_contour columns of this table.
ctr = pd.read_csv('../input/ranzcr-clip-lung-contours/RANZCR_CLiP_lung_contours.csv')
# The competition's train.csv.
train = pd.read_csv('../input/ranzcr-clip-catheter-line-classification/train.csv')

In [None]:
from pathlib import Path
import ast

# Size of the images fed to the network, as (height, width).
# Alternative setting: (128, 128).
DIMENSION = (256, 256)
IMG_HEIGHT, IMG_WIDTH = DIMENSION

SEED = 25
BATCH_SIZE = 32
EPOCHS = 12

# Encoder architecture used by the segmentation_models U-Net.
BACKBONE = 'seresnet34'

TRAIN_PATH = '../input/ranzcr-clip-catheter-line-classification/train/'
IMAGE_LIB = TRAIN_PATH

# Only a subset of the training images is used.
MAX_IMAGES = 3000

# os.listdir() returns entries in arbitrary order, so sort before slicing;
# otherwise the selected subset (and hence the seeded train/validation split)
# can differ from run to run.
all_images = [Path(e).stem for e in sorted(os.listdir(TRAIN_PATH))[:MAX_IMAGES]]

In [None]:
len(all_images)

In [None]:
def load_mask(StudyInstanceUID):
    """Rasterise the lung contours of one study into a binary mask.

    The training image is read only to obtain its height and width. The
    left and right lung contours stored in ``ctr`` are then filled on a
    blank canvas, so bright (saturated) pixels of the X-ray itself can never
    end up in the mask.

    Parameters
    ----------
    StudyInstanceUID : str
        Image ID, i.e. the file name of the training image without ``.jpg``.

    Returns
    -------
    numpy.ndarray
        2-D float array with the height and width of the source image,
        1.0 inside the lung contours and 0.0 elsewhere.

    Raises
    ------
    FileNotFoundError
        If the training image cannot be read.
    IndexError
        If ``ctr`` has no row for ``StudyInstanceUID``.
    """
    path = '../input/ranzcr-clip-catheter-line-classification/train/' + StudyInstanceUID + '.jpg'
    img = cv2.imread(path, -1)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError('Could not read image: ' + path)

    # Look the study up once; .iloc[0] raises IndexError when it is missing.
    row = ctr.loc[ctr.StudyInstanceUID == StudyInstanceUID].iloc[0]

    mask = np.zeros(img.shape[:2], dtype=np.uint8)
    for column in ('left_lung_contour', 'right_lung_contour'):
        # The contour is stored as the string form of a list of points.
        points = np.array(ast.literal_eval(row[column]), dtype=np.int32)
        # thickness=-1 fills the interior of the contour.
        cv2.drawContours(mask, [points], 0, 255, -1)

    return np.where(mask >= 255, 1.0, 0.0)

In [None]:
from albumentations import (
    HorizontalFlip, VerticalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
    Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
    IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine,
    IAASharpen, IAAEmboss, RandomContrast, RandomBrightness, Flip, OneOf, Compose, RandomGamma, Rotate,IAAAffine
)

# Empty pipeline: contains no transforms. It is the default `aug` of
# DataGenerator and is used for the validation generator.
aug_null = Compose([])
# Augmentation pipeline passed to the training generator. Every transform is
# given its own application probability `p`; the commented-out entries are
# disabled.
aug = Compose([ 
    Blur(p=0.5, blur_limit=2),
    IAAAffine(p=0.5, shear=5),
    HorizontalFlip(p=0.5),              
    #VerticalFlip(p=0.5),              
    Rotate(limit=5, p=0.3),
    #CLAHE(p=0.3),
    RandomContrast(p=0.2, limit=0.1),
    RandomBrightness(p=0.2, limit=0.1),
    #RandomGamma(p=0.2, gamma_limit=(90, 110))
])

In [None]:
preprocess_input = sm.get_preprocessing(BACKBONE)

class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` yielding batches of chest X-rays and lung masks.

    Every batch is a tuple ``(X, Y)``: ``X`` has shape
    ``(batch, height, width, 3)`` and ``Y`` has shape
    ``(batch, height, width, 1)``; both are ``float32`` and ``Y`` contains
    only the values 0 and 1.
    """

    def __init__(self, image_filenames, batch_size=32,
                 dim=DIMENSION, shuffle=True,
                 preprocess_input=preprocess_input,
                 aug=aug_null, min_mask=2):
        """Set up the generator.

        Parameters
        ----------
        image_filenames : sequence of str
            Image IDs (file names without ``.jpg``) located under
            ``IMAGE_LIB``. Only these images are served by this generator.
        batch_size : int
            Number of samples per batch.
        dim : tuple of int
            Output size as ``(height, width)``.
        shuffle : bool
            Reshuffle the sample order at the end of every epoch.
        preprocess_input : callable
            Applied to the whole image batch before it is returned.
        aug : callable
            Called as ``aug(image=..., mask=...)``; must return a mapping
            with the keys ``'image'`` and ``'mask'``.
        min_mask : int
            Currently unused; accepted for backward compatibility.
        """
        super().__init__()
        self.image_filenames = list(image_filenames)
        self.batch_size = batch_size
        self.dim = dim
        self.shuffle = shuffle
        self.preprocess_input = preprocess_input
        self.aug = aug
        self.min_mask = min_mask
        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches per epoch.

        A trailing partial batch is not counted.
        """
        return len(self.image_filenames) // self.batch_size

    def __getitem__(self, index):
        """Return batch number ``index`` as an ``(X, Y)`` tuple."""
        start = index * self.batch_size
        indexes = self.indexes[start:start + self.batch_size]
        return self.__data_generation(indexes)

    def on_epoch_end(self):
        """Rebuild the sample order, shuffling it when ``shuffle`` is set."""
        self.indexes = np.arange(len(self.image_filenames))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, indexes):
        """Load, resize, augment and stack the samples at ``indexes``.

        ``indexes`` are positions into ``self.image_filenames``.
        """
        height, width = self.dim
        n_samples = len(indexes)

        XX = np.empty((n_samples, height, width, 3), dtype='float32')
        YY = np.empty((n_samples, height, width, 1), dtype='float32')

        for i, idx in enumerate(indexes):
            # Index this generator's own file list (not the global image
            # list), so that training and validation generators serve
            # exactly the images they were given.
            image_id = self.image_filenames[idx]

            path = IMAGE_LIB + image_id + '.jpg'
            im = cv2.imread(path)
            if im is None:
                # cv2.imread signals failure by returning None.
                raise FileNotFoundError('Could not read image: ' + path)
            im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
            im = im.astype(np.float32) / 255.
            # cv2.resize expects dsize as (width, height).
            im = cv2.resize(im, dsize=(width, height),
                            interpolation=cv2.INTER_LANCZOS4)

            # Nearest-neighbour interpolation keeps the mask binary.
            mask = load_mask(image_id)
            mask = cv2.resize(mask, dsize=(width, height),
                              interpolation=cv2.INTER_NEAREST)

            augmented = self.aug(image=im, mask=mask)
            aug_img = augmented['image']
            aug_mask = np.expand_dims(augmented['mask'], axis=-1)

            if not (np.max(aug_mask) <= 1.0 and np.min(aug_mask) >= 0):
                raise ValueError('Mask values outside [0, 1] for image '
                                 + image_id)

            # Re-binarise in case the augmentation interpolated the mask.
            YY[i] = aug_mask >= 0.5
            XX[i] = aug_img

        XX = self.preprocess_input(XX)

        return XX, YY

In [None]:
# Hold out 20% of the images for validation (seeded split). Despite the
# `_idx` suffix, both results are lists of image ID strings taken from
# all_images, not integer positions.
trn_idx, val_idx = train_test_split(all_images, test_size = 0.2, random_state = SEED)

In [None]:
# Training batches: augmented, and shuffled each epoch (the generator default).
train_generator = DataGenerator(
    image_filenames=trn_idx,
    batch_size=BATCH_SIZE,
    dim=DIMENSION,
    preprocess_input=preprocess_input,
    aug=aug,
)

# Validation batches: fixed order and no augmentation.
val_generator = DataGenerator(
    image_filenames=val_idx,
    batch_size=BATCH_SIZE,
    dim=DIMENSION,
    shuffle=False,
    preprocess_input=preprocess_input,
    aug=aug_null,
)

In [None]:
x, y= train_generator[7]
np.max(x), x.shape, y.shape, np.max(y), np.unique(y)

In [None]:
np.unique(load_mask(ctr.StudyInstanceUID.tolist()[1234]))

In [None]:
# Preview one training batch: first image channel (left) next to its lung
# mask (right), one sample per row.
image_batch, mask_batch = train_generator[1]
n_rows = min(8, len(image_batch))
# squeeze=False keeps `ax` two-dimensional even when there is a single row.
fig, ax = plt.subplots(n_rows, 2, figsize=(10, 20), squeeze=False)
for i in range(n_rows):
    ax[i, 0].imshow(image_batch[i, :, :, 0])
    ax[i, 1].imshow(mask_batch[i, :, :, 0])
    ax[i, 0].axis('off')
    ax[i, 1].axis('off')
plt.show()

In [None]:
y.shape

In [None]:
plt.imshow(x[10, ..., 0])
plt.show()

In [None]:
plt.imshow(y[10, ..., 0])
plt.show()

In [None]:
from segmentation_models import Unet
# U-Net whose BACKBONE encoder starts from ImageNet weights. It takes
# IMG_HEIGHT x IMG_WIDTH images with 3 channels and produces a single
# sigmoid-activated output class (the lung mask).
model = Unet(backbone_name=BACKBONE, encoder_weights='imagenet', activation='sigmoid', classes=1, input_shape=(IMG_HEIGHT, IMG_WIDTH, 3))

In [None]:
def dice_coef(y_true, y_pred, smooth=1):
    """Return the smoothed Dice coefficient of two tensors.

    Both tensors are flattened first, so the score is computed over all of
    their elements at once. ``smooth`` is added to the numerator and the
    denominator, which keeps the ratio defined when both inputs are all
    zeros.
    """
    truth = K.flatten(y_true)
    prediction = K.flatten(y_pred)
    overlap = K.sum(truth * prediction)
    total = K.sum(truth) + K.sum(prediction)
    return (2. * overlap + smooth) / (total + smooth)

In [None]:
# Train with Adam (learning rate 0.001) on segmentation_models'
# bce_jaccard_loss; dice_coef and iou_score are reported as metrics.
model.compile(Adam(lr = 0.001), loss=bce_jaccard_loss, metrics=[dice_coef, iou_score])

In [None]:
# Keras logs validation metrics under 'val_<metric name>'. The metric function
# is called `dice_coef`, so the key is 'val_dice_coef'. With a misspelled key
# the callbacks cannot find the quantity they are supposed to watch.
MONITOR = 'val_dice_coef'

# Halve the learning rate after 5 epochs without improvement of the
# validation Dice score.
reduce_lr = ReduceLROnPlateau(factor=0.5, patience=5, min_lr=0.000001,
                              verbose=1, monitor=MONITOR, mode='max')

# Stop training after 10 epochs without improvement.
early_stopping = EarlyStopping(patience=10, verbose=1, monitor=MONITOR, mode='max')

# Save the weights after every epoch; epoch number and validation loss are
# part of the file name.
model_checkpoint = ModelCheckpoint("unet_custom_128-128_{epoch:02d}-{val_loss:.3f}.hdf5",
                                   save_weights_only=True,
                                   monitor=MONITOR, verbose=1, mode='max', period=1)

history = model.fit_generator(train_generator,
                              validation_data=val_generator,
                              epochs=EPOCHS,
                              callbacks=[reduce_lr, early_stopping, model_checkpoint],
                              verbose=1)

In [None]:
class MyJsonEncoder(json.JSONEncoder):
    """JSON encoder that also serialises NumPy scalars and arrays.

    NumPy floating, integer and boolean scalars are converted to the
    matching built-in Python type, and arrays to (nested) lists. Any other
    unsupported object is passed to the base class, which raises
    ``TypeError``.
    """

    def default(self, obj):
        """Return a JSON-serialisable equivalent of ``obj``."""
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)


# Tabular view of the recorded training history, one column per metric.
history_df = pd.DataFrame(history.history)

# Keep a JSON copy of the raw history on disk; MyJsonEncoder converts the
# NumPy floats it contains.
with open('history.json', 'w') as history_file:
    json.dump(history.history, history_file, cls=MyJsonEncoder)

history_df.head(2)


In [None]:
# One panel per validation metric, plotted over the rows of history_df.
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(20, 4))
history_df['val_loss'].plot(ax=ax[0], title='Validation_loss', color='red', ylim=(0, 5))
history_df['val_iou_score'].plot(ax=ax[1], title='Validation_IOU', color='blue')
history_df['val_dice_coef'].plot(ax=ax[2], title='Validation_Dice_Coef', color='green');

In [None]:
# Show one sample of the preview batch: input (first channel), reference mask
# and the model's prediction.
n = 0
y_hat = model.predict(image_batch)
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(12, 6))
ax[0].imshow(image_batch[n, ..., 0], cmap='gray')
ax[1].imshow(mask_batch[n, ..., 0])
ax[2].imshow(y_hat[n, ..., 0]);

### Test quality of masks on test dataset

In [None]:
TEST_PATH = '../input/ranzcr-clip-catheter-line-classification/test/'
name = '1.2.826.0.1.3680043.8.498.10023042737818625910026668901358652653'

# The model was built for (IMG_HEIGHT, IMG_WIDTH, 3) inputs and trained on RGB
# images scaled by 1/255, resized and passed through preprocess_input (see
# DataGenerator). Apply the same steps here: a single-channel, min-max scaled
# image matches neither the input shape nor the training distribution.
path = TEST_PATH + name + '.jpg'
im = cv2.imread(path)
if im is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError('Could not read image: ' + path)
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
im = im.astype(np.float32) / 255.
# cv2.resize expects dsize as (width, height).
im = cv2.resize(im, dsize=(IMG_WIDTH, IMG_HEIGHT), interpolation=cv2.INTER_LANCZOS4)
# Add the batch axis: (1, IMG_HEIGHT, IMG_WIDTH, 3).
im = preprocess_input(im[np.newaxis, ...])

y_hat = model.predict(im)
fig, ax = plt.subplots(1, 2, figsize=(12, 4))
ax[0].imshow(im[0, :, :, 0], cmap='gray')
ax[1].imshow(y_hat[0, :, :, 0]);