# Dataset


Download the dataset in Google Colab (uncomment the cell below when running there)

In [1]:
# !pip install -q kaggle

# !mkdir ~/.kaggle
# !cp kaggle.json ~/.kaggle/
# !chmod 600 ~/.kaggle/kaggle.json

# !kaggle datasets download -d nikhilpandey360/chest-xray-masks-and-labels

# !unzip -q /content/chest-xray-masks-and-labels.zip
# !rm /content/chest-xray-masks-and-labels.zip

In [2]:
!pip install mlflow

# libraries

In [3]:
import os
import warnings
import sys
import cv2
from tqdm import tqdm
import matplotlib.pyplot as plt
import random
from sklearn.model_selection import train_test_split
%matplotlib inline
import numpy as np
from urllib.parse import urlparse
import tensorflow as tf
import tensorflow.keras
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import *
from tensorflow.keras import backend as K
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.applications.vgg16 import VGG16
import mlflow
import mlflow.tensorflow

In [4]:
# Silence library warnings so the training output stays readable.
warnings.filterwarnings("ignore")

# Seed every random number generator the notebook relies on (Python, NumPy,
# TensorFlow) so sampling and weight initialisation repeat across runs.
# The value is the same one passed as random_state to train_test_split.
SEED = 2018
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Turn on MLflow autologging for the training run.
mlflow.autolog()

# data

## Data Path

In [5]:
# The dataset sits in a different place on each platform. Probe the known
# locations and use the first one that exists; when none is present fall back
# to the Kaggle layout so the paths stay what they were before.
DATASET_ROOTS = (
    '../input/chest-xray-masks-and-labels/Lung Segmentation/',  # Kaggle
    '/content/Lung Segmentation/',                              # Colab
)
data_root = next((root for root in DATASET_ROOTS if os.path.isdir(root)),
                 DATASET_ROOTS[0])

image_path_train = data_root + 'CXR_png/'
mask_path_train = data_root + 'masks/'
image_path_test = data_root + 'test/'

In [6]:
# os.listdir returns entries in arbitrary order; sort the listings so every
# later step sees the files in the same order on every run.
images = sorted(os.listdir(image_path_train))
# Mask names with the ".png" extension stripped.
mask = sorted(fName.split(".png")[0] for fName in os.listdir(mask_path_train))
# Image stem for each mask: everything before the "_mask" suffix, if any.
image_file_name = [fName.split("_mask")[0] for fName in mask]

In [7]:
# Keep only the mask stems whose name contains "mask" and report how many there are.
check = list(filter(lambda stem: "mask" in stem, mask))
print("Total mask that has modified name:", len(check))

In [8]:
# File names present in BOTH the image and the mask directory (read by
# getData with flag="MONT"). Sorted because iterating a set of strings can
# yield a different order after every kernel restart, which would change the
# sample order fed into the seeded train/validation split.
testing_files = sorted(set(os.listdir(image_path_train)) & set(os.listdir(mask_path_train)))
# Mask stems whose name contains "mask" (read by getData with flag="SHEN"),
# also in a stable order.
training_files = sorted(check)

## Load functions

In [9]:
def _read_resized(path, shape):
    """Read the image at ``path`` with OpenCV and resize it to ``shape``.

    Raises FileNotFoundError when the file cannot be loaded: ``cv2.imread``
    reports failure by returning None, which would otherwise surface as an
    obscure error inside ``cv2.resize``.
    """
    img = cv2.imread(path)
    if img is None:
        raise FileNotFoundError("Could not read image file: " + path)
    return cv2.resize(img, shape)


def getData(X_shape, flag = "MONT"):
    """Load the image/mask pairs of one data source, resized to a square.

    Parameters
    ----------
    X_shape : int
        Side length in pixels; every image and mask is resized to
        (X_shape, X_shape).
    flag : str
        "MONT" reads the names in ``testing_files`` (image and mask share
        the same file name). "SHEN" reads the mask stems in
        ``training_files`` (the image name is the part of the stem before
        "_mask", plus ".png").

    Returns
    -------
    (list, list)
        The resized images and the first channel of the resized masks, in
        matching order.

    Raises
    ------
    ValueError
        If ``flag`` is neither "MONT" nor "SHEN" (previously this silently
        produced two empty lists).
    FileNotFoundError
        If an image or mask file cannot be read.
    """
    # Resolve the (image file, mask file) name pairs first so both data
    # sources share a single loading loop.
    if flag == "MONT":
        pairs = [(name, name) for name in testing_files]
    elif flag == "SHEN":
        pairs = [(stem.split("_mask")[0] + ".png", stem + ".png")
                 for stem in training_files]
    else:
        raise ValueError('flag must be "MONT" or "SHEN", got %r' % (flag,))

    shape = (X_shape, X_shape)
    im_array = []
    mask_array = []
    for image_name, mask_name in tqdm(pairs):
        im = _read_resized(os.path.join(image_path_train, image_name), shape)
        # Masks are read as multi-channel images; only channel 0 is kept.
        # NOTE(review): resizing uses OpenCV's default interpolation, so mask
        # edges may hold intermediate grey values - confirm this is intended.
        mask = _read_resized(os.path.join(mask_path_train, mask_name), shape)[:, :, 0]

        im_array.append(im)
        mask_array.append(mask)
    return im_array, mask_array

In [10]:
def get_test(X_shape, n_samples = 100, seed = None):
    """Load a random selection of distinct test images, resized to a square.

    Parameters
    ----------
    X_shape : int
        Side length in pixels; every image is resized to (X_shape, X_shape).
    n_samples : int
        Number of images to draw. Capped at the number of files available
        in ``image_path_test``.
    seed : int or None
        When given, the selection is drawn from a private generator seeded
        with this value, so it is repeatable. When None the module-level
        ``random`` generator is used.

    Returns
    -------
    list
        The resized images, one per selected file.

    Raises
    ------
    FileNotFoundError
        If a selected file cannot be read.
    """
    shape = (X_shape, X_shape)
    # Sorted because os.listdir order is arbitrary; a seeded draw is only
    # repeatable when the candidate list is in a stable order.
    candidates = sorted(os.listdir(image_path_test))
    rng = random if seed is None else random.Random(seed)
    # sample() draws without replacement; choices() could return the same
    # file several times and so duplicate test images.
    k = min(max(n_samples, 0), len(candidates))
    test_files = rng.sample(candidates, k)

    im_array = []
    for i in tqdm(test_files):
        path = os.path.join(image_path_test, i)
        im = cv2.imread(path)
        if im is None:
            # cv2.imread returns None instead of raising on failure.
            raise FileNotFoundError("Could not read image file: " + path)
        im_array.append(cv2.resize(im, shape))
    return im_array

## loading data

In [11]:
dim = 256        # side length every image and mask is resized to
n_samples = 50   # how many test images to load; n_samples = [1, 96]

# Load both labelled sources, then the unlabelled test images.
image_shen, mask_shen = getData(dim, "SHEN")
image_mont, mask_mont = getData(dim, "MONT")
X_test = get_test(dim, n_samples)

In [12]:
def _as_batch(samples, channels):
    """Stack ``samples`` into an array of shape (len(samples), dim, dim, channels)."""
    return np.array(samples).reshape(len(samples), dim, dim, channels)


# Images keep their 3 channels; masks get an explicit single-channel axis.
image_shen, mask_shen = _as_batch(image_shen, 3), _as_batch(mask_shen, 1)
image_mont, mask_mont = _as_batch(image_mont, 3), _as_batch(mask_mont, 1)
X_test = _as_batch(X_test, 3)

## visualize data

In [13]:
# Report the batch shapes: one line per labelled source, then the test batch.
for cxr_batch, mask_batch in ((image_shen, mask_shen), (image_mont, mask_mont)):
    print(cxr_batch.shape, mask_batch.shape)
print(X_test.shape)

In [14]:
# Preview one sample per source: X-ray on the left, mask on the right.
# The test batch has no masks, so its right-hand axes are removed.
i = 25
fig, axs = plt.subplots(nrows=3, ncols=2, figsize=(9, 13))

preview_rows = [
    ('Shenzhen', image_shen, mask_shen),
    ('Montgomery', image_mont, mask_mont),
    ('NIH', X_test, None),
]
for row, (label, cxr_batch, mask_batch) in enumerate(preview_rows):
    # Clamp the index so a batch with i or fewer samples (for example the
    # test batch when n_samples <= 25) does not raise IndexError.
    idx = min(i, len(cxr_batch) - 1)
    axs[row, 0].imshow(cxr_batch[idx])
    axs[row, 0].set_ylabel(label)
    axs[row, 0].set_title('CXR')
    if mask_batch is None:
        fig.delaxes(axs[row, 1])
    else:
        # Drop the single channel axis rather than reshaping to a hard-coded
        # 256x256, so the preview works for any image size.
        axs[row, 1].imshow(mask_batch[idx, :, :, 0], cmap='gray')
        axs[row, 1].set_title('mask')

## split data

In [15]:
# Merge both labelled sources along the sample axis (axis 0 is the default).
images = np.concatenate([image_shen, image_mont])
masks = np.concatenate([mask_shen, mask_mont])

print(images.shape, masks.shape)

In [16]:
# Hold out 15% of the labelled samples for validation. Images and masks are
# divided by 255 on the way in; the fixed random_state keeps the split stable.
split_options = dict(test_size=0.15, random_state=2018)
X_train, X_val, Y_train, Y_val = train_test_split(images / 255.0,
                                                  masks / 255.,
                                                  **split_options)
# The test images get the same scaling as the training images.
X_testNorm = X_test / 255.0

# segmentation models

In [17]:
!pip install -q segmentation-models==1.0.1

In [18]:
# segmentation_models chooses its Keras backend while it is being imported.
# Select tf.keras through the environment first, so the import itself never
# tries a standalone `keras` package that may be missing or incompatible.
os.environ["SM_FRAMEWORK"] = "tf.keras"

import segmentation_models as sm

# Kept for clarity: make the backend choice explicit and display it.
sm.set_framework('tf.keras')
sm.framework()

In [19]:
# U-Net with a VGG16 encoder initialised from ImageNet weights and a single
# sigmoid output channel.
unet_options = {
    'classes': 1,
    'activation': 'sigmoid',
    'encoder_weights': 'imagenet',
}
model = sm.Unet('vgg16', **unet_options)

In [20]:
# Print the model's layer-by-layer summary.
model.summary()

In [21]:
# The model is built with classes=1, so the Dice loss needs no per-class
# weights. class_weights must hold one entry per class; the former
# two-element vector did not match the single output channel and ended up
# scaling the Dice term by 0.5.
loss = sm.losses.DiceLoss()

# IoU and F-score are computed on predictions thresholded at 0.5.
metrics = [sm.metrics.IOUScore(threshold=0.5),
           sm.metrics.FScore(threshold=0.5), 'acc']

optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

In [22]:
# Attach the optimizer, loss and metrics configured in the previous cell.
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

In [23]:
# Train for 50 epochs in batches of 32, evaluating on the validation split
# after every epoch; `res` holds the object returned by fit().
res = model.fit(
    x=X_train,
    y=Y_train,
    batch_size=32,
    epochs=50,
    validation_data=(X_val, Y_val),
)

In [27]:
from IPython.display import FileLink

# Write the trained model to disk, then show a download link for the file.
model.save('VGG_UNET.h5')
FileLink(r'./VGG_UNET.h5')

In [29]:
# Archive the local ./mlruns directory (recursively, quietly) into one zip file.
!zip -r -q mlruns_VGG.zip ./mlruns


# Show a download link for the archive created above.
from IPython.display import FileLink 
FileLink(r'./mlruns_VGG.zip')

In [24]:
# Run the trained model on the scaled test images.
preds = model.predict(X_testNorm)

In [25]:
# Show up to five test X-rays (left) next to their predicted masks (right).
# The row count follows the number of predictions, so fewer than five test
# samples no longer raises IndexError.
n_show = min(5, len(preds))

if n_show:
    # squeeze=False keeps axs two-dimensional even when only one row is drawn.
    fig, axs = plt.subplots(nrows=n_show, ncols=2,
                            figsize=(10, 4 * n_show), squeeze=False)

    for i in range(n_show):
        axs[i, 0].imshow(X_testNorm[i])
        axs[i, 0].set_title('CXR')

        # Take the single output channel instead of reshaping to a hard-coded
        # 256x256, so the plot works for any input size.
        axs[i, 1].imshow(preds[i, :, :, 0], cmap='gray')
        axs[i, 1].set_title('predicted mask')