In [None]:
import os
import csv
import random
import pydicom
import numpy as np
import pandas as pd
from skimage import io
from skimage import measure
from skimage.transform import resize

import tensorflow as tf
from tensorflow import keras

from matplotlib import pyplot as plt
import matplotlib.patches as patches

In [None]:
# model configurations
class Configuration:
    """Hyper-parameters and dataset paths shared by every cell of the notebook."""
    # --- training schedule ---
    EPOCHS = 5
    LEARNING_RATE = 0.001  # starting value of the cosine annealing schedule
    BATCH = 32
    # images and masks are resized to IMAGE_SIZE x IMAGE_SIZE before being fed to the network
    IMAGE_SIZE = 64
    # --- network shape ---
    CHANNLES = 32  # (sic) channels of the first convolution; misspelled name kept for existing cells
    CHANNELS = CHANNLES  # correctly spelled alias, prefer this one in new code
    N_BLOCKS = 2  # residual blocks per downsampling stage
    NN_DEPTH = 4  # number of downsampling stages
    WORKERS = 4  # worker count handed to the Keras training loop
    # --- dataset location ---
    DATA_DIR = '../input/rsna-pneumonia-detection-challenge'
    TRAIN_LABELS_PATH = DATA_DIR + '/stage_2_train_labels.csv'
    TRAIN_IMAGES_PATH = DATA_DIR + '/stage_2_train_images'
    TEST_IMAGES_PATH = DATA_DIR + '/stage_2_test_images'
    # --- number of files used for the train / validation subsets ---
    TRAIN_IMAGES_SIZE = 500
    VALID_IMAGES_SIZE = 200

In [None]:
# Build a dictionary that maps every file name (patient id) to the list of its
# pneumonia locations. stage_2_train_labels.csv holds one row per location with the fields
#     patientid, x, y, width, height and target
# Only rows whose target is '1' carry a location, so files without pneumonia
# never become keys of the dictionary.
pneumonia_locations = {}
# newline='' is how the csv module documentation asks files to be opened for a reader
with open(Configuration.TRAIN_LABELS_PATH, mode='r', newline='') as infile:
    reader = csv.reader(infile)
    # skip header
    next(reader, None)
    for row in reader:
        filename = row[0]  # file name without extension
        location = row[1:5]  # (x, y) point followed by width and height
        target = row[5]  # '1' when the patient has pneumonia at this location
        if target == '1':
            # values are stored as float strings; masks are sliced with ints
            location = [int(float(value)) for value in location]
            # start a new list on the first location of a file, extend it afterwards
            pneumonia_locations.setdefault(filename, []).append(location)

In [None]:
# Sanity check on a single training image: show the resized image with the
# bounding boxes recovered from its mask, and the mask itself next to it.
file_name = '00436515-870c-4b36-a041-de91049b9ab4'
img = pydicom.dcmread(os.path.join(Configuration.TRAIN_IMAGES_PATH, file_name + '.dcm')).pixel_array
msk = np.zeros(img.shape)
# Look up the locations of the image being displayed (`file_name`); the
# similarly named `filename` is only the last row left over from the csv loop.
for x, y, w, h in pneumonia_locations.get(file_name, []):
    # add 1's at the location of the pneumonia
    msk[y:y+h, x:x+w] = 1
img = resize(img, (Configuration.IMAGE_SIZE, Configuration.IMAGE_SIZE), mode='reflect')
# resizing interpolates the mask, so threshold it back to a boolean mask
msk = resize(msk, (Configuration.IMAGE_SIZE, Configuration.IMAGE_SIZE), mode='reflect') > 0.5
img = np.expand_dims(img, -1)
msk = np.expand_dims(msk, -1)

f, axarr = plt.subplots(1, 2, figsize=(20,15))
axarr = axarr.ravel()
axarr[0].imshow(img[:, :, 0])
axarr[0].set_title('resized image with boxes from the mask')
axarr[1].imshow(msk[:, :, 0])
axarr[1].set_title('resized mask')
# apply connected components (the mask is already boolean after the threshold above)
comp = measure.label(msk[:, :, 0])
# apply bounding boxes
for region in measure.regionprops(comp):
    # bbox is (min_row, min_col, max_row, max_col): convert to x, y, width, height
    y, x, y2, x2 = region.bbox
    height = y2 - y
    width = x2 - x
    axarr[0].add_patch(patches.Rectangle((x,y),width,height,linewidth=2,edgecolor='b',facecolor='none'))

In [None]:
# list the train images and split them into train and validation file names
folder = Configuration.TRAIN_IMAGES_PATH
# os.listdir gives no ordering guarantee: sort first so that the seeded shuffle
# below produces the same split on every run
filenames = sorted(os.listdir(folder))
# dedicated seeded generator: reproducible split that leaves the global `random` state alone
random.Random(42).shuffle(filenames)
# split into train and validation image filenames
n_train = Configuration.TRAIN_IMAGES_SIZE
n_valid = Configuration.VALID_IMAGES_SIZE
train_filenames = filenames[:n_train]
valid_filenames = filenames[n_train:n_train + n_valid]
print('number of train samples', len(train_filenames))
print('number of valid samples', len(valid_filenames))


In [None]:
# report how many train images were listed and how many files have at least one pneumonia location
n_images = len(filenames)
print('Total train images:', n_images)
n_positive = len(pneumonia_locations)
print('Images with pneumonia:', n_positive)

In [None]:
class generator(keras.utils.Sequence):
    """Keras ``Sequence`` serving batches of resized DICOM images.

    In train mode (``predict=False``) an item is ``(images, masks)``; masks are
    boolean arrays with True inside the pneumonia locations. In predict mode
    (``predict=True``) an item is ``(images, filenames)`` so that predictions
    can be matched back to their files.

    Args:
        folder: directory that contains the DICOM files.
        filenames: file names (with extension) inside ``folder``.
        pneumonia_locations: dict mapping a file name without extension to a
            list of ``[x, y, width, height]`` locations; ``None`` means that no
            location is known.
        batch_size: number of files per batch.
        image_size: images and masks are resized to ``image_size x image_size``.
        shuffle: reshuffle the file order at construction and after every epoch.
        augment: flip image and mask horizontally half of the time (train mode).
        predict: select predict mode, see above.
    """

    # Constructor to initialize the object's properties
    def __init__(self, folder, filenames, pneumonia_locations=None, batch_size=32, image_size=256, shuffle=True, augment=False, predict=False):
        super().__init__()
        self.folder = folder
        # private copy: on_epoch_end shuffles in place and must not reorder the caller's list
        self.filenames = list(filenames)
        self.pneumonia_locations = pneumonia_locations
        self.batch_size = batch_size
        self.image_size = image_size
        self.shuffle = shuffle
        self.augment = augment
        self.predict = predict
        self.on_epoch_end()

    def __load__(self, filename):
        """Return the ``(image, mask)`` pair of one file, both with a trailing channel axis."""
        # load dicom file as numpy array
        img = pydicom.dcmread(os.path.join(self.folder, filename)).pixel_array
        # create empty mask
        msk = np.zeros(img.shape)
        # the locations are keyed by the file name without extension
        key = os.path.splitext(filename)[0]
        # no dictionary at all is treated like "no location known for this file"
        known_locations = self.pneumonia_locations or {}
        for x, y, w, h in known_locations.get(key, ()):
            # add 1's at the location of the pneumonia
            msk[y:y+h, x:x+w] = 1
        # resize both image and mask; the interpolated mask is thresholded back to booleans
        img = resize(img, (self.image_size, self.image_size), mode='reflect')
        msk = resize(msk, (self.image_size, self.image_size), mode='reflect') > 0.5
        # if augment then horizontal flip half the time
        if self.augment and random.random() > 0.5:
            img = np.fliplr(img)
            msk = np.fliplr(msk)
        # add trailing channel dimension
        img = np.expand_dims(img, -1)
        msk = np.expand_dims(msk, -1)
        return img, msk

    def __loadpredict__(self, filename):
        """Return the resized image of one file with a trailing channel axis (no mask)."""
        # load dicom file as numpy array
        img = pydicom.dcmread(os.path.join(self.folder, filename)).pixel_array
        # resize image
        img = resize(img, (self.image_size, self.image_size), mode='reflect')
        # add trailing channel dimension
        img = np.expand_dims(img, -1)
        return img

    def __getitem__(self, index):
        """Return batch number ``index``; see the class docstring for its content."""
        # select batch
        start = index * self.batch_size
        batch_filenames = self.filenames[start:start + self.batch_size]
        # predict mode: return images and filenames
        if self.predict:
            imgs = np.array([self.__loadpredict__(name) for name in batch_filenames])
            return imgs, batch_filenames
        # train mode: return images and masks
        items = [self.__load__(name) for name in batch_filenames]
        # unzip images and masks
        imgs, msks = zip(*items)
        return np.array(imgs), np.array(msks)

    def on_epoch_end(self):
        """Reshuffle the file order when shuffling is enabled."""
        if self.shuffle:
            random.shuffle(self.filenames)

    def __len__(self):
        """Return the number of batches per epoch."""
        if self.predict:
            # return everything, including a last partial batch
            return int(np.ceil(len(self.filenames) / self.batch_size))
        # return full batches only
        return len(self.filenames) // self.batch_size

In [None]:
def create_downsample(channels, inputs):
    """Apply batch norm, activation, a 1x1 convolution to `channels` filters and a 2x2 max pooling.

    The layers are created in the order in which they are applied to `inputs`;
    the pooled tensor is returned.
    """
    stages = (
        keras.layers.BatchNormalization(momentum=0.9),
        keras.layers.LeakyReLU(0),
        keras.layers.Conv2D(channels, 1, padding='same', use_bias=False),
        keras.layers.MaxPool2D(2),
    )
    tensor = inputs
    for stage in stages:
        tensor = stage(tensor)
    return tensor

def create_resblock(channels, inputs):
    """Residual block: two (batch norm, activation, 3x3 convolution) stages plus a skip connection.

    `inputs` is added back unchanged to the output of the second convolution,
    so it has to be shape-compatible with a `channels`-filter feature map.
    """
    branch = inputs
    # both stages are identical, only their input differs
    for _ in range(2):
        branch = keras.layers.BatchNormalization(momentum=0.9)(branch)
        branch = keras.layers.LeakyReLU(0)(branch)
        branch = keras.layers.Conv2D(channels, 3, padding='same', use_bias=False)(branch)
    return keras.layers.add([branch, inputs])

def create_network(input_size, channels, n_blocks=2, depth=4):
    """Build the fully convolutional segmentation network.

    An initial 3x3 convolution is followed by `depth` stages; every stage
    doubles the channel count, halves the resolution (`create_downsample`) and
    applies `n_blocks` residual blocks (`create_resblock`). A 1x1 sigmoid
    convolution produces a single-channel map that is upsampled by
    `2**depth` back to the input resolution.

    Args:
        input_size: side length of the square, single-channel input image.
        channels: filters of the initial convolution.
        n_blocks: residual blocks per stage.
        depth: number of downsampling stages.

    Returns:
        An uncompiled `keras.Model`.

    Raises:
        ValueError: if `depth` is negative or `input_size` is not a multiple
            of `2**depth`; the output would then not have the size of the
            input and could not be compared with the masks.
    """
    if depth < 0:
        raise ValueError('depth must be non-negative, got {0}'.format(depth))
    scale = 2 ** depth
    if input_size % scale != 0:
        raise ValueError(
            'input_size ({0}) must be a multiple of 2**depth ({1})'.format(input_size, scale)
        )
    # input
    inputs = keras.Input(shape=(input_size, input_size, 1))
    x = keras.layers.Conv2D(channels, 3, padding='same', use_bias=False)(inputs)
    # residual blocks
    for _ in range(depth):
        channels = channels * 2
        x = create_downsample(channels, x)
        for _ in range(n_blocks):
            x = create_resblock(channels, x)
    # output
    x = keras.layers.BatchNormalization(momentum=0.9)(x)
    x = keras.layers.LeakyReLU(0)(x)
    x = keras.layers.Conv2D(1, 1, activation='sigmoid')(x)
    # undo the `depth` halvings in a single step
    outputs = keras.layers.UpSampling2D(scale)(x)
    model = keras.Model(inputs=inputs, outputs=outputs)
    return model

In [None]:
# define iou or jaccard loss function
def iou_loss(y_true, y_pred):
    """Return 1 - IoU computed over all elements of both tensors at once.

    Both tensors are flattened, so the score is a single value for the whole
    batch. 1 is added to numerator and denominator, which keeps the ratio
    defined when both tensors are all zeros.
    """
    flat_true = tf.reshape(y_true, [-1])
    flat_pred = tf.reshape(y_pred, [-1])
    overlap = tf.reduce_sum(flat_true * flat_pred)
    union = tf.reduce_sum(flat_true) + tf.reduce_sum(flat_pred) - overlap
    return 1 - (overlap + 1.) / (union + 1.)

# combine bce loss and iou loss
def iou_bce_loss(y_true, y_pred):
    """Return the equally weighted sum of binary cross-entropy and IoU loss.

    Args:
        y_true: ground-truth masks; any dtype that can be cast to the dtype
            of `y_pred` (the data generator yields boolean masks).
        y_pred: predicted probabilities.

    Returns:
        The cross-entropy tensor returned by Keras with the scalar IoU loss
        of the whole batch broadcast onto it, each weighted by 0.5.
    """
    # both terms multiply / compare the two tensors, so they must share a dtype
    y_true = tf.cast(y_true, y_pred.dtype)
    return 0.5 * keras.losses.binary_crossentropy(y_true, y_pred) + 0.5 * iou_loss(y_true, y_pred)

# mean iou as a metric
def mean_iou(y_true, y_pred):
    """Return the IoU of the rounded predictions, averaged over the batch.

    Args:
        y_true: ground-truth masks with 4 axes (reduced over axes 1, 2 and 3);
            any dtype that can be cast to the dtype of `y_pred`.
        y_pred: predicted probabilities of the same shape; rounded to 0 / 1.

    Returns:
        A scalar: the mean over the first axis of
        (intersection + 1) / (union + 1).
    """
    y_pred = tf.round(y_pred)
    # boolean masks cannot be multiplied with float predictions: align the dtypes first
    y_true = tf.cast(y_true, y_pred.dtype)
    intersect = tf.reduce_sum(y_true * y_pred, axis=[1, 2, 3])
    # sum of both areas; the intersection is counted twice here and removed below
    total = tf.reduce_sum(y_true, axis=[1, 2, 3]) + tf.reduce_sum(y_pred, axis=[1, 2, 3])
    # smoothing term keeps the ratio defined for empty masks
    smooth = tf.ones_like(intersect)
    return tf.reduce_mean((intersect + smooth) / (total - intersect + smooth))

# build the network from the notebook configuration and compile it with the
# combined bce + iou loss; accuracy and mean iou are tracked as metrics
model = create_network(
    input_size=Configuration.IMAGE_SIZE,
    channels=Configuration.CHANNLES,
    n_blocks=Configuration.N_BLOCKS,
    depth=Configuration.NN_DEPTH,
)
model.compile(
    optimizer='adam',
    loss=iou_bce_loss,
    metrics=['accuracy', mean_iou],
)

# cosine learning rate annealing
def cosine_annealing(x):
    """Return the learning rate for epoch index `x`.

    The rate follows half a cosine period: it equals
    Configuration.LEARNING_RATE at x = 0 and reaches 0 at
    x = Configuration.EPOCHS.
    """
    phase = np.pi * x / Configuration.EPOCHS
    return Configuration.LEARNING_RATE * (np.cos(phase) + 1.) / 2
learning_rate = tf.keras.callbacks.LearningRateScheduler(cosine_annealing)

# create train and validation generators: both read from the train folder and
# differ only in the file list, shuffling and augmentation
folder = Configuration.TRAIN_IMAGES_PATH
train_gen = generator(
    folder,
    train_filenames,
    pneumonia_locations,
    batch_size=Configuration.BATCH,
    image_size=Configuration.IMAGE_SIZE,
    shuffle=True,
    augment=True,
    predict=False,
)
valid_gen = generator(
    folder,
    valid_filenames,
    pneumonia_locations,
    batch_size=Configuration.BATCH,
    image_size=Configuration.IMAGE_SIZE,
    shuffle=False,
    predict=False,
)

# NOTE(review): fit_generator is deprecated in newer TensorFlow releases in
# favour of fit -- confirm against the installed version before upgrading
history = model.fit_generator(
    train_gen,
    validation_data=valid_gen,
    callbacks=[learning_rate],
    epochs=Configuration.EPOCHS,
    workers=Configuration.WORKERS,
    use_multiprocessing=True,
)

In [None]:
# Training curves: loss on the left, mean iou on the right. Two panels in a
# 1x2 grid, so no slot of the figure is left empty.
fig, (loss_ax, iou_ax) = plt.subplots(1, 2, figsize=(12,4))
loss_ax.plot(history.epoch, history.history["loss"], label="Train loss")
loss_ax.plot(history.epoch, history.history["val_loss"], label="Valid loss")
loss_ax.set(title="Loss", xlabel="epoch", ylabel="loss")
loss_ax.legend()
iou_ax.plot(history.epoch, history.history["mean_iou"], label="Train iou")
iou_ax.plot(history.epoch, history.history["val_mean_iou"], label="Valid iou")
iou_ax.set(title="Mean IoU", xlabel="epoch", ylabel="mean iou")
iou_ax.legend()
fig.tight_layout()
plt.show()

In [None]:
# list the test images (the order is kept as returned, nothing is shuffled)
test_filenames = os.listdir(Configuration.TEST_IMAGES_PATH)
print('number of test samples:', len(test_filenames))

# create test generator with predict flag set to True; the images must be
# resized to the resolution the network was built and trained with
test_gen = generator(Configuration.TEST_IMAGES_PATH, test_filenames, None, batch_size=Configuration.BATCH, image_size=Configuration.IMAGE_SIZE, shuffle=False, predict=True)

# side length of the frame in which the boxes are reported
# NOTE(review): presumably the native resolution of the DICOM images -- confirm
PREDICTION_SIZE = 1024
# one panel per image of a batch, 8 panels per row
n_cols = 8
n_rows = -(-Configuration.BATCH // n_cols)  # ceiling division

# create submission dictionary: file name without extension -> prediction string
submission_dict = {}
# loop through testset; separate loop names keep the notebook-level `filenames` intact
for imgs, batch_filenames in test_gen:
    # predict batch of images
    preds = model.predict(imgs)
    # create figure
    f, axarr = plt.subplots(n_rows, n_cols, figsize=(50,30), squeeze=False)
    axarr = axarr.ravel()
    # loop through batch
    for ax, img, pred, test_filename in zip(axarr, imgs, preds, batch_filenames):
        # stretch the low resolution image over the prediction frame so that
        # the boxes, which are expressed in that frame, line up with it
        ax.imshow(img[:, :, 0], extent=(0, PREDICTION_SIZE, PREDICTION_SIZE, 0))
        # resize predicted mask
        pred = resize(pred, (PREDICTION_SIZE, PREDICTION_SIZE), mode='reflect')
        # threshold predicted mask and apply connected components
        comp = measure.label(pred[:, :, 0] > 0.5)
        # one "confidence x y width height " group per connected component
        predictionString = ''
        for region in measure.regionprops(comp):
            # bbox is (min_row, min_col, max_row, max_col): convert to x, y, width, height
            y, x, y2, x2 = region.bbox
            height = y2 - y
            width = x2 - x
            ax.add_patch(patches.Rectangle((x,y),width,height,linewidth=2,edgecolor='b',facecolor='none'))
            # proxy for confidence score: mean predicted probability inside the box
            conf = np.mean(pred[y:y+height, x:x+width])
            # add to predictionString
            predictionString += str(conf) + ' ' + str(x) + ' ' + str(y) + ' ' + str(width) + ' ' + str(height) + ' '
        # add filename and predictionString to dictionary
        submission_dict[os.path.splitext(test_filename)[0]] = predictionString
    plt.show()
    # only process one batch
    break
print(submission_dict)