In [None]:
# -*- coding: utf-8 -*-
"""final-submission.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1cNndOCXSkssZtnELJgpebPyzV3J8dqy_
"""

# Installing packages required
!conda install -c conda-forge gdcm -y
!pip install git+https://github.com/titu1994/keras-efficientnets.git
!pip install keras_applications==1.0.8 --no-deps
!pip install keras==2.2.4
# !pip install keras-preprocessing==1.2

# Imports
import numpy as np 
import pandas as pd
import os
import gc
import time
from IPython.display import clear_output
import random


from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import ModelCheckpoint as MC
from tensorflow.keras import backend as K
import tensorflow as tf
import keras
from tensorflow import keras
from keras import applications
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Dense, Dropout, Conv2D

# Image processing imports
from scipy import ndimage
import matplotlib.pyplot as plt
import vtk
from vtk.util import numpy_support
import cv2

# Imports for working with DICOM Images
import pydicom
import scipy.ndimage
import gdcm

from os import listdir, mkdir

# Competition input directory; print every regular file that sits directly in it
root = '/kaggle/input/rsna-str-pulmonary-embolism-detection'
for item in os.listdir(root):
    path = os.path.join(root, item)
    if not os.path.isfile(path):
        continue  # skip anything that is not a regular file (e.g. sub-directories)
    print(path)

"""## Importing and Checking Datasets

Training Data
"""

# All three CSV files are read from `root` (the absolute competition directory
# defined above) rather than from a path relative to the current working
# directory, so the cell does not depend on where the kernel was started.
print('Reading train data...')
train = pd.read_csv(os.path.join(root, "train.csv"))
print(train.shape)
train.head()

"""Test Data"""

print('Reading test data...')
test = pd.read_csv(os.path.join(root, "test.csv"))
print(test.shape)
test.head()

"""Sample Submission File for Final Calculation"""

print('Reading sample data...')
ss = pd.read_csv(os.path.join(root, "sample_submission.csv"))
print(ss.shape)
ss.head()

"""Checking the IDs and Keys"""

# Count how many submission ids share each suffix (everything after the first
# underscore-separated token of the id). A dict keeps first-seen order and grows
# with the data, so no fixed number of suffixes has to be assumed.
ids = ss.id
suffix_counts = {}

for i in ids:
    n = '_'.join(i.split('_')[1:])
    suffix_counts[n] = suffix_counts.get(n, 0) + 1

# Parallel lists kept under their original names for any later cell that reads them
types = list(suffix_counts)
counts = list(suffix_counts.values())

for x in range(len(types)):
    print(str(types[x]) + " ------> " + str(counts[x]) )

# Helper Functions

# Function to get DICOM Image Array

In [None]:

def load_scans(dcm_path):
    '''
    Parse the DICOM file located at dcm_path with pydicom and return the
    resulting dataset object.
    '''
    return pydicom.dcmread(dcm_path)

def convert_to_rgb(array):
    '''
    Replicates a single-channel 512x512 array across three channels and
    returns the resulting (512, 512, 3) array
    '''
    plane = array.reshape((512, 512, 1))

    # Joining three copies along the trailing axis yields (512, 512, 3) directly
    return np.concatenate((plane, plane, plane), axis=2)

# Image Pre-processing and Augmentation

def random_crop(img, dim=320):
    '''
    Randomly crops a 2-D image to dim x dim and returns the crop replicated
    over three channels.

    img : 2-D array-like; the crop size passed to TensorFlow has two entries,
          so the input must be two-dimensional.
    dim : side length of the square crop (defaults to 320, the previously
          hard-coded value).

    Returns a NumPy array of shape (dim, dim, 3).
    '''
    # .numpy() requires eager execution, as in the original implementation
    cropped = tf.image.random_crop(img, size=[dim, dim]).numpy()

    # Stacking three 2-D copies on axis 2 already produces (dim, dim, 3)
    return np.stack([cropped, cropped, cropped], axis=2)


def random_rotation(img, angle=8):
    '''
    Rotates the image by a random amount of at most `angle` degrees, in a
    randomly chosen direction, keeping the original array shape
    '''
    # Draw the magnitude first and the sign second (same RNG order as before)
    signed_degrees = random.uniform(0, 1)*angle * random.choice([-1, 1])

    return ndimage.rotate(img, signed_degrees, reshape=False)


def random_flip(img, threshold=0.5):
    '''
    Based on a uniform probability distribution and specified threshold,
    horizontally (left-right) flips the image.

    img       : image with 2 dims (H, W), 3 dims (H, W, C) or 4 dims (B, H, W, C).
    threshold : the image is flipped when a uniform draw from [0, 1] is
                strictly greater than this value.

    Returns the input object unchanged when no flip happens, otherwise a new
    NumPy array with the width axis reversed. The flip is done with NumPy so
    the flipped result is an ndarray rather than a TensorFlow tensor; callers
    no longer receive a randomly varying return type.

    Raises ValueError when a flip is requested for an input whose number of
    dimensions is not 2, 3 or 4.
    '''

    chance = random.uniform(0,1)

    if chance <= threshold:
        return img

    pixels = np.asarray(img)
    if pixels.ndim < 2 or pixels.ndim > 4:
        raise ValueError("random_flip expects an image with 2, 3 or 4 dimensions")

    # Width is axis 1 for (H, W) and (H, W, C), axis 2 for batched (B, H, W, C)
    width_axis = 2 if pixels.ndim == 4 else 1

    # np.flip returns a negatively-strided view; copy so the result is a
    # standalone contiguous array
    return np.ascontiguousarray(np.flip(pixels, axis=width_axis))

def change_contrast(img, factor=2):
    '''
    Returns the image with its contrast adjusted by `factor`
    (delegates to tf.image.adjust_contrast)
    '''
    return tf.image.adjust_contrast(img, factor)

# Windowing

# Set the threshold of the pixels in image
def set_outside_scanner_to_air(raw_pixelarrays, neg_thresh=-1000):
    '''
    Zeroes every value that is less than or equal to neg_thresh.
    The array is modified in place and the same object is returned.
    (Intended for raw outside-scanner padding values such as -2000.)
    '''
    outside_scanner = raw_pixelarrays <= neg_thresh
    raw_pixelarrays[outside_scanner] = 0

    return raw_pixelarrays

def transform_to_hu(slices):
    '''
    Convert a DICOM slice to Hounsfield units (HU) and return the 16 bit image.

    slices : dataset exposing `pixel_array`, `RescaleSlope` and
             `RescaleIntercept` (as returned by load_scans).

    The stored values are mapped with the linear rescale
        HU = stored_value * RescaleSlope + RescaleIntercept
    The previous code multiplied by the slope only when the *intercept* was
    different from 1 and never added the intercept, so its output was not in HU.

    Returns a new np.int16 array with the same shape as `pixel_array`.
    '''

    # astype() copies, so the in-place thresholding below never touches the
    # dataset's own pixel data
    image = slices.pixel_array.astype(np.int16)

    image = set_outside_scanner_to_air(image)

    intercept = float(slices.RescaleIntercept)
    slope = float(slices.RescaleSlope)

    # Rescale in float64, then clamp so the cast back to int16 cannot wrap around
    hu = image.astype(np.float64) * slope + intercept
    int16_limits = np.iinfo(np.int16)
    hu = np.clip(hu, int16_limits.min, int16_limits.max)

    return hu.astype(np.int16)


def set_manual_window(hu_image, custom_center, custom_width):
    '''
    Returns a copy of hu_image clamped to the window
    [custom_center - custom_width/2, custom_center + custom_width/2];
    the input array itself is left untouched
    '''
    lower_bound = custom_center - (custom_width/2)
    upper_bound = custom_center + (custom_width/2)

    clamped = hu_image.copy()

    # Raise everything below the window, then lower everything above it
    too_low = clamped < lower_bound
    clamped[too_low] = lower_bound
    too_high = clamped > upper_bound
    clamped[too_high] = upper_bound

    return clamped
    

def three_channel_stacker(hu_scan, r_window=[40,400], g_window=[100, 700], b_window=[-600,1500], DIM=512 ):
    '''
    Builds a (DIM, DIM, 3) image from one scan by applying a different
    window to each channel. Every window is given as [center, width] and is
    applied with set_manual_window; channel order is r, g, b.
    '''
    planes = []
    for window in (r_window, g_window, b_window):
        windowed = set_manual_window(hu_scan, window[0], window[1])
        planes.append(windowed.reshape((DIM, DIM)))

    # Stack the three 2-D planes along a new trailing channel axis
    return np.stack(planes, axis=-1)

In [None]:
"""Testing out Pre-Processing"""

example = "../input/rsna-str-pulmonary-embolism-detection/train/6897fa9de148/2bfbb7fd2e8b/822dd7790999.dcm"

# Read the example slice once; `ds` stays available as an alias of the same dataset
scan = load_scans(example)
ds = scan
hu_scan = transform_to_hu(scan)
final_image = three_channel_stacker(hu_scan)

# Each panel shows one stage of the pipeline; the augmentations are random,
# so the last two panels change between runs.
panels = [
    ("HU image", hu_scan),
    ("Windowed 3-channel image", final_image),
    ("Random flip", random_flip(final_image)),
    ("Rotation + flip + contrast", change_contrast(random_flip(random_rotation(final_image)), 2)),
]

f, ax = plt.subplots(1,4, figsize=(30,30))
for panel_axis, (panel_title, panel_image) in zip(ax, panels):
    panel_axis.imshow(panel_image)
    panel_axis.set_title(panel_title)

In [None]:

"""## Model Creation"""

inputs = Input((512, 512, 3))

# ImageNet-pretrained Xception without its classification top, frozen so that
# only the layers added below are trained
base_model = keras.applications.Xception(include_top=False, weights="imagenet")
base_model.trainable = False

# training=False runs the backbone in inference mode
outputs = base_model(inputs, training=False)
outputs = keras.layers.GlobalAveragePooling2D()(outputs)
outputs = Dropout(0.25)(outputs)
outputs = Dense(1024, activation='relu')(outputs)
outputs = Dense(256, activation='relu')(outputs)
outputs = Dense(64, activation='relu')(outputs)

# One single-unit sigmoid head per target; the layer name doubles as the key
# of the model's output dict
target_names = ['pe_present_on_image',
                'rv_lv_ratio_gte_1',
                'rv_lv_ratio_lt_1',
                'leftsided_pe',
                'chronic_pe',
                'rightsided_pe',
                'acute_and_chronic_pe',
                'central_pe',
                'indeterminate']

heads = {name: Dense(1, activation='sigmoid', name=name)(outputs) for name in target_names}

model = Model(inputs=inputs, outputs=heads)

# `learning_rate` replaces the deprecated `lr` keyword, which newer Keras
# releases no longer accept
opt = keras.optimizers.Adam(learning_rate=0.001)

model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

model.summary()

# Persist the untrained model, then release it from memory
model.save('improved_model.h5')
del model
K.clear_session()
gc.collect()

In [None]:



"""## Image Generator"""

def data_preprocessor(batch_size, dataset, test=False, debug=True):
    '''
    Generator that loads, windows and randomly augments DICOM slices and
    yields them in batches.

    batch_size : number of images per yielded batch (the final batch may be
                 smaller).
    dataset    : DataFrame with the columns StudyInstanceUID, SeriesInstanceUID
                 and SOPInstanceUID; when test is False it must also contain
                 the nine target columns selected below.
    test       : when True images are read from the test folder and only the
                 image batch is yielded; otherwise (images, labels) is yielded.
    debug      : when True the path of every file is printed before loading.

    Yields np.ndarray batches of images, paired with the matching rows of the
    label matrix when test is False. No batch is yielded once the data is
    exhausted, so consumers never receive an empty array.
    '''

    scan_ids = dataset[['StudyInstanceUID', 'SeriesInstanceUID', 'SOPInstanceUID']]

    if not test:
        Y = dataset[['pe_present_on_image',
                     'rv_lv_ratio_gte_1',
                     'rv_lv_ratio_lt_1',
                     'leftsided_pe',
                     'chronic_pe',
                     'rightsided_pe',
                     'acute_and_chronic_pe',
                     'central_pe',
                     'indeterminate'
                    ]]
        location = 'input/rsna-str-pulmonary-embolism-detection/train'

    else:
        Y = None
        location = 'input/rsna-str-pulmonary-embolism-detection/test'

    X = []
    batch = 0
    for study, series, sliceOfPatient in scan_ids.values:

        # Location of the slice on disk
        dcm_path = f"../{location}/{study}/{series}/{sliceOfPatient}.dcm"
        if debug:
            print(f"Current file: {dcm_path}")

        # Extract the HU frames and window the image
        scan = load_scans(dcm_path)
        image = transform_to_hu(scan)
        image = three_channel_stacker(image)

        # Apply the augmentations
        image = random_flip(image)
        image = random_rotation(image)
        image = change_contrast(image)

        # change_contrast returns a TensorFlow tensor; convert each image now
        # so building the batch array is a plain ndarray stack
        X.append(np.asarray(image))

        if len(X) == batch_size:
            if test:
                yield np.array(X)
            else:
                # Labels are taken by position, in the same order as scan_ids
                yield np.array(X), Y.iloc[batch*batch_size:(batch+1)*batch_size].values

            # Drop the finished batch before collecting so its memory can be freed
            X = []
            gc.collect()
            batch += 1

    # Remaining images that did not fill a whole batch
    if X:
        if test:
            yield np.array(X)
        else:
            yield np.array(X), Y.iloc[batch*batch_size:(batch+1)*batch_size].values

    del X, Y
    gc.collect()

In [None]:

"""## Training"""

history = {}
start = time.time()
debug = 0
batch_size = 1000
train_fraction = 0.9
train_size = int(batch_size*train_fraction)    # Use 90% of a full batch for training

# Upper bound on the total training time
max_training_hours = 4.5
max_train_time = 3600 * max_training_hours

# Single location used for loading, checkpointing and saving the model, so the
# file that is reloaded at the start of a batch is the one written at the end
# of the previous batch regardless of the current working directory
model_path = '../working/improved_model.h5'

# Output names of the model, in the column order produced by data_preprocessor
target_names = ['pe_present_on_image',
                'rv_lv_ratio_gte_1',
                'rv_lv_ratio_lt_1',
                'leftsided_pe',
                'chronic_pe',
                'rightsided_pe',
                'acute_and_chronic_pe',
                'central_pe',
                'indeterminate']


def split_targets(labels):
    """Return {output name: label column} for a 2-D label matrix."""
    return {name: labels[:, column] for column, name in enumerate(target_names)}


checkpoint = MC(filepath=model_path, monitor='val_loss', save_best_only=True, verbose=1)

#Train loop
for n, (x, y) in enumerate(data_preprocessor(batch_size, train.sample(frac=1), test=False, debug=False)):

    if len(x) < 10: #Tries to filter out empty or short data
        break

    clear_output(wait=True)

    print("Training batch: %i - %i" %(batch_size*n, batch_size*(n+1)))

    # Split proportionally to the actual batch length: a full batch still
    # trains on `train_size` samples, and a short final batch keeps a
    # non-empty hold-out part for evaluate()
    n_train = int(len(x)*train_fraction)

    model = load_model(model_path)
    hist = model.fit(
        x[:n_train], #Y values are in a dict as there's more than one target for training output
        split_targets(y[:n_train]),

        callbacks = [checkpoint],   # fit() documents a list of callbacks

        validation_split=0.2,
        epochs=3,
        batch_size=8,
        verbose=debug
    )

    print("Metrics for batch validation:")
    model.evaluate(x[n_train:], split_targets(y[n_train:]))

    # Append this batch's per-epoch metrics to the running history
    for key, values in hist.history.items():
        if key in history:
            history[key] = np.concatenate([history[key], values], axis=0)
        else:
            history[key] = values

    # Save before the time check so the last trained batch is never discarded
    model.save(model_path)

    #To make sure that our model doesn't train past the time limit
    if time.time() - start >= max_train_time:
        print("Max Training time Reached!")
        break

    del model, x, y, hist

    K.clear_session()
    gc.collect()

In [None]:

"""Plot the Losses and Accuracies"""

# One figure per training metric, with its validation counterpart overlaid
# when one was recorded
for key in history.keys():
    if key.startswith('val'):
        continue

    plt.plot(range(len(history[key])), history[key]) #X=epoch, Y=value
    legend_labels = ['train']

    # Not every metric is guaranteed to have a 'val_' series; plot it only if present
    val_key = 'val_'+key
    if val_key in history:
        plt.plot(range(len(history[val_key])), history[val_key])
        legend_labels.append('validation')

    plt.title(key)
    plt.xlabel('epoch (cumulative over all training batches)')
    plt.ylabel(key)
    if 'accuracy' in key:
        plt.axis([0, len(history[key]), -0.1, 1.1]) #Xmin, Xmax, Ymin, Ymax
    plt.legend(legend_labels, loc='upper right')
    plt.show()