#  Intracranial Hemorrhage Detection

## Model 9

#### Kristina Joos

---   

|                 	|                                                        	|
|:----------------:	|:-------------------------------------------------------:	|
| Model           	| InceptionResNetV2                                         |
| Augmentation      | FlipLR, FlupUD,Crop                                                          |
| Windowing         | Tree Windows                                                      	|
| Class Balancing 	| Oversampling                                           	|
| Loss Function   	| Binary_crossentropy                                      	|
| Regularization  	| Early Stopping, Drop Out Layers 0.3 	                    |
| Epochs Run      	| 10                                                       	|
| Time Run (min)   	|  6pm-at least 12, 8:40-   for epoch 6-9                                                    	|
|                 	|                                                        	|
| Test Sores      	| Accuracy: Loss:                                        	|
| Validation      	| Accuracy: Loss:                                        	|
| Leader Board    	| Score: 0.57152 Rank:                                           	|



Some code taken from:
* https://www.kaggle.com/omission/eda-view-dicom-images-with-correct-windowing
* https://www.kaggle.com/c/rsna-intracranial-hemorrhage-detection/discussion/109649#latest-631701


# Installations

# Imports

In [1]:
import glob
import os

#import joblib

import numpy as np

import PIL

import pydicom

import tqdm


import numpy as np
import pandas as pd
import pydicom
import os
import collections
import sys
import glob
import random
import cv2
import tensorflow as tf
import multiprocessing

from math import ceil, floor
from copy import deepcopy
from tqdm import tqdm
from imgaug import augmenters as iaa

import tensorflow.keras
import tensorflow.keras.backend as K
from tensorflow.keras.callbacks import Callback, ModelCheckpoint
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.utils import Sequence
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications.densenet import DenseNet201
from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2, preprocess_input

# Install Modules from internet
# !pip install efficientnet
# !pip install iterative-stratification

# Import Custom Modules
#import efficientnet.keras as efn 
from iterstrat.ml_stratifiers import MultilabelStratifiedShuffleSplit

In [2]:
tf.test.is_gpu_available()

False

In [3]:
tf.test.is_built_with_cuda()

True

# Setup

# Mount ZIP with fuse-zip

In [4]:
tf.test.is_gpu_available()

tf.test.is_built_with_cuda()

input_dir = '../data/input/rsna-intracranial-hemorrhage-detection/'


# Get images path

In [5]:
train_dir = '../data/input/rsna-intracranial-hemorrhage-detection/stage_2_train/'
train_paths = glob.glob(f"{train_dir}/*.dcm")
test_dir = '../data/input/rsna-intracranial-hemorrhage-detection/stage_2_test/'
test_paths = glob.glob(f"{test_dir}/*.dcm")
len(train_paths), len(test_paths)

(752803, 121232)

In [6]:
# Seed
SEED = 12345
np.random.seed(SEED)
#tf.set_random_seed(SEED)

# Constants
TEST_SIZE = 0.15
HEIGHT = 299
WIDTH = 299
CHANNELS = 3
TRAIN_BATCH_SIZE = 32
VALID_BATCH_SIZE = 16
SHAPE = (HEIGHT, WIDTH, CHANNELS)

# Folders
#DATA_DIR = '/kaggle/input/rsna-intracranial-hemorrhage-detection/'
TEST_IMAGES_DIR = test_dir
TRAIN_IMAGES_DIR = train_dir

In [7]:
def correct_dcm(dcm):
    x = dcm.pixel_array + 1000
    px_mode = 4096
    x[x>=px_mode] = x[x>=px_mode] - px_mode
    dcm.PixelData = x.tobytes()
    dcm.RescaleIntercept = -1000

def window_image(dcm, window_center, window_width):    
    if (dcm.BitsStored == 12) and (dcm.PixelRepresentation == 0) and (int(dcm.RescaleIntercept) > -100):
        correct_dcm(dcm)
    img = dcm.pixel_array * dcm.RescaleSlope + dcm.RescaleIntercept
    
    # Resize
    img = cv2.resize(img, SHAPE[:2], interpolation = cv2.INTER_LINEAR)
   
    img_min = window_center - window_width // 2
    img_max = window_center + window_width // 2
    img = np.clip(img, img_min, img_max)
    return img

def bss_window(dcm):
    brain_img = window_image(dcm, 40, 80)
    subdural_img = window_image(dcm, 80, 200)
    soft_img = window_image(dcm, 40, 380)
    
    brain_img = (brain_img - 0) / 80
    subdural_img = (subdural_img - (-20)) / 200
    soft_img = (soft_img - (-150)) / 380
    bss_img = np.array([brain_img, subdural_img, soft_img]).transpose(1,2,0)
    return bss_img

def _read(path, SHAPE):
    dcm = pydicom.dcmread(path)
    try:
        img = bsb_window(dcm)
    except:
        img = np.zeros(SHAPE)
    return img

In [8]:
# Image Augmentation
sometimes = lambda aug: iaa.Sometimes(0.25, aug)
augmentation = iaa.Sequential([ iaa.Fliplr(0.25),
                                iaa.Flipud(0.10),
                                sometimes(iaa.Crop(px=(0, 25), keep_size = True, sample_independently = False))   
                            ], random_order = True)       
        
# Generators
class TrainDataGenerator(Sequence):
    def __init__(self, dataset, labels, batch_size = 16, img_size = SHAPE, img_dir = TRAIN_IMAGES_DIR, augment = False, *args, **kwargs):
        self.dataset = dataset
        self.ids = dataset.index
        self.labels = labels
        self.batch_size = batch_size
        self.img_size = img_size
        self.img_dir = train_dir
        self.augment = augment
        self.on_epoch_end()

    def __len__(self):
        return int(ceil(len(self.ids) / self.batch_size))

    def __getitem__(self, index):
        indices = self.indices[index*self.batch_size:(index+1)*self.batch_size]
        X, Y = self.__data_generation(indices)
        return X, Y

    def augmentor(self, image):
        augment_img = augmentation        
        image_aug = augment_img.augment_image(image)
        return image_aug

    def on_epoch_end(self):
        self.indices = np.arange(len(self.ids))
        np.random.shuffle(self.indices)

    def __data_generation(self, indices):
        X = np.empty((self.batch_size, *self.img_size))
        Y = np.empty((self.batch_size, 6), dtype=np.float32)
        
        for i, index in enumerate(indices):
            ID = self.ids[index]
            image = _read(self.img_dir+ID+".dcm", self.img_size)########
            if self.augment:
                X[i,] = self.augmentor(image)
            else:
                X[i,] = image
            Y[i,] = self.labels.iloc[index].values        
        return X, Y
    
class TestDataGenerator(Sequence):
    def __init__(self, dataset, labels, batch_size = 16, img_size = SHAPE, img_dir = TEST_IMAGES_DIR, *args, **kwargs):
        self.dataset = dataset
        self.ids = dataset.index
        self.labels = labels
        self.batch_size = batch_size
        self.img_size = img_size
        self.img_dir = test_dir
        self.on_epoch_end()

    def __len__(self):
        return int(ceil(len(self.ids) / self.batch_size))

    def __getitem__(self, index):
        indices = self.indices[index*self.batch_size:(index+1)*self.batch_size]
        X = self.__data_generation(indices)
        return X

    def on_epoch_end(self):
        self.indices = np.arange(len(self.ids))
    
    def __data_generation(self, indices):
        X = np.empty((self.batch_size, *self.img_size))
        
        for i, index in enumerate(indices):
            ID = self.ids[index]
            image = _read(self.img_dir+ID+".dcm", self.img_size)########
            X[i,] = image              
        return X

In [9]:
a = pd.read_csv('../data/input/rsna-intracranial-hemorrhage-detection/stage_2_train.csv')
a.head()

Unnamed: 0,ID,Label
0,ID_12cadc6af_epidural,0
1,ID_12cadc6af_intraparenchymal,0
2,ID_12cadc6af_intraventricular,0
3,ID_12cadc6af_subarachnoid,0
4,ID_12cadc6af_subdural,0


In [10]:
df_bigdata_duplicates = a[a.duplicated()]

In [11]:
a.shape

(4516842, 2)

In [12]:
df_bigdata_duplicates

Unnamed: 0,ID,Label
56346,ID_a64d5deed_epidural,0
56347,ID_a64d5deed_intraparenchymal,0
56348,ID_a64d5deed_intraventricular,0
56349,ID_a64d5deed_subarachnoid,0
56350,ID_a64d5deed_subdural,0
56351,ID_a64d5deed_any,0
1171830,ID_854fba667_epidural,0
1171831,ID_854fba667_intraparenchymal,0
1171832,ID_854fba667_intraventricular,0
1171833,ID_854fba667_subarachnoid,0


In [10]:
def read_testset(filename = '../data/input/rsna-intracranial-hemorrhage-detection/stage_2_sample_submission.csv'):
    df = pd.read_csv(filename)
    df["Image"] = df["ID"].str.slice(stop=12)
    df["Diagnosis"] = df["ID"].str.slice(start=13)
    df = df.loc[:, ["Label", "Diagnosis", "Image"]]
    df = df.set_index(['Image', 'Diagnosis']).unstack(level=-1)
    return df

def read_trainset(filename = '../data/input/rsna-intracranial-hemorrhage-detection/stage_2_train.csv'):
    df = pd.read_csv(filename)
    df["Image"] = df["ID"].str.slice(stop=12)
    df["Diagnosis"] = df["ID"].str.slice(start=13)
    
    duplicates_to_remove = [
        56346,56347,56348,56349,
        56350,56351,1171830,1171831,
        1171832,1171833,1171834,1171835,
        3705312,3705313,3705314,3705315,
        3705316,3705317,3842478,3842479,
        3842480,3842481,3842482,3842483
    ]
    df = df.drop(index = duplicates_to_remove)

    df = df.reset_index(drop = True)    
    df = df.loc[:, ["Label", "Diagnosis", "Image"]]
    df = df.set_index(['Image', 'Diagnosis']).unstack(level=-1)
    return df


In [11]:

# Read Train and Test Datasets
test_df = read_testset()
train_df = read_trainset()

In [12]:
# Oversampling
epidural_df = train_df[train_df.Label['epidural'] == 1]
epidural_df1 = train_df[train_df.Label['epidural'] == 1]
train_oversample_df = pd.concat([train_df, epidural_df])
train_oversample_df = pd.concat([train_oversample_df, epidural_df1])
train_df = train_oversample_df

# Summary
print('Train Shape: {}'.format(train_df.shape))
print('Test Shape: {}'.format(test_df.shape))

Train Shape: (759093, 6)
Test Shape: (121232, 6)


In [13]:
epidural_df1 = train_df[train_df.Label['epidural'] == 1]

intraparenchymal_df1 = train_df[train_df.Label['intraparenchymal'] == 1]

subdural_df1 = train_df[train_df.Label['subdural'] == 1]

intraventricular_df1 = train_df[train_df.Label['intraventricular'] == 1]

subarachnoid_df1 = train_df[train_df.Label['subarachnoid'] == 1]

any_df1 = train_df[train_df.Label['any'] == 1]

In [14]:
filepath = '../models/mymodels/'

In [15]:

def predictions(test_df, model):    
    test_preds = model.predict_generator(TestDataGenerator(test_df, None, 5, SHAPE, TEST_IMAGES_DIR), verbose = 1)
    return test_preds[:test_df.iloc[range(test_df.shape[0])].shape[0]]

def ModelCheckpointFull(model_name):
    return ModelCheckpoint(filepath + model_name, 
                            monitor = 'val_loss', 
                            verbose = 1, 
                            save_best_only = False, 
                            save_weights_only = True, 
                            mode = 'min', 
                            save_freq = 1)

# Create Model
def create_model():
    #K.clear_session()
    
    base_model =  InceptionResNetV2(weights = 'imagenet', include_top = False, pooling = 'avg', input_shape = SHAPE)
    #base_model.load_weights(filepath+'model.h5')
    x = base_model.output
    x = Dropout(0.15)(x)
    y_pred = Dense(6, activation = 'sigmoid')(x)

    return Model(inputs = base_model.input, outputs = y_pred)

In [16]:
# Submission Placeholder
submission_predictions = []

# Multi Label Stratified Split stuff...
msss = MultilabelStratifiedShuffleSplit(n_splits = 10, test_size = TEST_SIZE, random_state = SEED)
X = train_df.index
Y = train_df.Label.values

# Get train and test index
msss_splits = next(msss.split(X, Y))
train_idx = msss_splits[0]
valid_idx = msss_splits[1]

In [17]:
len(msss_splits[1])

113864

In [18]:

for epoch, msss_splits in zip(range(6, 9 ), msss.split(X, Y)): 

    train_idx = msss_splits[0]
    valid_idx = msss_splits[1]
    
    LR = 0.00015

    print(f'=========== EPOCH {epoch}')

    np.random.shuffle(train_idx)
    print(train_idx[:5])    
    print(valid_idx[:5])

    data_generator_train = TrainDataGenerator(train_df.iloc[train_idx], 
                                                train_df.iloc[train_idx], 
                                                TRAIN_BATCH_SIZE, 
                                                SHAPE,
                                                augment = True)
    data_generator_val = TrainDataGenerator(train_df.iloc[valid_idx], 
                                            train_df.iloc[valid_idx], 
                                            VALID_BATCH_SIZE, 
                                            SHAPE,
                                            augment = False)

    # Create Model
    model9 = create_model()
    
    # Full Training Model
    for base_layer in model9.layers[:-1]:
        base_layer.trainable = True
        TRAIN_STEPS = int(len(data_generator_train) / 7)
        TRAIN_STEPS = TRAIN_STEPS + 1000
     
    if epoch != 0:
        weights = filepath + "model9.h5"
        model9.load_weights(weights)
        

    model9.compile(optimizer = Adam(LR), 
                  loss = 'binary_crossentropy',
                  metrics = ['acc'])
    
    # Train Model
    model9.fit_generator(generator = data_generator_train,
                        validation_data = data_generator_val,
                        steps_per_epoch = TRAIN_STEPS,
                        epochs = 1,
                        callbacks = [ModelCheckpointFull('model9.h5')],
                        verbose = 1)
    
    # Starting with the 6th epoch we create predictions for the test set on each epoch
    if epoch > 5:
        preds = predictions(test_df, model9)
        submission_predictions.append(preds)

KeyboardInterrupt: 

In [20]:
model_9 = InceptionResNetV2(weights = '../models/mymodels/model9.h5', include_top = True, pooling = 'avg', input_shape = SHAPE)
    #base_model.load_weights(filepath+'model.h5')

ValueError: Shapes (1536, 1000) and (1536, 6) are incompatible

In [None]:
model.summary()
# load dataset
dataset = loadtxt("pima-indians-diabetes.csv", delimiter=",")
# split into input (X) and output (Y) variables
X = dataset[:,0:8]
Y = dataset[:,8]
# evaluate the model
score = model.evaluate(X, Y, verbose=0)
print("%s: %.2f%%" % (model.metrics_names[1], score[1]*100))

In [1]:
submission_predictions

NameError: name 'submission_predictions' is not defined

In [None]:
[2**i for i in range(len(submission_predictions))]

In [None]:
[i for i in range(len(submission_predictions))]

In [None]:
test_df.iloc[:, :] = np.average(submission_predictions, axis = 0, weights = [1,2,4])
test_df = test_df.stack().reset_index()
test_df.insert(loc = 0, column = 'ID', value = test_df['Image'].astype(str) + "_" + test_df['Diagnosis'])
test_df = test_df.drop(["Image", "Diagnosis"], axis=1)
test_df.to_csv('submission.csv', index = False)
print(test_df.head(12))

In [39]:
test_df.to_csv('../data/output/submissions/model_9.csv', index = False)

In [None]:
test_df.iloc[:, :] = submission_predictions[2]
test_df = test_df.stack().reset_index()
test_df.insert(loc = 0, column = 'ID', value = test_df['Image'].astype(str) + "_" + test_df['Diagnosis'])
test_df = test_df.drop(["Image", "Diagnosis"], axis=1)

print(test_df.head(12))

In [None]:
test_df.to_csv('../data/output/submissions/model_9_epoch9.csv', index = False)

In [40]:
preds_epoch3 = submission_predictions[2]

In [76]:
preds_epoch3 

array([[2.0057172e-02, 1.0902882e-03, 1.9682646e-03, 7.4896216e-04,
        1.4631480e-02, 5.0401092e-03],
       [3.1204522e-03, 3.5116076e-04, 1.6024709e-04, 1.8709898e-04,
        2.3715794e-03, 1.2217760e-03],
       [1.1073649e-03, 3.9637089e-04, 4.3302774e-05, 2.0533800e-05,
        6.1187148e-04, 2.1779537e-04],
       ...,
       [2.0334423e-03, 8.4105134e-04, 1.1232495e-04, 2.4408102e-05,
        1.5593162e-03, 5.2382587e-04],
       [4.0601194e-03, 1.4871359e-04, 8.6516142e-04, 1.0338426e-04,
        2.1260381e-03, 1.0159016e-03],
       [1.9604445e-02, 5.8522224e-03, 1.7835796e-03, 4.5844913e-04,
        8.8746548e-03, 6.6677630e-03]], dtype=float32)

In [82]:
preds_epoch3_clipped = np.clip(preds_epoch3,1e-4,1-1e-6)

In [83]:
preds_epoch3_clipped 

array([[2.0057172e-02, 1.0902882e-03, 1.9682646e-03, 7.4896216e-04,
        1.4631480e-02, 5.0401092e-03],
       [3.1204522e-03, 3.5116076e-04, 1.6024709e-04, 1.8709898e-04,
        2.3715794e-03, 1.2217760e-03],
       [1.1073649e-03, 3.9637089e-04, 9.9999997e-05, 9.9999997e-05,
        6.1187148e-04, 2.1779537e-04],
       ...,
       [2.0334423e-03, 8.4105134e-04, 1.1232495e-04, 9.9999997e-05,
        1.5593162e-03, 5.2382587e-04],
       [4.0601194e-03, 1.4871359e-04, 8.6516142e-04, 1.0338426e-04,
        2.1260381e-03, 1.0159016e-03],
       [1.9604445e-02, 5.8522224e-03, 1.7835796e-03, 4.5844913e-04,
        8.8746548e-03, 6.6677630e-03]], dtype=float32)

In [60]:
preds_epoch3.shape

(121232, 6)

In [85]:
df = test_df

In [86]:
df.shape

(121232, 6)

In [87]:
preds_epoch3_clipped.shape

(121232, 6)

In [88]:
df.iloc[:, :] = preds_epoch3_clipped
df = df.stack().reset_index()
df.insert(loc = 0, column = 'ID', value = df['Image'].astype(str) + "_" + df['Diagnosis'])
df = df.drop(["Image", "Diagnosis"], axis=1)

print(df.head(12))

                               ID     Label
0                ID_000000e27_any  0.020057
1           ID_000000e27_epidural  0.001090
2   ID_000000e27_intraparenchymal  0.001968
3   ID_000000e27_intraventricular  0.000749
4       ID_000000e27_subarachnoid  0.014631
5           ID_000000e27_subdural  0.005040
6                ID_000009146_any  0.003120
7           ID_000009146_epidural  0.000351
8   ID_000009146_intraparenchymal  0.000160
9   ID_000009146_intraventricular  0.000187
10      ID_000009146_subarachnoid  0.002372
11          ID_000009146_subdural  0.001222


In [89]:
df.to_csv('../data/output/submissions/model_4_epoch3_clipped.csv', index = False)