# Intro
Welcome to the [RSNA Intracranial Hemorrhage Detection](https://www.kaggle.com/c/rsna-intracranial-hemorrhage-detection).

![](https://storage.googleapis.com/kaggle-competitions/kaggle/13451/logos/header.png)

This notebook is easy-to-understand starter code for beginners. It uses an image generator based on this [template](https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly).

The hemorrhage types are explained [here](https://www.kaggle.com/c/rsna-intracranial-hemorrhage-detection/overview/hemorrhage-types).

The model is based on ResNet50 and runs on GPU.

<span style="color: royalblue;">Please vote the notebook up if it helps you. Thank you. </span>

# Load Libraries

In [None]:
import numpy as np
import pandas as pd
import random
import os
import matplotlib.pyplot as plt

In [None]:
import pydicom as dicom
import cv2

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer

In [None]:
from keras.utils import to_categorical, Sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, Activation
from keras.optimizers import RMSprop,Adam
from keras.applications import VGG19, VGG16, ResNet50

In [None]:
import tensorflow as tf

In [None]:
import warnings
warnings.filterwarnings("ignore")

# Path
Define the path for the subfolders with the data.

In [None]:
# Root folder of the competition data; the sub paths and CSV paths used
# later are built by appending to this string, so it must end with '/'.
path_in = "../input/rsna-intracranial-hemorrhage-detection/rsna-intracranial-hemorrhage-detection/"
# Show the content of the root folder as the cell output.
os.listdir(path_in)

Define the sub paths with images

In [None]:
# Folders with the DICOM files of the train and the test split.
path_train_img = f"{path_in}stage_2_train"
path_test_img = f"{path_in}stage_2_test"

Path to the pretrained data set.

In [None]:
# Folder with the pretrained weight files.
path_models = '../input/models'
# Show the available files as the cell output.
os.listdir(path_models)

# Functions
We define some helper functions.

In [None]:
def rescale_pixelarray(dataset):
    """Apply the rescale tags of a DICOM data set to its pixel data.

    The stored pixel values are multiplied by ``RescaleSlope`` and shifted
    by ``RescaleIntercept``. Every resulting value below -1024 is raised to
    -1024. The result is a new array; ``dataset.pixel_array`` is not
    modified.
    """
    slope = dataset.RescaleSlope
    intercept = dataset.RescaleIntercept
    scaled = dataset.pixel_array * slope + intercept
    too_low = scaled < -1024
    scaled[too_low] = -1024
    return scaled

In [None]:
def _window_image(data_file):
    """Return the rescaled pixels of `data_file` clipped to its display window.

    The window is taken from the `WindowCenter` and `WindowWidth` tags; if a
    tag holds several values, the first one is used.
    """
    def first_value(tag):
        if isinstance(tag, dicom.multival.MultiValue):
            return int(tag[0])
        return int(tag)

    window_center = first_value(data_file.WindowCenter)
    window_width = first_value(data_file.WindowWidth)
    img_min = window_center - window_width // 2
    img_max = window_center + window_width // 2
    window_image = rescale_pixelarray(data_file).copy()
    window_image[window_image < img_min] = img_min
    window_image[window_image > img_max] = img_max
    return window_image


def plot_example(data, sub_type='subdural'):
    """Plot up to 5 positive examples of a given subtype.

    Args:
        data: frame with the columns 'Label', 'sub_type' and 'PatientID'.
        sub_type: value of the 'sub_type' column to show.

    The images are read from `path_train_img`. If fewer than 5 rows match,
    the remaining axes are hidden instead of raising an IndexError.
    """
    n_examples = 5
    fig, axs = plt.subplots(1, n_examples, figsize=(25, 12))
    fig.subplots_adjust(hspace=.2, wspace=.2)
    axs = axs.ravel()

    # Filter once; the selection does not change between the sub plots.
    positives = data[(data['Label'] == 1) & (data['sub_type'] == sub_type)]
    examples = positives.head(n_examples)

    for ax, (_, row) in zip(axs, examples.iterrows()):
        data_file = dicom.dcmread(path_train_img+'/ID_'+row['PatientID']+'.dcm')
        ax.imshow(_window_image(data_file), cmap=plt.cm.gray)
        ax.set_title(row['PatientID']+'_'+row['sub_type'])
        ax.set_xticklabels([])
        ax.set_yticklabels([])

    # Hide the axes that received no image.
    for ax in axs[len(examples):]:
        ax.axis('off')

In [None]:
def plot_types(data, num_types):
    """Plot the image of the first patient with a given number of sub types.

    Args:
        data: frame with the columns 'Label', 'sub_type' and 'PatientID'.
        num_types: number of positive sub type labels (the 'any' label is
            not counted) the patient must have.

    Raises:
        IndexError: if no patient has exactly `num_types` positive sub types.

    The image is read from `path_train_img`; the title lists the patient ID
    followed by its sub types.
    """
    positives = data[(data['sub_type'] != 'any') & (data['Label'] == 1)]
    # Sum only the label column: summing the whole frame would also
    # aggregate the string columns, which is not needed here.
    types_per_patient = positives.groupby('PatientID')['Label'].sum()
    matches = types_per_patient[types_per_patient == num_types].index
    if len(matches) == 0:
        raise IndexError(
            'no patient with exactly {} sub types found'.format(num_types))
    idx = matches[0]

    sub_types = list(data[(data['PatientID'] == idx) &
                          (data['Label'] != 0) &
                          (data['sub_type'] != 'any')]['sub_type'].values)
    title = idx + ': ' + ', '.join(sub_types)

    data_file = dicom.dcmread(path_train_img+'/ID_'+idx+'.dcm')
    img = rescale_pixelarray(data_file)

    # The window tags may hold several values; the first one is used.
    window_center = data_file.WindowCenter
    if isinstance(window_center, dicom.multival.MultiValue):
        window_center = window_center[0]
    window_center = int(window_center)

    window_width = data_file.WindowWidth
    if isinstance(window_width, dicom.multival.MultiValue):
        window_width = window_width[0]
    window_width = int(window_width)

    img_min = window_center - window_width // 2
    img_max = window_center + window_width // 2
    window_image = img.copy()
    window_image[window_image < img_min] = img_min
    window_image[window_image > img_max] = img_max

    # The figure is created only after the lookup succeeded, so a failed
    # lookup does not leave an empty figure behind.
    fig, ax = plt.subplots(1, 1, figsize=(15, 6))
    ax.imshow(window_image, cmap=plt.cm.gray)
    ax.set_title(title)
    ax.set_xticklabels([])
    ax.set_yticklabels([])

In [None]:
#plot_example(train_data, sub_type='subdural')

# Parameters

In [None]:
# Edge length in pixels of the square images fed to the network.
q_size = 200
# Number of image channels expected by the pretrained network input.
img_channel = 3
# Number of label columns per image (one per 'sub_type' value).
num_classes = 6

# Read Image Name

In [None]:
# File names found in the image folders. They are only counted in the
# overview; later both names are reassigned to the IDs of the pivot tables.
list_train_img = os.listdir(path_train_img)
list_test_img = os.listdir(path_test_img)

# Read Input Data

In [None]:
# Training labels, and the sample submission that defines the test IDs
# and the row order of the final submission file.
train_data = pd.read_csv(path_in + 'stage_2_train.csv')
sub_org = pd.read_csv(path_in + 'stage_2_sample_submission.csv')

In [None]:
train_data.head()

# Modify Input Data

In [None]:
# The 'ID' column has the form "ID_<image id>_<sub type>". Split it once
# per frame and keep the second part as 'PatientID' and the third part as
# 'sub_type' (columns are added in the order sub_type, PatientID).
for frame in (train_data, sub_org):
    id_parts = frame['ID'].str.split("_", n=3, expand=True)
    frame['sub_type'] = id_parts[2]
    frame['PatientID'] = id_parts[1]

In [None]:
train_data['sub_type'].value_counts()

# Overview

In [None]:
# Compare the number of distinct IDs in the CSV files with the number of
# files in the image folders. 'PatientID' holds the middle part of the
# 'ID' column, which is also used to build the DICOM file names.
print('number of (unique) train patient ids:', len(train_data['PatientID'].unique()))
print('number of train images: ', len(list_train_img))
print('number of (unique) test patient ids:', len(sub_org['PatientID'].unique()))
print('number of test images: ', len(list_test_img))

# EDA

## Intraparenchymal
* **Location**: Inside of the brain.
* **Mechanism**: High blood pressure, trauma, arteriovenous malformation, tumor, etc.
* **Source**: Arterial or venous.
* **Shape**: Typically rounded.
* **Presentation**: Acute (sudden onset of headache, nausea, vomiting).

In [None]:
plot_example(train_data, sub_type='intraparenchymal')

## Intraventricular
* **Location**: Inside of the ventricle.
* **Mechanism**: Can be associated with both intraparenchymal and subarachnoid hemorrhages.
* **Source**: Arterial or venous.
* **Shape**: Conforms to ventricular shape.
* **Presentation**: Acute (sudden onset of headache, nausea, vomiting).

In [None]:
plot_example(train_data, sub_type='intraventricular')

## Subarachnoid
* **Location**: Between the arachnoid and the pia mater.
* **Mechanism**: Rupture of aneurysms or arteriovenous malformations or trauma.
* **Source**: Predominantly arterial.
* **Shape**: Tracks along the sulci and fissures.
* **Presentation**: Acute (worst headache of life).

In [None]:
plot_example(train_data, sub_type='subarachnoid')

## Subdural
* **Location**: Between the Dura and the arachnoid.
* **Mechanism**: Trauma.
* **Source**: Venous (bridging veins).
* **Shape**: Crescent.
* **Presentation**: May be insidious (worsening headache).

In [None]:
plot_example(train_data, sub_type='subdural')

## Epidural
* **Location**: Between the dura and the skull.
* **Mechanism**: Trauma or after surgery.
* **Source**: Arterial.
* **Shape**: Lentiform.
* **Presentation**: Acute (skull fracture and altered mental status)

In [None]:
plot_example(train_data, sub_type='epidural')

## Group Subtypes
There are 5 subtypes and the additional label `any`, which should always be true if any of the subtype labels is true.

In [None]:
# Number of positive labels per sub type. Only the 'Label' column is
# summed; summing the whole frame would also aggregate the string columns.
group_type = train_data.groupby('sub_type')[['Label']].sum()
fig = plt.figure(figsize=(9, 5))
ax = fig.add_subplot(111)
ax.bar(group_type.index, group_type['Label'])
# Rotate the tick labels created by the bar plot instead of overwriting
# them, so the labels always stay attached to their own ticks.
ax.tick_params(axis='x', labelrotation=45)
plt.grid()
plt.show()

## Multilabel
There are a lot of patients with more than one label.

In [None]:
# For every ID count its positive sub types (without 'any'), then show how
# many IDs have 1, 2, ... sub types. Only 'Label' is summed, so the string
# columns are not aggregated.
train_data[(train_data['sub_type']!='any')&
           (train_data['Label']==1)].groupby('PatientID')['Label'].sum().value_counts()

So we can see that there are 23 patients that have all five subtype labels.

### 1 Type

In [None]:
plot_types(train_data, 1)

### 2 Types

In [None]:
plot_types(train_data, 2)

### 3 Types

In [None]:
plot_types(train_data, 3)

### 4 Types

In [None]:
plot_types(train_data, 4)

### 5 Types (all types)

In [None]:
plot_types(train_data, 5)

# Train And Test Pivot

In [None]:
column_names = ['Label', 'PatientID', 'sub_type']
train_data_pivot = train_data[column_names].drop_duplicates().pivot(index='PatientID',
                                                                    columns='sub_type',
                                                                    values='Label')
test_data_pivot = sub_org[column_names].drop_duplicates().pivot(index='PatientID',
                                                                columns='sub_type',
                                                                values='Label')

# Select Subset Input Data For Training
This is a big dataset. So we select a smaller subset for the training.

In [None]:
# Share of the training IDs that is used for this run.
percentage = 0.25
num_train_img = int(percentage*len(train_data_pivot.index))
num_test_img = len(test_data_pivot.index)
# These prints still see the folder listings; the two lists are replaced
# by the pivot IDs only afterwards.
print('num_train_data:', len(list_train_img), num_train_img)
print('num_test_data:', len(list_test_img))
list_train_img = list(train_data_pivot.index)
list_test_img = list(test_data_pivot.index)
# Draw the training subset without replacement. No seed is set in this
# cell, so the subset can differ between runs.
random_train_img = random.sample(list_train_img, num_train_img)

In [None]:
# Label rows of the sampled IDs, in the sampled order.
y_train_org = train_data_pivot.loc[random_train_img]

# Split Train And Val

In [None]:
# Hold out 30 % of the sampled rows for validation. No random_state is
# passed, so the split can differ between runs.
y_train, y_val = train_test_split(y_train_org, test_size=0.3)
# The test "labels" come from the sample submission.
# NOTE(review): presumably they are placeholders only needed to satisfy the
# generator interface -- confirm.
y_test = test_data_pivot

# Calculate Class Weights

In [None]:
# Map the position of every label column (0 .. num_classes-1) to the share
# of that column among all positive labels of the training split.
# NOTE(review): these weights grow with the label frequency, i.e. frequent
# labels get the larger weight -- confirm that this is intended.
positives_per_class = y_train.sum()
class_weight = dict(zip(range(num_classes),
                        positives_per_class / positives_per_class.sum()))

In [None]:
class_weight

# Data Generator

In [None]:
class DataGenerator(Sequence):
    """Keras ``Sequence`` that reads DICOM images and labels batch by batch.

    Args:
        path: folder with the image files, read as ``<path>/ID_<id>.dcm``.
        list_IDs: list of image IDs served by this generator.
        labels: object indexed with ``.loc[ID]`` that yields the
            ``num_classes`` label values of an ID.
        batch_size: number of images per batch.
        img_size: edge length the images are resized to.
        img_channel: number of channels; the single image plane is repeated
            this many times along the last axis.
        num_classes: number of label values per image.
        shuffle: reshuffle the order of the IDs after every epoch.
        drop_remainder: if True (default, the previous behaviour) only
            complete batches are served and trailing IDs that do not fill a
            batch are skipped. If False the last, shorter batch is served
            as well, so that every ID is visited once per epoch.
    """

    def __init__(self, path, list_IDs, labels, batch_size,
                 img_size, img_channel, num_classes, shuffle=True,
                 drop_remainder=True):
        super().__init__()
        self.path = path
        self.list_IDs = list_IDs
        self.labels = labels
        self.batch_size = batch_size
        self.img_size = img_size
        self.img_channel = img_channel
        self.num_classes = num_classes
        self.shuffle = shuffle
        self.drop_remainder = drop_remainder
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch."""
        n_batches = len(self.list_IDs) / self.batch_size
        if self.drop_remainder:
            return int(np.floor(n_batches))
        return int(np.ceil(n_batches))

    def __getitem__(self, index):
        """Return the images and labels of batch number `index`."""
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        list_IDs_temp = [self.list_IDs[k] for k in indexes]
        X, y = self.__data_generation(list_IDs_temp)
        return X, y

    def on_epoch_end(self):
        """Reset the ID order and shuffle it if requested."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def rescale_pixelarray(self, dataset):
        """Apply the rescale tags of `dataset` and raise values below -1024 to -1024."""
        image = dataset.pixel_array
        rescaled_image = image * dataset.RescaleSlope + dataset.RescaleIntercept
        rescaled_image[rescaled_image < -1024] = -1024
        return rescaled_image

    def __data_generation(self, list_IDs_temp):
        """Load, resize and standardise the images of one batch.

        Returns:
            X: float32 array (n, img_size, img_size, img_channel).
            y: int array (n, num_classes).
            ``n`` is the number of IDs passed in.
        """
        # Size the arrays by the IDs actually passed in: a short batch must
        # not contain uninitialised rows from np.empty.
        n_samples = len(list_IDs_temp)
        X = np.empty((n_samples, self.img_size, self.img_size))
        y = np.empty((n_samples, self.num_classes), dtype=int)
        for i, ID in enumerate(list_IDs_temp):
            data_file = dicom.dcmread(self.path+'/ID_'+ID+'.dcm')
            img = self.rescale_pixelarray(data_file)
            img = cv2.resize(img, (self.img_size, self.img_size))
            X[i, ] = img
            y[i, ] = self.labels.loc[ID]
        X = np.repeat(X[..., np.newaxis], self.img_channel, -1)
        X = X.astype('float32')
        # Standardise every pixel position over the images of the batch.
        X -= X.mean(axis=0)
        std = X.std(axis=0)
        # Positions that are constant over the batch have std 0; dividing
        # by it would produce NaN. Leave those (already zero) values as is.
        std[std == 0] = 1
        X /= std
        return X, y

# Load Pretrained Model

In [None]:
# Convolutional base with pretrained weights and without the classifier
# head. The weight file is taken from `path_models` so that the model
# folder is defined in one place only.
conv_base = ResNet50(weights=path_models + '/model_weights_resnet.h5',
                     include_top=False,
                     input_shape=(q_size, q_size, img_channel))
# The base is not frozen: its weights are trained as well.
conv_base.trainable = True

# Define train and validation data via Data Generator

In [None]:
batch_size = 32
# Both generators read from the training image folder and share the same
# batch and image settings; they differ only in the IDs and labels served.
generator_args = (batch_size, q_size, img_channel, num_classes)
train_generator = DataGenerator(path_train_img, list(y_train.index), y_train,
                                *generator_args)
val_generator = DataGenerator(path_train_img, list(y_val.index), y_val,
                              *generator_args)

# Define the model

In [None]:
# Pretrained convolutional base followed by a small dense classifier.
model = Sequential()
model.add(conv_base)
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))
# One sigmoid output per label column; the size follows `num_classes`
# instead of repeating the literal 6.
model.add(Dense(num_classes, activation='sigmoid'))

# Compile the model

In [None]:
# Binary cross-entropy on the sigmoid outputs scores every label column as
# its own yes/no decision; a small learning rate is used because the
# pretrained base is trained as well.
model.compile(optimizer = RMSprop(lr=1e-5),
              loss='binary_crossentropy',
              metrics=['binary_accuracy'])

In [None]:
model.summary()

In [None]:
epochs = 5

# Fit the model with the fit_generator method

In [None]:
# Train on the batches of the train generator and evaluate on the
# validation generator after every epoch. `history` keeps the per-epoch
# loss and metric values that are plotted below.
history = model.fit_generator(generator=train_generator,
                              validation_data=val_generator,
                              epochs = epochs,
                              class_weight = class_weight,
                              workers=4)

# Plot the loss values

In [None]:
loss = history.history['loss']
loss_val = history.history['val_loss']
# Use a separate name for the x axis: assigning the range to `epochs`
# would replace the epoch count that the training cell passes to
# `fit_generator`, so re-running that cell would fail.
epoch_axis = range(1, len(loss)+1)
plt.plot(epoch_axis, loss, 'bo', label='loss_train')
plt.plot(epoch_axis, loss_val, 'b', label='loss_val')
plt.title('value of the loss function')
plt.xlabel('epochs')
plt.ylabel('value of the loss function')
plt.legend()
plt.grid()
plt.show()

# Plot the accuracy values

In [None]:
acc = history.history['binary_accuracy']
acc_val = history.history['val_binary_accuracy']
# The x axis is derived from this cell's own data and stored under its own
# name, so the cell neither depends on the loss cell nor overwrites the
# `epochs` count used for training.
epoch_axis = range(1, len(acc)+1)
plt.plot(epoch_axis, acc, 'bo', label='accuracy_train')
plt.plot(epoch_axis, acc_val, 'b', label='accuracy_val')
plt.title('accuracy')
plt.xlabel('epochs')
plt.ylabel('value of accuracy')
plt.legend()
plt.grid()
plt.show()

# Define the test data via Data Generator

In [None]:
# The generator serves complete batches only, so the number of test IDs
# has to be a multiple of batch_size for every image to be predicted
# (this is checked after the prediction).
batch_size = 16
# shuffle=False keeps the batches in the order of y_test.index; the
# submission is built on that order.
test_generator = DataGenerator(path_test_img, list(y_test.index), y_test,
                                batch_size, q_size, img_channel, num_classes, shuffle=False)

# Predict the test images with the generator class

In [None]:
# One row of predicted label values per test image served by the generator.
predict = model.predict_generator(test_generator, verbose=1)

In [None]:
# Every test ID needs exactly one prediction row. An explicit check is used
# instead of `assert`, which is skipped when Python runs with -O.
if len(predict) != len(test_data_pivot):
    raise ValueError(
        'got {} predictions for {} test images; the test generator did not '
        'serve every image'.format(len(predict), len(test_data_pivot)))

# Prepare the prediction data by the export format

In [None]:
# Wide table of the predictions: one row per test ID (index 'PatientID'),
# one column per sub type.
submission = pd.DataFrame(predict,
                          index=test_data_pivot.index,
                          columns=y_train_org.columns)

In [None]:
# Wide to long: one row per (PatientID, sub_type) pair with the predicted
# value in the 'Label' column.
submission = (submission.stack()
                        .reset_index()
                        .rename(columns={0: 'Label'}))

In [None]:
# Rebuild the ID of the submission format, "ID_<image id>_<sub type>", put
# it in front and remove the two columns it was built from.
row_ids = ('ID_' + submission['PatientID'].astype(str)
           + '_' + submission['sub_type'].astype(str))
submission.insert(loc=0, column='ID', value=row_ids)
submission = submission.drop(columns=['PatientID', 'sub_type'])

In [None]:
# Bring the rows into the order of the sample submission, then replace the
# ID index by a plain running index (the 'ID' column itself is kept).
submission = (submission.set_index('ID', drop=False)
                        .reindex(sub_org['ID'])
                        .reset_index(drop=True))

# Export the prediction data

In [None]:
# Write the submission with a header row and without the running index.
submission.to_csv('submission.csv', header = True, index=False)

In [None]:
submission