In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

#Data visualization

import matplotlib
import matplotlib.pyplot as plt
import pydicom
import cv2
from tqdm import tqdm

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.

In [None]:
# Load the training labels and the sample submission; the latter is used
# further down only as the list of test image IDs.
train_df =  pd.read_csv('../input/rsna-intracranial-hemorrhage-detection/stage_1_train.csv')
test_df = pd.read_csv('../input/rsna-intracranial-hemorrhage-detection/stage_1_sample_submission.csv')
# File names found in the train / test image folders (os.listdir order is arbitrary).
train_images = os.listdir('../input/rsna-intracranial-hemorrhage-detection/stage_1_train_images/')
test_images = os.listdir('../input/rsna-intracranial-hemorrhage-detection/stage_1_test_images/')

In [None]:
# Image folders. The trailing slash is required: file paths are built later
# by plain string concatenation (directory + file name).
dir_train = '../input/rsna-intracranial-hemorrhage-detection/stage_1_train_images/'
dir_input ='/kaggle/input/'
dir_test = '../input/rsna-intracranial-hemorrhage-detection/stage_1_test_images/'

# Exploratory analysis

In [None]:
# Sanity check: table shapes and number of files in each image folder.
print("Train CSV :",train_df.shape)
print("Test CSV :",test_df.shape)
print("Train Images:",len(train_images))
print("Test Images:",len(test_images))

In [None]:
# First rows of the raw training labels.
display(train_df.head())

In [None]:
# Last rows of the raw training labels.
display(train_df.tail())

In [None]:
# NOTE(review): identical to the head() preview two cells above; candidate for removal.
display(train_df.head())

In [None]:
# Non-null value count per column, for both tables.
print("Train: \n",train_df.count())
print("Test: \n",test_df.count())

In [None]:
# Split each 'ID' on its last underscore into the image identifier and the
# hemorrhage label name. The split is computed once (the column is large) and
# guarded so that re-running the cell after 'ID' has been dropped is a no-op
# instead of a KeyError.
if 'ID' in train_df.columns:
    id_parts = train_df['ID'].str.rsplit(pat='_', n=1, expand=True)
    train_df['Image_ID'] = id_parts[0]
    train_df['Hemorrhage'] = id_parts[1]
    train_df = train_df[['Image_ID','Hemorrhage','Label']]


In [None]:
# Exclude one image from both the label table and the file list.
# NOTE(review): presumably this DICOM cannot be read/decoded — confirm.
bad_image_id = 'ID_6431af929'
train_df = train_df[train_df['Image_ID'] != bad_image_id]
# Filtering (rather than list.remove) keeps the cell safe to re-run:
# remove() raises ValueError once the entry is already gone.
train_images = [name for name in train_images if name != bad_image_id + '.dcm']

In [None]:
# Check the reshaped table (Image_ID / Hemorrhage / Label).
train_df.head()

In [None]:
# Number of unique images and of distinct hemorrhage label names.
print("Number of images :",train_df['Image_ID'].nunique())
print("Number of Hemorraghes :",train_df['Hemorrhage'].nunique())

In [None]:
# Row count per Image_ID, ten highest counts first.
pd.DataFrame(train_df['Image_ID'].value_counts()).reset_index().head(10)

In [None]:
# First rows of the sample submission.
display(test_df.head())

In [None]:
# Reduce the sample submission to one row per test image, holding the PNG file
# name that the preprocessing step writes ('<image id>.png').
# Guarded on 'ID' so that re-running the cell after the column has been
# dropped does not raise a KeyError.
if 'ID' in test_df.columns:
    test_png_names = test_df['ID'].str.rsplit(pat='_', n=1, expand=True)[0] + ".png"
    test_df = (
        test_png_names
        .drop_duplicates()
        .reset_index(drop=True)
        .rename('Image_ID')
        .to_frame()
    )

In [None]:
# Check the reduced test table (single Image_ID column of PNG file names).
test_df.head()

In [None]:
# Wide training table: one row per image, one column per hemorrhage label.
# Exact duplicate rows are dropped first (pivot rejects duplicated index/column pairs).
unique_label_rows = train_df.drop_duplicates()
pivot_df = unique_label_rows.pivot(
    index='Image_ID',
    columns='Hemorrhage',
    values='Label',
).reset_index()
# Point Image_ID at the PNG file written by the preprocessing step.
pivot_df['Image_ID'] += '.png'
pivot_df.head()

# Data visualization

##### The `any` label is an additional label that should be true whenever at least one of the sub-type labels is true, so it tells us how many images show any kind of hemorrhage.

In [None]:
# Share of images whose 'any' label is positive.
# Both numerator and denominator count *unique* images: counting rows in the
# numerator would inflate the percentage if the table holds duplicated rows
# (NOTE(review): the later drop_duplicates() before pivot suggests it does).
pourcentage = train_df[(train_df['Hemorrhage']=='any')&(train_df['Label']==1)]['Image_ID'].nunique()/train_df['Image_ID'].nunique()*100
print("Pourtage d'images avec un type d'hemorragie : ",round(pourcentage,2),'%')

# Two-slice pie: hemorrhage vs. no hemorrhage.
pd.DataFrame([pourcentage,100-pourcentage],columns=['Pourcentage']).plot(kind='pie',y='Pourcentage',
                                                                  labels=['Hemorrhage','Non_Hemorrhage'],title='Repartition Hemorrhage',
                                                                  autopct='%.1f%%',figsize=(6,6),shadow=True, startangle=90)
plt.show()

In [None]:
# Number of positive rows per hemorrhage sub-type (the aggregate 'any' label is excluded).
is_positive_subtype = (train_df['Label']==1)&(train_df['Hemorrhage']!='any')
subtype_counts = train_df[is_positive_subtype]['Hemorrhage'].value_counts()
Hemorrage = pd.DataFrame(subtype_counts).reset_index()
Hemorrage.columns = ['Hemorrhage','Number_Pictures']

# Pie chart of the sub-type distribution.
Hemorrage.plot(
    kind='pie',
    y='Number_Pictures',
    labels=Hemorrage['Hemorrhage'].unique(),
    title='Repartition Hemorrhage',
    autopct='%.1f%%',
    figsize=(6,6),
    shadow=True,
    startangle=90,
)
plt.show()

In [None]:
# Preview of the long-format label table again.
train_df.head()

# Preprocess Images

In [None]:
def get_first_of_dicom_field_as_int(x):
    """Return a DICOM header value as an ``int``.

    Scalar values (numbers, numeric strings) are converted with ``int(x)``.
    Multi-valued fields -- a ``pydicom.multival.MultiValue`` or any other
    indexable sequence -- cannot be converted directly, so their first
    element is converted instead.

    Args:
        x: a scalar header value or a sequence of them.

    Returns:
        int: ``int(x)`` for scalars, ``int(x[0])`` for sequences.
    """
    try:
        return int(x)
    except TypeError:
        # int() rejects sequences; fall back to the first entry. This also
        # covers MultiValue subclasses, which an exact type() comparison misses.
        return int(x[0])

def get_metadata(image):
    """Collect the windowing / rescale header fields of a DICOM dataset.

    Reads ``WindowCenter``, ``WindowWidth``, ``RescaleIntercept`` and
    ``RescaleSlope`` from ``image`` and converts each one to ``int`` via
    ``get_first_of_dicom_field_as_int``.

    Returns:
        dict with keys ``window_center``, ``window_width``, ``intercept`` and
        ``slope``; the keys match the keyword parameters of ``window_image``.
    """
    # NOTE(review): every field is truncated to int, so a fractional slope or
    # intercept would lose its fractional part -- confirm this is acceptable.
    raw_fields = (
        ("window_center", image.WindowCenter),
        ("window_width", image.WindowWidth),
        ("intercept", image.RescaleIntercept),
        ("slope", image.RescaleSlope),
    )
    return {key: get_first_of_dicom_field_as_int(value) for key, value in raw_fields}

def window_image(img, window_center, window_width, intercept, slope):
    """Apply the linear rescale and clip the result to an intensity window.

    Computes ``img * slope + intercept`` and clamps every value to
    ``[window_center - window_width // 2, window_center + window_width // 2]``.

    Args:
        img: array of raw pixel values (any numeric dtype); not modified.
        window_center: centre of the window.
        window_width: full width of the window; half of it (floor division)
            is taken on each side of the centre.
        intercept: additive rescale term.
        slope: multiplicative rescale term.

    Returns:
        A new float64 array with the same shape as ``img``.
    """
    # Rescale in float64: doing the arithmetic in the array's own integer
    # dtype can wrap around (narrow signed types) or be rejected outright
    # (negative intercept with an unsigned array on recent NumPy).
    rescaled = np.asarray(img, dtype=np.float64) * slope + intercept
    half_width = window_width // 2
    return np.clip(rescaled, window_center - half_width, window_center + half_width)

def normalize(image):
    """Min-max scale an array to the range [0, 1].

    Args:
        image: numeric numpy array.

    Returns:
        ``(image - min) / (max - min)`` as a float array. A constant image
        (max == min) has no range to scale by and yields an all-zero float64
        array instead of the NaNs a 0/0 division would produce.
    """
    min_image = image.min()
    value_range = image.max() - min_image
    if value_range == 0:
        return np.zeros_like(image, dtype=np.float64)
    return (image - min_image) / value_range

def resize(image,width,weight):
    """Resize ``image`` with ``cv2.resize`` (OpenCV's default interpolation).

    ``width`` and ``weight`` are passed, in that order, as the target size
    tuple; ``weight`` is therefore the output height.
    """
    target_size = (width, weight)
    return cv2.resize(image, target_size)

def save(directory,image,image_normalized_resized):
    """Write one preprocessed image to ``/kaggle/tmp/`` as a PNG file.

    Args:
        directory: source directory of the DICOM file. Unused; kept so
            existing callers keep working.
        image: source file name; its ``.dcm`` part is replaced by ``.png``
            to build the output file name.
        image_normalized_resized: pixel array. Floating-point input is taken
            to be normalised to [0, 1] and is scaled to 8-bit (0-255) before
            writing; other dtypes are written unchanged.

    A failed write is reported with a warning rather than ignored.
    """
    import warnings

    save_dir = '/kaggle/tmp/'
    os.makedirs(save_dir, exist_ok=True)

    pixels = np.asarray(image_normalized_resized)
    if np.issubdtype(pixels.dtype, np.floating):
        # PNG stores integer samples: writing [0, 1] floats directly collapses
        # them to 0/1 and produces an almost black image. NaNs become 0.
        pixels = np.clip(np.nan_to_num(pixels) * 255.0, 0, 255).astype(np.uint8)

    new_path = save_dir + image.replace('.dcm', '.png')
    if not cv2.imwrite(new_path, pixels):
        warnings.warn("cv2.imwrite failed for " + new_path)
    
def normalize_resize_save(dataset,width,weight,directory):
    """Convert every DICOM file of ``dataset`` into a resized PNG.

    For each file name: read it from ``directory``, apply the window stored
    in its header, min-max normalise, resize to ``(width, weight)`` and hand
    the result to ``save``.

    Args:
        dataset: iterable of DICOM file names (relative to ``directory``).
        width: output width in pixels.
        weight: output height in pixels (name kept for compatibility).
        directory: folder containing the files; it is concatenated with the
            file name as-is, so it must end with a path separator.
    """
    for file_name in tqdm(dataset):
        # dcmread replaces the deprecated read_file alias.
        dicom = pydicom.dcmread(directory + file_name)
        windowed = window_image(dicom.pixel_array, **get_metadata(dicom))
        prepared = resize(normalize(windowed), width, weight)
        save(directory, file_name, prepared)

    

#### Visualize first image in the Data Set

In [None]:
# Load the first image listed in the label table. `.iloc[0]` selects by
# position: the frame has been filtered, so index label 0 is not guaranteed
# to exist. dcmread replaces the deprecated read_file alias.
image = pydicom.dcmread(dir_train + train_df['Image_ID'].iloc[0] + ".dcm")
image_windowed = window_image(image.pixel_array, ** get_metadata(image))

# Show the DICOM header, then the windowed pixels.
display(image)
plt.imshow(image_windowed, cmap=plt.cm.bone)

#### Visualize images with hemorrhages

In [None]:
def view_images(data_frame,hemorraghe):
    """Show up to five windowed images that are positive for one label.

    Args:
        data_frame: long-format label table with ``Image_ID``, ``Hemorrhage``
            and ``Label`` columns.
        hemorraghe: value of the ``Hemorrhage`` column to illustrate.

    Images are read from the module-level ``dir_train`` folder. If fewer
    than five images match, the remaining panels are left blank.
    """
    width = 5
    height = 1
    # squeeze=False always yields a 2-D axes array, flattened for iteration.
    fig, axs = plt.subplots(height, width, figsize=(20,5), squeeze=False)
    panels = axs.ravel()

    # Use the frame passed in, not the global one.
    is_match = (data_frame['Label']==1)&(data_frame['Hemorrhage']==hemorraghe)
    file_names = list(data_frame.loc[is_match, 'Image_ID'].head(width*height) + ".dcm")

    for panel, file_name in zip(panels, file_names):
        image = pydicom.dcmread(dir_train + file_name)
        image_windowed = window_image(image.pixel_array, ** get_metadata(image))
        # Draw on the panel that carries the title; adding a new subplot
        # here would stack a second, untitled axes on top of it.
        panel.imshow(image_windowed, cmap=plt.cm.bone)
        panel.set_title(file_name)

    # Hide panels that received no image.
    for panel in panels[len(file_names):]:
        panel.axis('off')

    fig.suptitle("Images with "+hemorraghe,fontsize = 20)
    plt.show()

In [None]:
# One row of example images per distinct value of the 'Hemorrhage' column.
hemorrhage_types = train_df['Hemorrhage'].unique()
for hemorrhage_type in hemorrhage_types:
    view_images(train_df, hemorrhage_type)

#### Normalize, resize and save new images in PNG format

In [None]:
# Disabled debugging probes, kept for reference.
# NOTE(review): apparently used to track down image ID_6431af929, which is
# excluded from the data earlier in the notebook -- confirm, or delete this cell.
# ts = pydicom.read_file(dir_train+train_df['Image_ID'][130464]+".dcm")
# train_images[130463]
# train_df[train_df['Image_ID']=='ID_6431af929']


In [None]:
# Convert every train and test DICOM into a 224x224 PNG under /kaggle/tmp/.
# This loops over all files of both folders, so it is the slow step of the notebook.
normalize_resize_save(train_images,224,224,dir_train)
normalize_resize_save(test_images,224,224,dir_test)

# Model

In [None]:
from keras import layers
import tensorflow as tf
from keras.applications import DenseNet121
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import Callback, ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.optimizers import Adam
import torch
import keras

# Training hyper-parameters: number of epochs passed to fit_generator and the
# batch size of the train / validation generators.
EPOCHS = 7
BATCH_SIZE = 32

In [None]:
# DenseNet121 convolutional base without its classification head, initialised
# from a local weights file; build_model() stacks the new head on top of it.
densenet = DenseNet121(
    weights='../input/densenet-keras/DenseNet-BC-121-32-no-top.h5',
    include_top=False,
    input_shape=(224,224,3)
)

In [None]:
# Target columns of pivot_df, in the order the model outputs them.
label_columns = ['any', 'epidural', 'intraparenchymal',
                 'intraventricular', 'subarachnoid', 'subdural']

# Augmenting generator for training, with 20% of the rows held out for validation.
datagen = ImageDataGenerator(zoom_range=0.1,  # set range for random zoom
        # set mode for filling points outside the input boundaries
        fill_mode='constant',
        cval=0.,  # value used for fill_mode = "constant"
        horizontal_flip=True,  # randomly flip images
        vertical_flip=True,  # randomly flip images,
        validation_split=0.2)

# Arguments shared by the training and validation flows.
labelled_flow_args = dict(
    dataframe=pivot_df,
    directory="/kaggle/tmp/",
    x_col="Image_ID",
    y_col=label_columns,
    class_mode="other",
    target_size=(224,224),
    batch_size=BATCH_SIZE,
)

train_generator = datagen.flow_from_dataframe(subset='training', **labelled_flow_args)

validation_generator = datagen.flow_from_dataframe(subset='validation', **labelled_flow_args)

# Inference must see the images unmodified: use a plain generator so that no
# random zoom / flip is applied to the test set. shuffle=False keeps the
# predictions aligned with the rows of test_df.
test_datagen = ImageDataGenerator()

test_generator = test_datagen.flow_from_dataframe(
        test_df,
        directory='/kaggle/tmp/',
        x_col='Image_ID',
        class_mode=None,
        target_size=(224, 224),
        batch_size=7,
        shuffle=False
    )

In [None]:
# BATCH_SIZE = 32

def build_model():
    """Build and compile the six-label classifier on top of ``densenet``.

    Architecture: the module-level ``densenet`` base, a ReLU, global average
    pooling, dropout (0.5) and a six-unit sigmoid layer, i.e. one independent
    probability per label. Compiled with binary cross-entropy and Adam
    (learning rate 0.001).

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential([
        densenet,
        # `Activation` is never imported on its own in this notebook, so it
        # is referenced through the imported `layers` module.
        layers.Activation('relu'),
        layers.GlobalAveragePooling2D(),
        layers.Dropout(0.5),
        layers.Dense(6, activation='sigmoid'),
    ])

    model.compile(
        loss='binary_crossentropy',
        optimizer=Adam(lr=0.001),
        metrics=['accuracy']
    )

    return model

In [None]:
# Instantiate the network and print its layer summary.
model = build_model()
model.summary()

In [None]:

 
# Earlier draft of the training call, kept for reference:
# # train the network
# H = model.fit_generator(datagen.flow(trainX, trainY, batch_size=BS),
#     validation_data=(testX, testY), steps_per_epoch=len(trainX) // BS,
#     epochs=EPOCHS)

# Save the full model to model.h5 whenever the validation loss improves.
checkpoint = ModelCheckpoint(
    'model.h5', 
    monitor='val_loss', 
    verbose=0, 
    save_best_only=True, 
    save_weights_only=False,
    mode='auto'
)

# NOTE(review): steps_per_epoch=6 and validation_steps=4 mean each epoch uses
# only 6 training batches and 4 validation batches -- this looks like a quick
# smoke-test setting; confirm before relying on the resulting model.
history = model.fit_generator(
    train_generator,
    steps_per_epoch=6,
    validation_data=validation_generator,
    validation_steps=4,
    callbacks=[checkpoint],
    epochs=EPOCHS
)

In [None]:
# Restore the checkpoint with the best validation loss, then predict on every
# batch of the test generator (one row of six scores per test image).
model.load_weights('model.h5')
y_test = model.predict_generator(test_generator,
    steps=len(test_generator),
    verbose=1
)

In [None]:
# Raw prediction array.
y_test

In [None]:
# Attach the six predicted scores to the test table; rows are matched on the
# index, i.e. on generator order.
prediction_columns = ['any', 'epidural', 'intraparenchymal',
                      'intraventricular', 'subarachnoid', 'subdural']
predictions_df = pd.DataFrame(y_test, columns=prediction_columns)
test_df = test_df.join(predictions_df)

In [None]:
# Inspect the first 300 rows of predictions.
test_df[:300]

In [None]:
# Unpivot to one row per (Image_ID, label) pair; melt() stores the label name
# in 'variable' and the predicted score in 'value'.
test_df = test_df.melt(id_vars=['Image_ID'])
# The next cell combines the file name with the 'variable' column to build the submission ID.


In [None]:
# Rebuild the submission key '<image id>_<label>' from the PNG file name and
# the label name, then write the two submission columns to disk.
image_ids = test_df['Image_ID'].apply(lambda file_name: file_name.replace('.png', ''))
test_df['ID'] = image_ids + '_' + test_df['variable']
test_df['Label'] = test_df['value']

submission = test_df[['ID', 'Label']]
submission.to_csv('submission.csv', index=False)

In [None]:
# NOTE(review): repeats the CSV write done at the end of the previous cell; candidate for removal.
test_df[['ID', 'Label']].to_csv('submission.csv', index=False)

In [None]:
# Submission rows ordered by predicted score (ascending), to eyeball the range of values.
test_df[['ID', 'Label']].sort_values('Label')

In [None]:
from IPython.display import HTML
import pandas as pd
import numpy as np

def create_download_link(title = "Download CSV file", filename = "data.csv"):  
    """Return an IPython ``HTML`` object rendering a link to ``filename``.

    Args:
        title: link text, inserted as-is (may itself contain markup).
        filename: link target, relative to the notebook's working directory.

    Returns:
        ``IPython.display.HTML`` wrapping an ``<a>`` element.
    """
    # Imported locally because the module-level name `html` is not imported
    # in this notebook.
    from html import escape

    # Quote and escape the href so file names containing spaces, quotes or
    # '&' still produce a valid attribute.
    link_markup = '<a href="{filename}">{title}</a>'.format(
        title=title,
        filename=escape(filename, quote=True),
    )
    return HTML(link_markup)

# Render a download link for the submission file written with .to_csv above.
create_download_link(filename='submission.csv')