In [None]:
# import libraries
import math
import os

import numpy as np
import pandas as pd
import pydicom
import tensorflow as tf
from tensorflow import keras

In [None]:
# Directory holding the DICOM files and the CSV holding their labels.
# `path` keeps its trailing slash because file names are appended to it by plain
# string concatenation in the generators.
# NOTE(review): both locations point at the stage_2_train split even though this
# notebook evaluates the models — confirm this is the intended data.
path = '../input/rsna-intracranial-hemorrhage-detection/rsna-intracranial-hemorrhage-detection/stage_2_train/'
labels = pd.read_csv('../input/rsna-intracranial-hemorrhage-detection/rsna-intracranial-hemorrhage-detection/stage_2_train.csv')

In [None]:
# Total number of scans to evaluate; half are drawn from the normal scans and half
# from the abnormal scans.
SAMPLE_SIZE = 215866
# random_state passed to every DataFrame.sample call.
SEED = 42
# Number of rows per generator batch.
BATCH_SIZE = 32
# Number of label columns fed to the binary stage (the 'any' column).
NUM_CLASSES_BINARY = 1
# Number of label columns fed to the subtype stage (everything except 'any').
NUM_CLASSES_MULTI = 5
# Label column names; index 0 is the overall flag, the rest are the subtypes.
SUBCLASSES = ['any', 'epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid', 'subdural']

In [None]:
# define image size
# The generators resize every windowed image to this size, and the fallback
# images produced on failure are created with this size as well.
IMAGE_SIZE = (224,224)

# correct dcm
def correct_dcm(dcm):
    """Rewrite the pixel data and rescale intercept of a DICOM dataset in place.

    Every stored pixel value is shifted up by 1000; values that reach 4096 or
    more after the shift are wrapped back down by 4096. The result is written
    to `dcm.PixelData` and `dcm.RescaleIntercept` is set to -1000.

    Parameters:
    - dcm: object exposing `pixel_array` (a numpy array); it is modified in place

    Returns:
    - None
    """
    wrap_at = 4096
    shifted = dcm.pixel_array + 1000
    # Values at or above the wrap point are folded back into range.
    overflow = shifted >= wrap_at
    shifted[overflow] = shifted[overflow] - wrap_at
    dcm.PixelData = shifted.tobytes()
    dcm.RescaleIntercept = -1000
    
# convert dicom field values to integers
def get_first_of_dicom_field_as_int(x):
    """Return a DICOM field value as an int.

    When `x` is exactly a pydicom MultiValue only its first element is used;
    any other value is passed to int() as is.
    """
    value = x[0] if type(x) == pydicom.multival.MultiValue else x
    return int(value)
    
# get windowing values
def get_windowing(data):
    """Read the windowing-related fields of a DICOM dataset as integers.

    Parameters:
    - data: dataset indexable by (group, element) tag tuples, each entry
      exposing `.value`

    Returns:
    - list of four ints: [window center, window width, intercept, slope],
      each converted with get_first_of_dicom_field_as_int
    """
    tags = (
        ('0028', '1050'),  # window center
        ('0028', '1051'),  # window width
        ('0028', '1052'),  # intercept
        ('0028', '1053'),  # slope
    )
    # Read every field first, then convert, so a missing tag fails before any conversion.
    raw_values = [data[tag].value for tag in tags]
    return list(map(get_first_of_dicom_field_as_int, raw_values))
    
# get min and max of the window values
def get_min_max_of_window_value(window_center, window_width):
    """Return the (lower, upper) bounds of a window.

    The bounds lie half the width (floor division) below and above the center.
    """
    half_width = window_width // 2
    return window_center - half_width, window_center + half_width

# change windowing
def window_image(img, window_center, window_width):
    """Rescale a DICOM image with its slope/intercept and clip it to a window.

    Parameters:
    - img: DICOM dataset exposing `pixel_array` and the tags read by get_windowing
    - window_center: center of the window to clip to
    - window_width: width of the window to clip to

    Returns:
    - the rescaled pixel array clipped to the window bounds. If reading or
      rescaling the image fails, a constant array of shape IMAGE_SIZE filled
      with the lower window bound is returned instead (best-effort fallback).
    """
    # Compute the bounds before the try block: the fallback needs img_min, and
    # computing it inside the try left it unbound whenever the failure happened
    # earlier, turning the real error into a NameError.
    img_min, img_max = get_min_max_of_window_value(window_center, window_width)
    try:
        # only intercept and slope are needed here; the window stored in the
        # file is ignored in favour of the one passed in
        _, _, intercept, slope = get_windowing(img)
        pixels = img.pixel_array * slope + intercept
        pixels[pixels < img_min] = img_min
        pixels[pixels > img_max] = img_max
    except Exception:
        # Deliberate best-effort: an unreadable image becomes a flat image at
        # the window minimum. `Exception` (not a bare except) so that
        # KeyboardInterrupt / SystemExit still propagate.
        pixels = img_min * np.ones(IMAGE_SIZE)

    return pixels


# normalize
def normalize(channel, wc_ww: tuple, norm_type = 'none'):
    """Normalize one windowed channel.

    Parameters:
    - channel: windowed image values
    - wc_ww: (window_center, window_width) the channel was windowed with
    - norm_type: 'none' returns the channel unchanged; 'min_max' maps the
      window bounds to [0, 1]. Matching is case-insensitive.

    Returns:
    - the normalized channel

    Raises:
    - ValueError: if `norm_type` is not one of the supported values
      (previously this case silently returned None)
    """
    mode = norm_type.lower()
    if mode == 'none':
        return channel
    if mode == 'min_max':
        mini, maxi = get_min_max_of_window_value(wc_ww[0], wc_ww[1])
        # NOTE: a window width of 0 or 1 makes maxi == mini and divides by zero.
        return (channel - mini) / (maxi - mini)
    raise ValueError(f"unsupported norm_type: {norm_type!r} (expected 'none' or 'min_max')")
    

def bsb_window(img, third_window):
    '''
    Build a 3-channel image from a DICOM dataset using three intensity windows.

        Parameters:
        - img: DICOM dataset (passed to window_image)
        - third_window: selects the window of the third channel; "bone" uses
          (600, 2000), any other value (callers in this file pass "soft")
          uses (50, 350)

        Returns:
        - bsb_img: float array of shape (height, width, 3) holding the
          min-max normalized brain, subdural and third-window channels.
          If the result contains NaN, an all-ones array of shape
          (*IMAGE_SIZE, 3) is returned instead.
    '''
    # (window_center, window_width) per channel. These are kept as separate
    # values rather than in a dict keyed by `third_window`, so a caller-supplied
    # name such as "brain" can no longer overwrite another channel's window.
    brain_window = (40, 80)
    subdural_window = (80, 200)
    third = (600, 2000) if third_window == "bone" else (50, 350)

    # window, then min-max normalize, each channel
    channels = [
        normalize(window_image(img, *wc_ww), wc_ww, 'min_max')
        for wc_ww in (brain_window, subdural_window, third)
    ]

    # stack the channels along the last axis
    first = channels[0]
    bsb_img = np.zeros((first.shape[0], first.shape[1], 3))
    for idx, channel in enumerate(channels):
        bsb_img[:, :, idx] = channel

    # fall back to a constant image when normalization produced NaN
    if np.any(np.isnan(bsb_img)):
        bsb_img = np.ones((*IMAGE_SIZE, 3))

    return bsb_img

def _preprocess_img(dcm, third_window):
    """Shared implementation of preprocess_img_soft / preprocess_img_bone.

    Applies correct_dcm when the dataset stores 12-bit unsigned pixels with a
    rescale intercept above -100, builds the 3-channel windowed image with
    bsb_window and converts it to a float64 tensor. The image is not resized.
    """
    if (dcm.BitsStored == 12) and (dcm.PixelRepresentation == 0) and (int(dcm.RescaleIntercept) > -100):
        correct_dcm(dcm)
    img = bsb_window(dcm, third_window=third_window)
    return tf.convert_to_tensor(img, dtype=tf.float64)


def preprocess_img_soft(dcm):
    """Preprocess a DICOM dataset with the "soft" third window.

    Parameters:
    - dcm: DICOM dataset; may be modified in place by correct_dcm

    Returns:
    - float64 tensor holding the 3-channel windowed image
    """
    return _preprocess_img(dcm, "soft")


def preprocess_img_bone(dcm):
    """Preprocess a DICOM dataset with the "bone" third window.

    Parameters:
    - dcm: DICOM dataset; may be modified in place by correct_dcm

    Returns:
    - float64 tensor holding the 3-channel windowed image
    """
    return _preprocess_img(dcm, "bone")

In [None]:
class ImageGenerator_soft(tf.keras.utils.Sequence):
    """Keras Sequence yielding batches of windowed DICOM images and their labels.

    Images are built by bsb_window with the "soft" third window and resized to
    IMAGE_SIZE. File names come from the dataframe index and are appended to
    the module-level `path`; the dataframe's columns are the labels.

    Parameters:
    - dataframe: DataFrame indexed by DICOM file name with `num_classes` label columns
    - batch_size: number of rows per batch
    - shuffle: whether on_epoch_end reshuffles the rows
    - num_classes: number of label columns in `dataframe`
    """

    # third-channel window handed to bsb_window
    THIRD_WINDOW = "soft"

    def __init__(self, dataframe,batch_size,shuffle, num_classes):
        super().__init__()
        self.dataframe = dataframe
        self.num_classes = num_classes
        self.batch_size = batch_size
        self.shuffle = shuffle
        # on_epoch_end increments this counter; it was never initialised before,
        # so the first call raised AttributeError.
        self.current_epoch = 0

    def __len__(self):
        """Number of batches per epoch (the last batch may be smaller)."""
        return math.ceil(len(self.dataframe) / self.batch_size)

    def __getitem__(self, index):
        """Load, preprocess and return batch `index` as (X, y) numpy arrays."""
        batch_df = self.dataframe.iloc[index * self.batch_size: (index+1) * self.batch_size]
        paths = path + batch_df.index.astype(str)
        X = np.empty((len(batch_df), *IMAGE_SIZE, 3))
        y = np.empty((len(batch_df), self.num_classes))
        for i, p in enumerate(paths):
            dcm = pydicom.dcmread(p)
            # correct dcm: 12-bit unsigned data with an intercept above -100
            if (dcm.BitsStored == 12) and (dcm.PixelRepresentation == 0) and (int(dcm.RescaleIntercept) > -100):
                correct_dcm(dcm)

            img = bsb_window(dcm, third_window=self.THIRD_WINDOW)
            img = tf.convert_to_tensor(img, dtype=tf.float64)
            X[i] = tf.image.resize(img, IMAGE_SIZE)
            y[i] = batch_df.iloc[i].values

        return X, y

    def on_epoch_end(self):
        """Optionally reshuffle the rows and advance the epoch counter."""
        if self.shuffle:
            self.dataframe = self.dataframe.sample(len(self.dataframe), replace = False, random_state = SEED)
        self.current_epoch += 1

In [None]:
class ImageGenerator_bone(tf.keras.utils.Sequence):
    """Keras Sequence yielding batches of windowed DICOM images and their labels.

    Images are built by bsb_window with the "bone" third window and resized to
    IMAGE_SIZE. File names come from the dataframe index and are appended to
    the module-level `path`; the dataframe's columns are the labels.

    Parameters:
    - dataframe: DataFrame indexed by DICOM file name with `num_classes` label columns
    - batch_size: number of rows per batch
    - shuffle: whether on_epoch_end reshuffles the rows
    - num_classes: number of label columns in `dataframe`
    """

    # third-channel window handed to bsb_window
    THIRD_WINDOW = "bone"

    def __init__(self, dataframe,batch_size,shuffle, num_classes):
        super().__init__()
        self.dataframe = dataframe
        self.num_classes = num_classes
        self.batch_size = batch_size
        self.shuffle = shuffle
        # on_epoch_end increments this counter; it was never initialised before,
        # so the first call raised AttributeError.
        self.current_epoch = 0

    def __len__(self):
        """Number of batches per epoch (the last batch may be smaller)."""
        return math.ceil(len(self.dataframe) / self.batch_size)

    def __getitem__(self, index):
        """Load, preprocess and return batch `index` as (X, y) numpy arrays."""
        batch_df = self.dataframe.iloc[index * self.batch_size: (index+1) * self.batch_size]
        paths = path + batch_df.index.astype(str)
        X = np.empty((len(batch_df), *IMAGE_SIZE, 3))
        y = np.empty((len(batch_df), self.num_classes))
        for i, p in enumerate(paths):
            dcm = pydicom.dcmread(p)
            # correct dcm: 12-bit unsigned data with an intercept above -100
            if (dcm.BitsStored == 12) and (dcm.PixelRepresentation == 0) and (int(dcm.RescaleIntercept) > -100):
                correct_dcm(dcm)

            img = bsb_window(dcm, third_window=self.THIRD_WINDOW)
            img = tf.convert_to_tensor(img, dtype=tf.float64)
            X[i] = tf.image.resize(img, IMAGE_SIZE)
            y[i] = batch_df.iloc[i].values

        return X, y

    def on_epoch_end(self):
        """Optionally reshuffle the rows and advance the epoch counter."""
        if self.shuffle:
            self.dataframe = self.dataframe.sample(len(self.dataframe), replace = False, random_state = SEED)
        self.current_epoch += 1

In [None]:
# Load the saved binary model; it is used below to score every sampled scan
# with a single probability that is thresholded at 0.5.
Binary_model = keras.models.load_model('../input/final-binary/best_model_densenet201.h5')

In [None]:
# custom loss function
def np_multilabel_loss(class_weights=None):
    """Build a multilabel binary cross-entropy loss.

    Parameters:
    - class_weights: optional per-class weights. When given, the per-class
      losses are multiplied by them and summed; when None, the per-class
      losses are averaged.

    Returns:
    - a function `single_class_crossentropy(y_true, y_pred)` returning a scalar loss
    """
    lower = 1e-07
    upper = 1-(1e-07)

    def single_class_crossentropy(y_true, y_pred):
        targets = tf.cast(y_true, tf.float32)
        probs = tf.cast(y_pred, tf.float32)

        # keep probabilities away from 0 and 1 so the logarithms stay finite
        probs = tf.where(probs > upper, upper, probs)
        probs = tf.where(probs < lower, lower, probs)

        # binary cross-entropy averaged over the first axis, giving one value per class
        log_likelihood = targets * tf.math.log(probs) + (1-targets) * tf.math.log(1-probs)
        per_class = - tf.reduce_mean(log_likelihood, axis=0)

        if class_weights is None:
            return tf.reduce_mean(per_class)
        return tf.reduce_sum(class_weights*per_class)

    return single_class_crossentropy

In [None]:
# Load the saved multilabel (subtype) model.
# "single_class_crossentropy" is the name of the function *returned* by
# np_multilabel_loss, so it must be mapped to that returned callable, not to
# the factory itself: the factory takes a single `class_weights` argument and
# would fail if it were ever called as loss(y_true, y_pred).
# NOTE(review): the class weights used at training time are not visible here;
# the unweighted loss is fine for predict() but should be confirmed before
# evaluating or fine-tuning with this loss.
Multilabel = keras.models.load_model('../input/multilabel/multilabel.h5', custom_objects={"single_class_crossentropy": np_multilabel_loss()})

In [None]:
# Peek at the raw label table before it is reshaped in the next cells.
labels.head()

In [None]:
# Split each ID at its last underscore into the scan id and the subtype name,
# and carry the label value along.
# NOTE: this cell overwrites `labels`, so it cannot be re-run without reloading the CSV.
label = labels.Label
labels = (
    labels.ID.str.rsplit('_', n=1, expand=True)
    .assign(label=label)
    .rename({0: 'id', 1: 'subtype'}, axis=1)
)
labels.head()

In [None]:
# One row per scan id, one column per subtype; repeated (id, subtype) pairs are
# combined by pivot_table's default aggregation.
labels = labels.pivot_table(index='id', columns='subtype', values='label')
labels.head()

In [None]:
# Turn each id into a file name; the generators append the index values to `path`.
# NOTE: not idempotent — re-running this cell appends '.dcm' a second time.
labels.index = labels.index.astype(str) + '.dcm'
labels.head()

In [None]:
# Split the scans on the overall 'any' flag.
normal_df = labels[labels['any'] == 0]       # normal scans
abnormal_df = labels[labels['any'] == 1]     # abnormal scans

In [None]:
# Draw an equal number of normal and abnormal scans (without replacement),
# combine them and shuffle the rows.
normal_sample = normal_df.sample(SAMPLE_SIZE//2, replace = False, random_state = SEED, axis = 0)
abnormal_sample = abnormal_df.sample(SAMPLE_SIZE//2, replace = False, random_state = SEED, axis = 0)
# DataFrame.append was removed in pandas 2.0; pd.concat gives the same result.
sample_df = pd.concat([normal_sample, abnormal_sample])
sample_df = sample_df.sample(frac = 1, random_state = SEED, axis = 0)
sample_df.head()

In [None]:
# Keep only the 'any' column as the target of the binary stage.
binary_df = pd.DataFrame(sample_df['any'])

In [None]:
# Batch generator for the binary stage ("soft" third window). shuffle=False because
# the predictions are later paired with binary_df.index by position.
img_generator = ImageGenerator_soft(binary_df, BATCH_SIZE, shuffle=False, num_classes=NUM_CLASSES_BINARY)

In [None]:
# Wrap the generator in a tf.data.Dataset of (images, labels) batches; the
# leading None leaves the batch dimension open.
test_data_binary = tf.data.Dataset.from_generator(
    lambda: map(tuple, img_generator),
    output_types=(tf.float64, tf.uint8),
    output_shapes=(
        tf.TensorShape((None, *IMAGE_SIZE, 3)),
        tf.TensorShape((None, NUM_CLASSES_BINARY)),
    ),
)

In [None]:
# `math.ceil` is used by the generators' __len__; presumably that is first
# evaluated when predict() starts iterating the dataset, so `math` must be
# imported no later than this point.
import math

# predict normal/abnormal
binaryPred = Binary_model.predict(test_data_binary)
# One probability per scan, labelled with the scan's file name. This pairs
# predictions with binary_df rows by position (the generator was built with
# shuffle=False).
binary_res = pd.DataFrame({'prob': binaryPred.flatten()}, index=binary_df.index)
binary_res.head()

In [None]:
# Scans whose predicted probability exceeds 0.5 are treated as abnormal and
# passed on to the subtype model.
abnormal_pred = binary_res[binary_res['prob'] > 0.5]
abnormal_pred.head()

-----------------------------

In [None]:
# Ground-truth rows of the scans that the binary stage predicted as abnormal.
# NOTE(review): the bare `multi_df` displays the whole frame; `.head()` would
# keep the output short.
multi_df = sample_df.loc[abnormal_pred.index]
multi_df

In [None]:
# Remove the overall flag so only the subtype label columns remain.
# NOTE: not idempotent — a second run fails because 'any' is already gone.
multi_df = multi_df.drop(columns=['any'])

In [None]:
# Batch generator for the subtype stage ("bone" third window). shuffle=False because
# the predictions are later paired with multi_df.index by position.
img_generator_multi = ImageGenerator_bone(multi_df, BATCH_SIZE, shuffle=False, num_classes=NUM_CLASSES_MULTI)

In [None]:
# Wrap the generator in a tf.data.Dataset of (images, labels) batches; the
# leading None leaves the batch dimension open.
test_data_multi = tf.data.Dataset.from_generator(
    lambda: map(tuple, img_generator_multi),
    output_types=(tf.float64, tf.uint8),
    output_shapes=(
        tf.TensorShape((None, *IMAGE_SIZE, 3)),
        tf.TensorShape((None, NUM_CLASSES_MULTI)),
    ),
)

In [None]:
# For the scans predicted abnormal, predict the type of the hemorrhage.
# NOTE(review): if no scan was predicted abnormal the dataset is empty — confirm
# predict() copes with that case.
multiPred = Multilabel.predict(test_data_multi)


In [None]:
# Label the subtype predictions with the subtype names and the scans' file names.
# Assumes the model's outputs are ordered like SUBCLASSES[1:] — TODO confirm.
multi_res = pd.DataFrame(multiPred, columns = SUBCLASSES[1:], index=multi_df.index)
multi_res.head()

In [None]:
# Add the binary-stage probability as the 'any' column (matched on the index).
multi_res['any'] = abnormal_pred.prob
multi_res.head()

In [None]:
# Scans at or below the 0.5 threshold are treated as normal.
normal_pred = binary_res[binary_res['prob'] <= 0.5]
normal_pred.head()

In [None]:
# Use the same column name as the label table for the overall flag.
normal_pred = normal_pred.rename(columns={'prob':'any'})
normal_pred.head()

In [None]:
# Scans predicted normal never reach the subtype model, so every subtype
# prediction is set to 0 for them.
normal_pred[SUBCLASSES[1:]] = 0
normal_pred.head()

In [None]:
# Stack the predictions of both groups into one table (normal rows first).
whole_pred = pd.concat((normal_pred, multi_res))
whole_pred

In [None]:
# Put the ground-truth rows in the same order as the prediction rows.
sample_df = sample_df.loc[whole_pred.index]
sample_df

In [None]:
# Persist ground truth and predictions; the evaluation cells below read these files back.
sample_df.to_csv('testing_data.csv')
whole_pred.to_csv('predicted_data.csv')

In [None]:
# # check the results of the binary prediction 
# if binaryPred > 0.5:
#     # if abnormality is detected, predict the type of the hemorrhage 
#     multiPred = Multilabel.predict(img)
    
# else:
#     print('No hemorrhage detected')

# Testing

In [None]:
# Reload the predictions and the ground truth written by the cell above.
y_pred = pd.read_csv('predicted_data.csv')
y_test = pd.read_csv('testing_data.csv')

In [None]:
# Every column except the first one; used as class names in the report below.
# Presumably the first column is the 'id' written by to_csv — verify.
columns = list(y_test.columns)[1:]

In [None]:
# Index both tables by 'id'. With only `index` given, pivot_table aggregates the
# remaining columns per id using its default aggregation.
# NOTE(review): the metrics below compare the two tables by position, so both
# must end up with the same row and column order — confirm.
y_pred = pd.pivot_table(y_pred, index='id')
y_test = pd.pivot_table(y_test, index='id')

In [None]:
# Binarize every predicted probability at the 0.5 threshold.
y_pred = y_pred > 0.5


In [None]:
# Per-class report; the class names are the label column names collected from y_test.
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred,target_names=columns))


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# accuracy: for multilabel input scikit-learn reports subset accuracy, i.e. the
# share of scans whose whole label row is predicted exactly
accuracy = accuracy_score(y_test,y_pred)
print('Accuracy: %f' % accuracy)
# precision: tp / (tp + fp), computed per scan and then averaged (average='samples')
precision = precision_score(y_test,y_pred,average='samples')
print('Precision: %f' % precision)
# recall: tp / (tp + fn), computed per scan and then averaged (average='samples')
recall = recall_score(y_test,y_pred,average='samples')
print('Recall: %f' % recall)
# f1: 2 tp / (2 tp + fp + fn), computed per scan and then averaged (average='samples')
f1 = f1_score(y_test,y_pred,average='samples')
print('F1 score: %f' % f1)
