In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import pydicom
import cv2
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.utils.vis_utils import plot_model
from keras.layers import Input, concatenate 
from keras.models import Model
import os
import tensorflow_transform as tft
from sklearn.decomposition import PCA
import tensorflow_addons as tfa


# gpus = tf.config.experimental.list_physical_devices('GPU') 
# for gpu in gpus: 
#     tf.config.experimental.set_memory_growth(gpu, True)

# Globals

In [None]:
# Subset size and start offset; only the commented-out manual-subset cell reads them.
SIZE, START = 100, 0

# Root directory of the RSNA intracranial hemorrhage dataset (note the trailing slash,
# which the path concatenations below rely on).
INPUT_PATH = "../input/rsna-intracranial-hemorrhage-detection/rsna-intracranial-hemorrhage-detection/"
train_images_dir = f"{INPUT_PATH}stage_2_train/"
test_images_dir = f"{INPUT_PATH}stage_2_test/"

# Loading Data
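We load the label CSV files with pandas and reshape them so that each image ID has one row, with one column per hemorrhage sub-type.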

In [None]:
def load_dataframe(csv_file):
    """Read a label CSV and return one row per image with one column per sub-type.

    The CSV is expected to have an ``ID`` column whose values look like
    ``<prefix>_<image>_<subtype>`` and a numeric ``Label`` column.

    Parameters
    ----------
    csv_file : str or file-like
        Anything accepted by ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        Columns ``ID`` (``<prefix>_<image>``) followed by ``any``, ``epidural``,
        ``intraparenchymal``, ``intraventricular``, ``subarachnoid`` and
        ``subdural``; each cell is the sum of the labels for that image/sub-type.
    """
    subtype_columns = (
        'any',
        'epidural',
        'intraparenchymal',
        'intraventricular',
        'subarachnoid',
        'subdural',
    )

    labels = pd.read_csv(csv_file)

    # Split the composite ID once and reuse the pieces for both derived columns.
    id_parts = labels['ID'].str.split("_", n = 3, expand = True)
    labels['Sub_type'] = id_parts[2]
    labels['ID'] = id_parts[0] + '_' + id_parts[1]
    labels = labels.sort_values(by=['ID', 'Sub_type'], ignore_index=True)

    # One column per sub-type: the row's label where the sub-type matches, else 0.
    for subtype in subtype_columns:
        labels[subtype] = np.where(labels['Sub_type'] == subtype, labels['Label'], 0)

    # Collapse the per-sub-type rows of each image into a single row.
    per_image = labels.drop(columns=['Label', 'Sub_type']).groupby('ID').sum()
    return per_image.reset_index()

In [None]:
# Training labels, one row per image.
df = load_dataframe(f"{INPUT_PATH}stage_2_train.csv")
# The sample submission goes through the same loader, giving a frame with the same layout.
test_df = load_dataframe(f"{INPUT_PATH}stage_2_sample_submission.csv")
print("Loaded!!!")

In [None]:
# df.to_csv('train.csv')
# test_df.to_csv('test.csv')

In [None]:
# Work on a copy so the loaded label table `df` is never modified by later cells.
dfw = df.copy()
# zeros = df.copy()
# # dfw = dfw[['ID','any']]
# zeros = zeros[zeros['any'] == 0]

# print("any:", len(dfw[dfw['any']==1]))
# print("epidural:", len(dfw[dfw['epidural']==1]))
# print("intraparenchymal:", len(dfw[dfw['intraparenchymal']==1]))
# print("intraventricular", len(dfw[dfw['intraventricular']==1]))
# print("subarachnoid:", len(dfw[dfw['subarachnoid']==1]))
# print("subdural:", len(dfw[dfw['subdural']==1]))

# zeros_to_be_dropped = np.random.choice(zeros.index, 6*len(zeros)//7, replace=False)
# dfw = dfw.drop(index=zeros_to_be_dropped)

# Fixed seed: without it the 80/20 split changes on every kernel restart, so
# validation numbers from different runs are not comparable.
SPLIT_SEED = 42
train, test = train_test_split(dfw, test_size=0.2, shuffle=True, random_state=SPLIT_SEED)

In [None]:
# Preview the first rows of the per-image label table.
dfw.head()

In [None]:
# train_images = os.listdir(train_images_dir)
# test_images = os.listdir(test_images_dir)

# print('done')

# Windowing Function
It takes a DICOM image and converts it into an RGB-like three-channel image, with one intensity window per channel.

In [None]:
def _apply_window(hu_img, window_level, window_width):
    """Clip a Hounsfield-unit image to one intensity window and rescale it to uint8."""
    half_width = window_width // 2
    window_min = window_level - half_width
    window_max = window_level + half_width
    clipped = np.clip(hu_img, window_min, window_max)
    return (255 * ((clipped - window_min) / window_width)).astype(np.uint8)


def final_windowing(img_path):
    """Turn a DICOM slice into a three-channel uint8 image, one intensity window per channel.

    The file is read once, converted to Hounsfield units using its
    ``RescaleSlope``/``RescaleIntercept`` tags, and then windowed three times
    with the (level, width) pairs in ``window_sizes``.

    Parameters
    ----------
    img_path : str
        Path to the ``.dcm`` file.

    Returns
    -------
    numpy.ndarray
        ``H x W x 3`` uint8 array. If the file cannot be read or decoded, a
        warning is emitted and an all-zero ``512 x 512 x 3`` uint8 array is
        returned so that batch generation can continue.
    """
    import warnings  # local import keeps this cell runnable on its own

    # (window level, window width) pairs, one per output channel.
    window_sizes = [(40, 80), (75, 215), (600, 2800)]

    try:
        # Read and convert once; the three windows all start from the same HU image.
        dicom = pydicom.dcmread(img_path)
        hu_img = (dicom.pixel_array * dicom.RescaleSlope) + dicom.RescaleIntercept
        channels = [_apply_window(hu_img, level, width) for level, width in window_sizes]
    except Exception as exc:
        # Best-effort fallback for unreadable/corrupt files. `Exception` (not a bare
        # except) lets KeyboardInterrupt/SystemExit propagate, and the warning makes
        # the substitution visible instead of silent.
        warnings.warn(f"Could not window {img_path!r} ({exc!r}); using a blank image instead.")
        channels = [np.zeros((512, 512), dtype=np.uint8) for _ in window_sizes]

    return np.dstack(channels)

In [None]:
# fig=plt.figure(figsize=(20, 10))
# columns = 4; rows = 2
# for i in range(1, rows*rows +1):
#     ds = pydicom.dcmread(train_images_dir + train_images[i]).pixel_array
#     fig.add_subplot(rows, columns, i)
#     plt.imshow(ds, cmap=plt.cm.bone)
#     fig.add_subplot

# for i in range(1, rows*rows +1):
#     ds = final_windowing(train_images_dir + train_images[i])
#     fig.add_subplot(rows, columns, i+4)
#     plt.imshow(ds, cmap=plt.cm.bone)
#     fig.add_subplot

### 

# Custom Data Generator

In [None]:
class CustomDataGen(tf.keras.utils.Sequence):
    """Keras ``Sequence`` serving batches of windowed DICOM images with multi-label targets.

    Parameters
    ----------
    directory : str
        Path prefix of the image folder; it is concatenated directly with the
        image ID, so it must end with a path separator.
    df : pandas.DataFrame
        One row per image, with an ``ID`` column and one label column for each
        name in ``self.types``.
    preprocessing_function : callable
        Maps a ``.dcm`` path to an ``H x W x 3`` array with values in 0..255.
    batch_size : int
        Number of images per batch.
    shuffle : bool
        If true, the rows are reshuffled at the end of every epoch.
    target_size : tuple of int
        ``(height, width)`` every image is resized to before batching.
    """

    def __init__(self, directory, df,
                 preprocessing_function=final_windowing,
                 batch_size=32,
                 shuffle=False,
                 target_size=(224, 224)):
        super().__init__()

        self.directory = directory
        self.df = df.copy()
        self.preprocessing_function = preprocessing_function
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.target_size = tuple(target_size)

        self.n = len(self.df)
        self.n_name = df['ID'].nunique()
        self.types = [
            'any',
            'epidural',
            'intraparenchymal',
            'intraventricular',
            'subarachnoid',
            'subdural'
        ]
        self.n_type = len(self.types)

    def on_epoch_end(self):
        """Reshuffle the rows between epochs when ``shuffle`` is enabled."""
        if self.shuffle:
            self.df = self.df.sample(frac=1).reset_index(drop=True)

    def pre_process(self, image):
        """Run the preprocessing function on a file path and scale the result to 0..1."""
        img_arr = self.preprocessing_function(image)
        img_arr = img_arr/255 # normalization
        return img_arr

    def __get_input(self, image):
        """Load one image by ID and rescale it to ``target_size``."""
        image_arr = self.pre_process(self.directory + image + '.dcm')
        height, width = self.target_size
        # ndarray.resize() only truncates/reinterprets the flat buffer, which scrambles
        # the picture; cv2.resize interpolates. Note that dsize is (width, height).
        return cv2.resize(image_arr, (width, height), interpolation=cv2.INTER_AREA)

    def __get_data(self, batches):
        """Build ``(X, y)`` for one slice of the label frame.

        ``X`` stacks the preprocessed image of every ``ID`` in the slice (the ID
        plus ``.dcm`` is the file name); ``y`` holds the label columns listed in
        ``self.types`` for the same rows.
        """
        file_names = np.asarray(batches['ID'])
        file_types = np.asarray(batches[self.types])

        X_batch = np.asarray([self.__get_input(file) for file in file_names])
        y_batch = file_types

        return X_batch, y_batch

    def __getitem__(self, index):
        """Return batch number ``index``; the final batch may be shorter than ``batch_size``."""
        start = index * self.batch_size
        batches = self.df.iloc[start:start + self.batch_size]
        X, y = self.__get_data(batches)
        return X, y

    def __len__(self):
        """Number of batches per epoch, rounded up so the trailing partial batch is served."""
        return (self.n + self.batch_size - 1) // self.batch_size

# Generating Image DataFlow

In [None]:
# Both generators read from the training image folder; they differ only in the
# label rows they serve (the 80% split for training, the held-out 20% for validation).
train_datagen = CustomDataGen(train_images_dir, train, preprocessing_function=final_windowing)
val_datagen = CustomDataGen(train_images_dir, test, preprocessing_function=final_windowing)

In [None]:
# print(train_datagen[1][0][4])
# Sanity check: label vector of the fifth sample in the second training batch.
train_datagen[1][1][4]

# The Neural Net Model

In [None]:
# class CustomPCALayer(tf.keras.layers.Layer):
#     def __init__(self, num_outputs):
#         super(CustomPCALayer, self).__init__()
#         self.num_outputs = num_outputs
#         self.total = tf.Variable(initial_value=tf.zeros((num_outputs,)), trainable=False)

#     def call(self, inputs):
#         return tft.pca(x=inputs, output_dim=self.num_outputs, dtype=tf.float32)


# class MyPCA(tf.keras.layers.Layer):
#     def __init__(self, num_outputs):
#         super(MyPCA, self).__init__()
#         self.num_outputs = num_outputs
#         self.total = tf.Variable(initial_value=tf.zeros((num_outputs,)), trainable=False)

#     def call(self, inputs):
#         pca = PCA(n_components=self.num_outputs)
#         inputs = pca.fit_transform(inputs)
#         return inputs

In [None]:
# class Linear(tf.keras.layers.Layer):
#     def __init__(self, units=32, input_dim=32):
#         super(Linear, self).__init__()
#         self.w = self.add_weight(
#             shape=(input_dim, units), initializer="random_normal", trainable=True
#         )
#         self.b = self.add_weight(shape=(units,), initializer="zeros", trainable=True)

#     def call(self, inputs):
#         return tf.matmul(inputs, self.w) + self.b

In [None]:
# import keras.backend as K
# tf.compat.v1.enable_eager_execution()

# def custom_loss(y_t, y):
#     '''
#     (y⋅ln(sigmoid(logits))+(1−y)⋅ln(1−sigmoid(logits)))
    
#     Multi-label cross-entropy
#     * Required "Wp", "Wn" as positive & negative class-weights
#     y_true: true value
#     y_logit: predicted value
#     '''
#     y_t = y_t.numpy()
#     y = y.numpy()
    
# #     y_t = y_t.eval(session=tf.compat.v1.Session())
# #     y = y.eval(session=tf.compat.v1.Session())
    
#     loss = float(0)
    
#     for i in range(len(y_t)):
#         loss += y_t[i]*np.log(y[i]) + (1-y_t[i])*np.log(1-y)
#     return loss

# def f1_score(y_true, y_logit):
#     '''
#     Calculate F1 score
#     y_true: true value
#     y_logit: predicted value
#     '''
#     true_positives = K.sum(K.round(K.clip(y_true * y_logit, 0, 1)))
#     possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
#     recall = true_positives / (possible_positives + K.epsilon())
#     predicted_positives = K.sum(K.round(K.clip(y_logit, 0, 1)))
#     precision = true_positives / (predicted_positives + K.epsilon())
#     return (2 * precision * recall) / (precision + recall + K.epsilon())


# def custom_loss_function(y_true, y_pred):
#     squared_difference = tf.square(y_true - y_pred)
#     return tf.reduce_mean(squared_difference, axis=-1)

In [None]:
# The whole graph is built from tf.keras symbols; mixing in objects imported from the
# standalone `keras` package can break depending on the installed versions.
input_net = tf.keras.layers.Input((224, 224, 3))

## Encoder starts
# ImageNet-pretrained InceptionV3 without its classification head.
# (`classifier_activation` is omitted: it is ignored when include_top=False.)
inception = tf.keras.applications.InceptionV3(
    include_top=False,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
)(input_net)

# Global max pooling collapses the spatial grid to one 2048-element feature vector.
pooling = tf.keras.layers.GlobalMaxPool2D()(inception)

# Present that vector to the LSTM as a sequence of 2048 steps with one feature each.
reshaping = tf.keras.layers.Reshape((2048, 1))(pooling)

blstm = tf.keras.layers.Bidirectional(
    tf.keras.layers.LSTM(256),
)(reshaping)

# Concatenate the pooled CNN features with the bidirectional LSTM output.
merged = tf.keras.layers.concatenate([pooling, blstm], axis=1)

# Six independent sigmoid outputs, one per label column of the data generator.
output_net = tf.keras.layers.Dense(6, activation="sigmoid")(merged)

model = tf.keras.Model(inputs=input_net, outputs=output_net)

# Cyclical Learning Rate
# steps_per_epoch = len(train_datagen) // 8
# clr = tfa.optimizers.CyclicalLearningRate(
#     initial_learning_rate=1e-4,
#     maximal_learning_rate= 1e-2,
#     scale_fn=lambda x: 1/(2.**(x-1)),
#     step_size=2 * steps_per_epoch
# )
# optimizer = tf.keras.optimizers.SGD(clr)

# NOTE(review): "categorical_accuracy" compares arg-max positions and is of limited
# meaning for multi-label targets; it is kept so the logged metric names do not change.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='binary_crossentropy',
    metrics=[tf.keras.metrics.Precision(), "accuracy", "MeanSquaredError", "categorical_accuracy"]
)

In [None]:
# model = tf.keras.models.Sequential([
    
#     tf.keras.applications.InceptionV3(
#         include_top=False,
#         weights="imagenet",
#         input_tensor=None,
#         pooling=None,
#         classifier_activation="relu",
#     ),
#     tf.keras.layers.GlobalMaxPool2D(),
#     tf.keras.layers.Dense(6, activation="sigmoid")
# ])

# optimizer = tf.keras.optimizers.Adam()

# model.compile(
#     optimizer=optimizer,
#     loss="binary_crossentropy",
#     metrics=[tf.keras.metrics.Precision(), "accuracy","MeanSquaredError", "categorical_accuracy"]
# )

In [None]:

# Render the architecture diagram through the public tf.keras API, the same namespace
# the rest of the notebook uses, instead of the private `keras.utils.vis_utils` path.
tf.keras.utils.plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

In [None]:
# Train for five epochs, validating on the held-out generator after each one.
model.fit(x=train_datagen, epochs=5, validation_data=val_datagen)

# Evaluate how well model performs
model.evaluate(x=val_datagen, verbose=2)

In [None]:
# from matplotlib import pyplot as plt
# history = model.fit(train_datagen, epochs=1)
# plt.plot(history.history['loss'])
# plt.plot(history.history['accuracy'])
# plt.title('model accuracy')
# plt.ylabel('accuracy')
# plt.xlabel('epoch')
# plt.legend(['train', 'val'], loc='upper left')
# plt.show()

In [None]:
# i = 3

# for i in range(30):
#     x = val_datagen.__getitem__(i)[0][0:8]
#     y = val_datagen.__getitem__(i)[1][0:8]

#     res = model.predict([x])

#     # print('x',x)
#     print('y',y)
#     print('res',res)
    
#     print('-------------------------')

## Manually Getting Data by Reading DICOM Files

In [None]:
# subset_train_arr = train_images[START:START+SIZE]
# sub_train_pixel_arr = [ final_windowing(train_images_dir + img)  for img in subset_train_arr]
# subset_train_arr = [x.replace('.dcm', '') for x in subset_train_arr]
# output = df.query("ID == @subset_train_arr",).reset_index()
# print(output.head())
# sub_train_pixel_arr = np.array(sub_train_pixel_arr)
# print(sub_train_pixel_arr.shape)
# output_arr = output.drop(columns=['index','ID']).values
# print(output_arr.shape)


# Prediction

In [None]:
# x = model.predict(X_training[0:20])
# x[x<.5] = 0
# x[x>=.5] = 1
# print(x)
# print(y_training[0:20])

In [None]:
# loss_per_epoch = model.history.history['loss']
# plt.plot(range(len(loss_per_epoch)),loss_per_epoch)