## EDA
#### Looking at the shape and layout of the raw DICOM data

In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import os, glob, random, cv2, glob, pydicom

In [None]:
# Load the training labels table; later cells read its BraTS21ID and MGMT_value columns.
df = pd.read_csv('../input/rsna-miccai-brain-tumor-radiogenomic-classification/train_labels.csv')

In [None]:
# (rows, columns) of the labels table.
df.shape

In [None]:
# Peek at the first few label rows.
df.head()

In [None]:
# Class balance: number of rows per MGMT_value.
df.MGMT_value.value_counts()

In [None]:
# Root directory of the training DICOM data; per-patient sub-directories are
# looked up under it by the loaders below.
train_sample_path = '../input/rsna-miccai-brain-tumor-radiogenomic-classification/train'
# Compare the number of entries in the training directory with the number of
# distinct patient IDs in the labels table.
len(os.listdir(train_sample_path)), df.BraTS21ID.nunique()

In [None]:
def read_dicom_xray(path):
    """Load one DICOM file and return its pixel data rescaled to uint8.

    The pixel array is shifted so that its minimum becomes 0 and then scaled
    so that its maximum maps to 255.

    Args:
        path: Path of the DICOM file to read.

    Returns:
        A ``np.uint8`` array with the same shape as the file's pixel array.
        A constant-valued (e.g. completely blank) image yields all zeros.
    """
    # dcmread is the supported spelling; read_file is a deprecated alias.
    data = pydicom.dcmread(path).pixel_array
    data = data - np.min(data)
    peak = np.max(data)
    if peak == 0:
        # Constant image: dividing by zero would produce NaNs, and casting
        # NaN to uint8 is undefined. Return an explicit black frame instead.
        return np.zeros(data.shape, dtype=np.uint8)
    data = data / peak
    data = (data * 255).astype(np.uint8)
    return data

In [None]:
# The following function was taken from: https://www.kaggle.com/ihelon/brain-tumor-eda-with-animations-and-modeling?scriptVersionId=68202876&cellId=11
def visualize_sample(
    brats21id, 
    mgmt_value,
    slice_i,
    types=("FLAIR", "T1w", "T1wCE", "T2w")
):
    """Plot one slice of every requested MRI series for a single patient.

    Args:
        brats21id: Patient identifier; zero-padded to 5 characters to form
            the patient directory name under ``train_sample_path``.
        mgmt_value: Label shown in the figure title.
        slice_i: Relative position of the slice inside each series
            (e.g. 0.5 picks the middle file of the sorted series).
        types: Series sub-directory names to show, one subplot per entry.

    Raises:
        IndexError: If a series directory contains no files.
    """
    plt.figure(figsize=(16, 5))
    patient_path = os.path.join(
        train_sample_path, 
        str(brats21id).zfill(5),
    )
    for i, t in enumerate(types, 1):
        # Sort by the integer after the last "-" in the file name (the last
        # four characters, presumably ".dcm", are stripped first).
        t_paths = sorted(
            glob.glob(os.path.join(patient_path, t, "*")), 
            key=lambda x: int(x[:-4].split("-")[-1]),
        )
        # Clamp the upper end so slice_i == 1.0 selects the last slice
        # instead of indexing one past the end.
        slice_idx = min(int(len(t_paths) * slice_i), len(t_paths) - 1)
        data = read_dicom_xray(t_paths[slice_idx])
        # One column per requested series rather than a hard-coded 4.
        plt.subplot(1, len(types), i)
        plt.imshow(data, cmap="gray")
        plt.title(f"{t}", fontsize=16)
        plt.axis("off")

    plt.suptitle(f"MGMT_value: {mgmt_value}", fontsize=16)
    plt.show()
    
    
# Show the slice at relative position 0.5 of each series for two randomly
# chosen rows of the labels table.
for i in random.sample(range(len(df)), 2):
    sample_row = df.iloc[i]
    visualize_sample(
        sample_row["BraTS21ID"],
        sample_row["MGMT_value"],
        slice_i=0.5,
    )

## Data Preprocessing

I borrowed pieces from several Kaggle notebooks to create the training and validation generators, similar to how we did the assignments for CS 190. Creating these generators is probably the most complex part of this notebook, but essentially the goal here is to create a generator which provides 2D slices of 312 x 312 pixel data from each of the four sequences (T1 precontrast, T1 postcontrast, T2, and FLAIR). Here, I set the batch size to 3, so we might want to try out GroupNorm instead of BatchNorm in the DenseNet implementation because the batch size is very small. I also used some code from a notebook which randomly selects a 2D slice from the middle 5 layers of the 3D DICOM data, so that the 2D CNN can use the best 2D slice as input. This is what the fold_generator() function does, which uses the scikit-learn StratifiedKFold to basically randomly sample a 2D slice from the middle five 2D slices of the MRI data. The final shape of an output from the train generator is (3, 312, 312, 4), corresponding to a batch size of 3, an image size of 312 x 312, and the 4 MRI sequences (T1 pre, T1 post, T2, and FLAIR). Also, the preprocessing_image() and get_data_generator() functions perform data augmentation to basically remove some of the excess black pixels around the MRI imagery. There is some manual preprocessing done in the preprocessing_image() function, but most of it is handled by the Keras data augmentation layers (RandomRotation, RandomTranslation). A similar thing is done for the validation generator. 

In [None]:
# Data Preprocessing DICOM into 2D Slices as Generator
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras.layers.experimental.preprocessing import RandomFlip,RandomRotation,RandomTranslation

# Keras Data Augmentation
# Batch-level augmentation: a small random rotation followed by a random
# horizontal translation (height_factor=0.0 disables vertical shifts).
# get_data_generator() applies this model to already-batched tensors.
augmentation_layers = tf.keras.Sequential(
    [
        RandomRotation(factor=0.01),
        RandomTranslation(height_factor=0.0, width_factor=0.1),
    ],
    name='keras_augment_layers'
)

# More manual data augmentation
def preprocessing_image(img, augment=True):
    """Scale one multi-channel sample by 1/255 and optionally augment it.

    Relies on the module-level ``input_height``, ``input_width`` and
    ``input_depth`` settings.

    Args:
        img: Image tensor whose last axis holds ``input_depth`` channels
            (assumed to be (input_height, input_width, input_depth) with
            8-bit intensities, matching the final reshape - TODO confirm
            against the caller).
        augment: When true, apply random flip / colour jitter / cutout.
            Intended for the training set only.

    Returns:
        A float32 tensor reshaped to
        ``[input_height, input_width, input_depth]``.
    """
    img = tf.cast(img, tf.float32) / 255.0

    # only true for train set
    if augment:
        # Flip the whole stack once. Flipping inside the per-channel loop
        # would draw an independent coin per channel and leave the series
        # mirrored relative to each other.
        img = tf.image.random_flip_left_right(img)

        # augment each slice
        # todo: integrate better technique
        splitted_img = tf.split(img, input_depth, axis=-1)

        augment_img = []
        for each_img in splitted_img:
            # The colour ops below are run on a 3-channel copy of the
            # single-channel slice; one channel is kept afterwards.
            # NOTE(review): saturation jitter on a grey (R=G=B) image is
            # presumably a no-op - confirm before relying on it.
            chan = tf.repeat(each_img, repeats=3, axis=-1)
            chan = tf.image.random_saturation(chan, 0.9, 1.3)
            chan = tf.image.random_contrast(chan, 0.8, 1.2)
            chan = tf.image.random_brightness(chan, 0.2)
            chan, _, _ = tf.split(chan, 3, axis=-1)
            # random_cutout is given a batch axis here; the extra leading
            # dimension is removed by the reshape at the end.
            chan = tfa.image.random_cutout(tf.expand_dims(chan, 0),
                                           mask_size=(20, 20),
                                           constant_values=0)
            augment_img.append(chan)

        img = tf.concat(augment_img, axis=-1)
    img = tf.reshape(img, [input_height, input_width, input_depth])
    return img

def get_data_generator(data, is_train=False, shuffle=True, augment=False, repeat=True, batch_size=32):
    """Turn a dataset of (image, label) pairs into a batched input pipeline.

    Relies on the module-level ``AUTO`` setting and on
    ``preprocessing_image`` / ``augmentation_layers``.

    Args:
        data: ``tf.data.Dataset`` yielding ``(image, label)`` pairs.
        is_train: When true, the final incomplete batch is dropped.
        shuffle: When true, shuffle with a buffer of ``batch_size * 10``.
        augment: When true, apply per-sample augmentation in
            ``preprocessing_image`` and the batch-level Keras
            augmentation layers.
        repeat: When true, repeat the dataset indefinitely.
        batch_size: Number of samples per batch.

    Returns:
        The batched, prefetched dataset.
    """
    if repeat: 
        data = data.repeat()
    
    if shuffle:
        data = data.shuffle(batch_size * 10)
        
    data = data.map(lambda x, y: (preprocessing_image(x, augment), y), num_parallel_calls=AUTO)
    data = data.batch(batch_size, drop_remainder=is_train)
    
    # Gate batch-level augmentation on `augment` (it used to follow
    # `shuffle`, so a shuffled but un-augmented pipeline was still
    # augmented). training=True is passed explicitly because the layers are
    # called outside of model.fit here.
    if augment:
        data = data.map(lambda x, y: (augmentation_layers(x, training=True), y), num_parallel_calls=AUTO) 
    
    data = data.prefetch(AUTO)
    return data 

In [None]:
# data loader 
class BrainTumorGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` yielding one multi-series MRI sample per patient.

    For every row of ``data`` one DICOM slice is read from each series
    directory in ``MRI_TYPES``, its all-black border is cropped away, the
    slice is resized and the series are stacked as channels.

    Relies on the module-level ``input_height`` / ``input_width`` settings
    being defined before items are requested.
    """

    # Series sub-directories, in the channel order of the returned array.
    MRI_TYPES = ("FLAIR", "T1w", "T1wCE", "T2w")

    def __init__(self, dicom_path, data, is_train=True):
        """Store the data location and the label table.

        Args:
            dicom_path: Directory containing one sub-directory per patient.
            data: DataFrame with ``BraTS21ID`` and ``MGMT_value`` columns.
            is_train: When true ``__getitem__`` returns ``(image, label)``;
                otherwise only the image is returned.
        """
        super().__init__()
        self.is_train = is_train # to control training/validation/inference part         
        self.data = data
        self.dicom_path = dicom_path
        self.label = self.data['MGMT_value']
  
    def __len__(self):
        """Return the number of patients (rows of ``data``)."""
        return self.data['BraTS21ID'].shape[0]
    
    def __getitem__(self, index):
        """Build the sample for the ``index``-th row of ``data``.

        Returns:
            ``(image, label)`` when ``is_train`` is true, else ``image``.
            ``image`` always has one channel per entry of ``MRI_TYPES``; a
            series without usable files contributes an all-zero channel.
        """
        # Positional lookup, consistent with the label lookup below, so the
        # generator also works when `data` does not carry a 0..n-1 index.
        patient_id = str(self.data['BraTS21ID'].iloc[index]).zfill(5)
        patient_ids = f"{self.dicom_path}/{patient_id}/"
   
        channel = []
        for t in self.MRI_TYPES: 
            t_paths = sorted(
                glob.glob(os.path.join(patient_ids, t, "*")), 
                key=self._slice_number,
            )
            r = self._select_slice_paths(t_paths)

            # Only the first selected slice is used per series.
            if r:
                image = self._crop_black_border(self.read_dicom_xray(r[0]))
            else:
                # Missing/empty series: keep the channel count fixed by
                # inserting a blank channel instead of silently skipping it.
                image = np.zeros((1, 1), dtype=np.uint8)

            # NOTE(review): cv2.resize takes dsize as (width, height); this
            # is harmless while both settings are equal - confirm before
            # using non-square inputs.
            channel.append(cv2.resize(image, (input_height, input_width)))
                    
        # np.array(channel) is (series, rows, cols); .T reverses all axes,
        # so every slice is stored transposed with the series last.
        # NOTE(review): orientation is deliberately left unchanged so that
        # existing results stay comparable.
        if self.is_train:
            return np.array(channel).T, self.label.iloc[index]
        return np.array(channel).T

    @staticmethod
    def _slice_number(path):
        """Return the integer after the last ``-`` in the file name stem."""
        return int(os.path.splitext(path)[0].split("-")[-1])

    @staticmethod
    def _select_slice_paths(t_paths, K=15, step=3):
        """Pick every ``step``-th path from a ``K``-wide window near the middle.

        The window is shifted by ``step`` exactly as in the original slicing
        (``[mid - K//2 + step : mid + K//2 + step : step]``). The start is
        clamped at 0 so short series do not wrap around to the end of the
        list through a negative index.
        """
        mid = len(t_paths) // 2
        half = K // 2
        strt_idx = max(0, mid - half + step)
        end_idx = mid + half + step
        return t_paths[strt_idx:end_idx:step]

    @staticmethod
    def _crop_black_border(image, threshold=0):
        """Crop ``image`` to the bounding box of pixels above ``threshold``.

        Returns a 1x1 crop when no pixel exceeds the threshold.
        """
        # Max over axis 0 gives one value per column, over axis 1 per row.
        col_idx = np.where(np.max(image, 0) > threshold)[0]
        if not col_idx.size:
            return image[:1, :1]
        row_idx = np.where(np.max(image, 1) > threshold)[0]
        return image[row_idx[0]: row_idx[-1] + 1, col_idx[0]: col_idx[-1] + 1]
    
    def read_dicom_xray(self, path):
        """Load a DICOM file and rescale its pixel data to uint8 (0..255).

        A constant-valued image yields an all-zero array instead of
        dividing by zero.
        """
        # dcmread is the supported spelling; read_file is a deprecated alias.
        data = pydicom.dcmread(path).pixel_array
        data = data - np.min(data)
        peak = np.max(data)
        if peak == 0:
            return np.zeros(data.shape, dtype=np.uint8)
        data = data / peak
        data = (data * 255).astype(np.uint8)
        return data

In [None]:
from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Tag every row with the number of the fold in which it is held out for
# validation; the splits are stratified on MGMT_value.
fold_splits = skf.split(X=df.index, y=df.MGMT_value)
for index, (train_index, val_index) in enumerate(fold_splits):
    df.loc[val_index, 'fold'] = index

# Per-fold class counts.
fold_class_counts = df.groupby(['fold', df.MGMT_value]).size()
print(fold_class_counts)

In [None]:
# params 
AUTO = tf.data.AUTOTUNE  # parallelism / prefetch setting used by the tf.data pipeline
input_height = 312       # height of each resized slice
input_width = 312        # width of each resized slice
input_depth = 4          # number of channels per sample (one per MRI series)
batch_size = 3           # samples per batch
fold = 0                 # fold held out for validation

def fold_generator(fold):
    """Build the (train, validation) loaders for one cross-validation fold.

    Rows of ``df`` whose ``fold`` column equals ``fold`` form the validation
    split; all other rows form the training split. Both splits get a fresh
    0..n-1 index.
    """
    held_out = df.fold == fold
    train_labels = df[~held_out].reset_index(drop=True)
    val_labels = df[held_out].reset_index(drop=True)

    train_loader = BrainTumorGenerator(train_sample_path, train_labels)
    val_loader = BrainTumorGenerator(train_sample_path, val_labels)
    return (train_loader, val_loader)

# first fold 
train_gen, val_gen = fold_generator(fold)

In [None]:
# Expose the training loader as a tf.data.Dataset of (image, label) pairs.
train_data = tf.data.Dataset.from_generator(
    lambda: map(tuple, train_gen),
    output_types=(tf.float32, tf.float32),
    output_shapes=(
        tf.TensorShape([input_height, input_width, input_depth]),
        tf.TensorShape([]),
    ),
)

# generate train sets 
train_generator = get_data_generator(
    train_data,
    batch_size=batch_size,
    is_train=True,
    shuffle=True,
    augment=True,
    repeat=False,
)

In [None]:
# train generator visualization 
x, y = next(iter(train_generator))
print(x.shape, y.shape)  

# Plot every channel of the second sample (index 1) of the batch side by side.
sample_image = x[1]
sample_label = y[1]
plt.figure(figsize=(35, 15))
for i in range(input_depth):
    plt.subplot(1, input_depth, i + 1)
    plt.imshow(sample_image[:, :, i], cmap="gray")
    plt.axis("off")
    plt.title(sample_label.numpy())

In [None]:
# Validation data generator visualization
val_data = tf.data.Dataset.from_generator(
    lambda: map(tuple, val_gen),
    (tf.float32, tf.float32),
    (
        tf.TensorShape([input_height, input_width, input_depth]),
        tf.TensorShape([]),
    ),
)

# generate validation sets 
# is_train=False keeps the final incomplete batch: is_train only controls
# drop_remainder, and dropping it would silently exclude validation samples
# from every evaluation.
valid_generator = get_data_generator(val_data, is_train=False, 
                                     shuffle=False, repeat=False, augment=False, 
                                     batch_size=batch_size)

# visualization 
# Plot every channel of the first sample of the first validation batch.
x, y = next(iter(valid_generator))
print(x.shape, y.shape)  
plt.figure(figsize=(35, 15))
for i in range(input_depth):
    plt.subplot(1, input_depth, i + 1)
    plt.imshow(x[0 ,:, :, i], cmap="gray")
    plt.axis("off")
    plt.title(y[0].numpy())

# 2D CNN DenseNet Implementation

I basically used the standard Keras DenseNet implementation shown in the documentation. I added a Conv2D layer in front of the DenseNet and a sigmoid layer after it so that we get a single probability as output. Even with this simple DenseNet implementation, we already have nearly 7 million trainable params, so we will definitely have to try condensing a U-Net ensemble model or try pretrained weights, since we will have way too many parameters to train. Also, I did not set any pre-trained weights for this DenseNet implementation, but if we did we could probably greatly improve the model. I didn't add any pre-trained weights because this is our benchmark anyway, so it's better to assume we use the simplest possible model we can. The other hyperparameters are the standard values (Adam optimizer, Learning Rate = 0.001, Loss = BinaryCrossEntropy). I also saved the model as a .h5 file like we did for CS 190, so if you guys want to use this pre-trained model baseline directly later on, we have that option. Also, I didn't train this model for too long, since there aren't many samples in the dataset anyway and I didn't want to overfit, so I just did 5 epochs. 

In [None]:
from tensorflow.keras import Input, Model 
from tensorflow.keras.layers import Conv2D, GlobalAveragePooling2D, Dense
from tensorflow.keras.applications import *

# Reset Keras' global state before any layer of this model is created.
# (It was previously called after the layers had been built and just before
# Model(...), i.e. in the middle of constructing the graph.)
tf.keras.backend.clear_session()

input_dim = (input_height, input_width, input_depth)
input_tensor = Input(input_dim, name='input2d')

# DenseNet121 backbone trained from scratch (weights=None) on 3-channel
# input. The variable keeps its historical name `efnet`.
efnet = DenseNet121(weights=None, 
                       include_top = False, 
                       input_shape=(input_height, input_width, 3))

# Conv layer mapping the input_depth-channel input to the 3 channels the
# backbone expects.
mapping3feat = Conv2D(3, (3, 3), padding='same', use_bias=False)(input_tensor)

# Backbone -> global average pooling -> single sigmoid unit.
output = efnet(mapping3feat)
output = GlobalAveragePooling2D()(output)
output = Dense(1, activation='sigmoid')(output)

model = Model(input_tensor, output)
model.summary()

In [None]:
from tensorflow import keras 
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from tensorflow_addons.optimizers import RectifiedAdam, Lookahead

# compiling 
# The AUC metric is named explicitly: an unnamed AUC() gets an
# auto-numbered name (e.g. 'auc_1') when the cell is re-run, and the
# checkpoint monitor 'val_auc' below would then never match.
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
    optimizer=Adam(learning_rate=1e-3),
    metrics=[tf.keras.metrics.AUC(name='auc'), 
             tf.keras.metrics.BinaryAccuracy(name='bacc')],
)

# define callbacks.
# Keep only the weights of the epoch with the highest validation AUC.
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    "model.h5", monitor='val_auc', 
    mode='max', save_best_only=True
)


# fitting the model 
epochs = 5
model.fit(
    train_generator, 
    epochs=epochs,
    validation_data=valid_generator, 
    callbacks=[checkpoint_cb]
)

Our final training AUC is 0.5236 and our final validation AUC is 0.5280, which is around the AUC of a solution ranked \#600 on the leaderboard (~55th percentile), i.e. around the average solution. The training and validation AUCs are starting to diverge after 5 epochs, so this is probably the optimal training length before we start overfitting. I think this is a pretty good result for just a benchmark 2D CNN, so this looks promising for a complex U-Net ensemble. 

# U-Net Implementation

In [None]:
from tensorflow import losses, optimizers
from tensorflow.keras import Input, Model, models, layers
# --- Define model

# I will be implementing a 2D U-Net architecture with the following modifications:
#     1) An Inception modification to the contracting / expanding backbone
#     2) Modification of the skip connection to include additional convolution operations

# --- Define layer helper functions
def conv(x, filters, kernel_size, strides):
    """Apply a 'same'-padded Conv2D to x."""
    layer = layers.Conv2D(
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding='same',
    )
    return layer(x)


def norm(x):
    """Apply batch normalization to x."""
    return layers.BatchNormalization()(x)


def relu(x):
    """Apply a ReLU activation to x."""
    return layers.ReLU()(x)


def pool(x):
    """Apply a 3x3, stride-1, 'same'-padded max pool to x."""
    return layers.MaxPool2D(pool_size=(3, 3), strides=1, padding='same')(x)


def conv2(filters, x):
    """3x3 stride-2 convolution -> batch norm -> ReLU."""
    return relu(norm(conv(x, filters, kernel_size=(3,3), strides=(2, 2))))


# --- Define 1x1, 3x3 and 5x5 convs
def conv1(filters, x):
    """1x1 stride-1 convolution -> batch norm -> ReLU."""
    return relu(norm(conv(x, filters, kernel_size=(1, 1), strides=1)))


def conv3(filters, x):
    """3x3 stride-1 convolution -> batch norm -> ReLU."""
    return relu(norm(conv(x, filters, kernel_size=(3, 3), strides=1)))


def conv5(filters, x):
    """5x5 stride-1 convolution -> batch norm -> ReLU."""
    return relu(norm(conv(x, filters, kernel_size=(5, 5), strides=1)))


def mpool(x):
    """Max pool -> batch norm -> ReLU."""
    return relu(norm(pool(x)))


# --- Define projection
def proj(filters, x):
    """Plain 1x1 convolution (no normalization or activation)."""
    layer = layers.Conv2D(
        filters=filters,
        strides=1,
        kernel_size=(1, 1),
        padding='same',
    )
    return layer(x)


# --- Define single transpose
def tran(x, filters, kernel_size, strides):
    """Apply a 'same'-padded Conv2DTranspose to x."""
    layer = layers.Conv2DTranspose(
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding='same',
    )
    return layer(x)


# --- Define transpose block
def tran2(filters, x):
    """3x3 stride-2 transposed convolution -> batch norm -> ReLU."""
    return relu(norm(tran(x, filters, kernel_size=(3,3), strides=(2, 2))))


# --- Define concat function
def concat(a, b):
    """Concatenate two tensors with a Concatenate layer."""
    return layers.Concatenate()([a, b])

# --- Define inception function
def inception(filters, prev_layer):
    """Inception-style block: four parallel branches, concatenated.

    Each branch produces ``filters`` channels. The 3x3 and 5x5 branches
    share one 1x1 projection of the input as their bottleneck.
    """
    # Layers are created in the same order as before so that automatic
    # layer naming is unaffected.
    bottleneck = proj(filters, prev_layer)
    branch_1x1 = conv1(filters, prev_layer)
    branch_3x3 = conv3(filters, bottleneck)
    branch_5x5 = conv5(filters, bottleneck)
    branch_pool = proj(filters, mpool(prev_layer))
    # --- Concatenate
    branches = [branch_1x1, branch_3x3, branch_5x5, branch_pool]
    return layers.Concatenate()(branches)

In [None]:
# --- Define contracting layers
# Each conv2(...) is a stride-2 convolution, so l2, l3 and l4 each work at a
# lower resolution than the previous level. inception(f, ...) concatenates
# four branches of f channels each.
# NOTE: reuses `input_tensor` from the DenseNet cell as the model input.
l1 = conv1(32, input_tensor)
l2 = inception(16, conv2(32, l1))
l3 = inception(32, conv2(64, l2))
l4 = inception(64, conv2(128, l3))

# --- Define expanding layers
# I'm modifying the skip connection by performing a convolution operation on the 
# concat to introduce additional convolution operations in the skip connections 
# Each tran2(...) is a stride-2 transposed convolution; its output is
# concatenated with the contracting-path tensor of the matching level
# (l3, then l2).
l5 = tran2(128, l4)
l6  = tran2(64, inception(64, conv3(128,concat(l3, l5))))
l7 = tran2(32, inception(32, conv3(64, concat(l2, l6))))
# Disabled alternative for l8 (it references l8 before it is assigned, so it
# cannot run as written):
#l8 = tran2(8, inception(4, conv3(16, concat(l2,l8))))
l8 = inception(4, l7)

# --- Create output head: 2-filter conv -> global average pool -> one sigmoid unit
output = layers.Conv2D(filters=2, kernel_size=(3, 3),padding='same')(l8)
output = GlobalAveragePooling2D()(output)
output = Dense(1, activation='sigmoid')(output)

# --- Create model (rebinds `model`, replacing the DenseNet baseline object)
model = Model(inputs=input_tensor, outputs=output)

In [None]:
# Print the layer-by-layer summary of the U-Net style model defined above.
model.summary()

In [None]:
# --- Compile model
# Same loss, optimizer settings and metrics as the DenseNet baseline.
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
    optimizer=optimizers.Adam(learning_rate=1e-3),
    metrics=[
        tf.keras.metrics.AUC(),
        tf.keras.metrics.BinaryAccuracy(name='bacc'),
    ],
)

# --- Train for 20 epochs, evaluating on the validation fold after each one
model.fit(
    train_generator,
    validation_data=valid_generator,
    epochs=20,
)