In [None]:
!pip install ../input/kerasapplications 
!pip install ../input/classificationmodelsed/keras-2.6.0-py2.py3-none-any.whl
!pip install ../input/classificationmodelsed/classification_models_3D-1.0.2-py3-none-any.whl

@InProceedings{RSolovyev_2021_stalled,
  author = {Solovyev, Roman and Kalinin, Alexandr A. and Gabruseva, Tatiana},
  title = {3D Convolutional Neural Networks for Stalled Brain Capillary Detection},
  booktitle = {Arxiv: 2104.01687},
  month = {April},
  year = {2021}
}

# RSNA MICCAI Brain Tumor Radiogenomic Classification using 3D Conv [TF]
In this notebook we will learn to train a 3D conv model using a transfer-learning approach. 
We will be using all the MRI types from the dataset, and at inference time we use a blending-based approach to predict.
Please refer to the inference notebook (TBD) to see how the trained models are used to predict. 

In [None]:
import os
import re 
import glob
import numpy as np
import pandas as pd
import cv2
import seaborn as sns
from pathlib import Path
from tqdm.notebook import tqdm
import warnings
warnings.filterwarnings('ignore')
import random as rn
import matplotlib.pyplot as plt
import imageio
import pydicom
import math
from numpy.random import default_rng
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

# Deep learning packages
import tensorflow as tf
from tensorflow.keras.callbacks import *
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from tensorflow import keras

from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from random import shuffle

from pydicom.pixel_data_handlers.util import apply_voi_lut
from classification_models_3D.tfkeras import Classifiers

In [None]:
# Run-wide settings: dataset locations first, then data-shape and optimisation knobs.
config = dict(
    # --- locations ---
    images_source_path='../input/rsna-miccai-brain-tumor-radiogenomic-classification/train',
    test_images_source_path='../input/rsna-miccai-brain-tumor-radiogenomic-classification/test',
    csv_path='../input/rsna-miccai-brain-tumor-radiogenomic-classification/train_labels.csv',
    data_path='../input/rsna-miccai-brain-tumor-radiogenomic-classification',
    # --- cross-validation / reproducibility ---
    nfolds=3,
    global_seed=42,
    # --- input tensors ---
    batch_size=2,
    frames_per_seq=16,   # slices kept per volume
    img_size=128,        # side length each slice is resized to
    # --- optimisation ---
    learning_rate=0.0001,
    num_epochs=15,
    channels=3,
    scale=0.8,           # fraction of each slice kept by the centre crop
)

# Modalities to train on; shrink to ['FLAIR'] for a single-modality run.
mri_types = [
    'FLAIR',
    'T1w',
    'T1wCE',
    'T2w',
]

In [None]:
def set_seed(seed):
    """Seed every random number generator this notebook draws from.

    Seeds Python's `random` module (imported as `rn`), NumPy's legacy global
    generator and TensorFlow's global generator.

    Args:
        seed: Integer seed shared by all three generators.
    """
    rn.seed(seed)
    np.random.seed(seed)
    # TF2-native spelling; the compat.v1 alias exists for TF1-style code only.
    tf.random.set_seed(seed)


set_seed(config['global_seed'])

In [None]:
# Build the case table: one row per patient with its label, image folder and CV fold.
df_data = pd.read_csv(config['csv_path'])
df_data["folder_name"] = [format(x, "05d") for x in df_data["BraTS21ID"]]
df_data["folder_path"] = [os.path.join(config['images_source_path'], x) for x in df_data["folder_name"]]

# Data from the following patients is invalid as per the organizer. Drop them
# *before* splitting so the folds are stratified over the rows actually used,
# and reset the index so the positional indices returned by `skf.split`
# coincide with the `.loc` labels used below.
df_data = df_data[~df_data.folder_name.isin(["00109", "00123", "00709"])].reset_index(drop=True)
# For a quick smoke run, subsample here (e.g. `df_data = df_data.head(30)`).

skf = StratifiedKFold(n_splits=config['nfolds'], shuffle=True, random_state=config['global_seed'])
# Pre-create the column as integers; assigning into a missing column via .loc
# would produce a float column padded with NaN.
df_data["fold"] = -1
for index, (_, val_index) in enumerate(skf.split(X=df_data.index, y=df_data.MGMT_value)):
    df_data.loc[val_index, 'fold'] = index
len(df_data)

In [None]:
class Dataset(tf.keras.utils.Sequence):
    """Keras `Sequence` yielding batches of fixed-depth MRI volumes read from DICOM folders.

    Every sample is built from the central slices of one modality folder of
    one case. Slices are rescaled to 8 bit, optionally rotated, centre-cropped,
    resized and repeated along a channel axis; the volume is zero-padded to
    `num_imgs` slices and min-max normalised to [0, 1].

    Batches have shape (batch, num_imgs, img_size, img_size, channels).

    Args:
        df: DataFrame with columns "BraTS21ID" and "folder_path"; the column
            "MGMT_value" is required when `is_train` is true.
        is_train: When true `__getitem__` returns `(X, y)`, otherwise only `X`.
        batch_size: Number of cases per batch.
        shuffle: Reshuffle the sample order after each epoch (only when
            `is_train` is true).
        mri_type: Name of the modality sub-folder to read from each case.
        augment: Apply a random rotation to training volumes. `None` means
            "same as `is_train`".

    Raises:
        ValueError: If `is_train` is true but `df` has no "MGMT_value" column.
    """

    def __init__(self, df, is_train=True, batch_size=config['batch_size'], shuffle=True,
                 mri_type=mri_types[0], augment=None):
        super().__init__()
        self.idx = df["BraTS21ID"].values
        self.paths = df["folder_path"].values
        # Unlabelled frames (inference) simply have no target column.
        self.y = df["MGMT_value"].values if "MGMT_value" in df.columns else None
        if is_train and self.y is None:
            raise ValueError('is_train=True requires an "MGMT_value" column in df')
        self.is_train = is_train
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.mri_type = mri_type
        self.augment = is_train if augment is None else augment
        self.df = df

    def __len__(self):
        """Number of batches per epoch; the last batch may be smaller."""
        return math.ceil(len(self.idx) / self.batch_size)

    def rotate_image(self, image, angle):
        """Rotate a 2D image by `angle` degrees about its centre, keeping its size."""
        image_center = tuple(np.array(image.shape[1::-1]) / 2)
        rot_mat = cv2.getRotationMatrix2D(image_center, angle, 1.0)
        result = cv2.warpAffine(image, rot_mat, image.shape[1::-1], flags=cv2.INTER_LINEAR)
        return result

    def __getitem__(self, ids):
        """Return batch number `ids`.

        Returns:
            `(batch_X, batch_y)` when `is_train` is true, otherwise `batch_X`.
            Both modes slice by batch so they agree with `__len__`.
        """
        start = ids * self.batch_size
        stop = start + self.batch_size
        batch_paths = self.paths[start:stop]

        split = "train" if self.is_train else "test"
        batch_X = np.stack([self.load_dicom_images_3d(x, split=split) for x in batch_paths], axis=0)

        if self.is_train:
            return batch_X, self.y[start:stop]
        return batch_X

    def load_dicom_images_3d(self, scan_id, num_imgs=config['frames_per_seq'], img_size=config['img_size'],
                             mri_type=None, split="train", rotate=0):
        """Load one case as a normalised volume of shape (num_imgs, img_size, img_size, channels).

        Args:
            scan_id: Path of the case folder.
            num_imgs: Number of slices in the returned volume; shorter series
                are zero-padded at the end.
            img_size: Side length every slice is resized to.
            mri_type: Modality sub-folder; `None` uses the instance's `mri_type`.
            split: "train" enables the random rotation when `augment` is set.
            rotate: Explicit rotation angle in degrees for every slice. When 0,
                a random angle in [0, 20) is drawn for augmented training
                volumes and no rotation is applied otherwise.

        Returns:
            float32 array scaled to [0, 1], or all zeros when the volume is
            constant or the modality folder contains no DICOM files.
        """
        if mri_type is None:
            mri_type = self.mri_type
        target_file_paths = self.get_img_path_3d(scan_id, mri_type, num_imgs=num_imgs)

        # One angle per volume: rotating each slice independently would break
        # the spatial alignment between neighbouring slices.
        if rotate:
            angle = rotate
        elif self.augment and split == "train":
            angle = np.random.randint(0, 20)
        else:
            angle = 0

        slices = [self.read_mri(f, angle=angle, img_size=img_size) for f in target_file_paths]

        # Pre-allocating zeros handles both short series (tail padding) and
        # empty folders, which previously produced a shape-(0,) array that
        # could not be concatenated with the padding.
        img3d = np.zeros((num_imgs, img_size, img_size, config['channels']), dtype=np.float32)
        if slices:
            img3d[:len(slices)] = np.stack(slices)

        lowest, highest = img3d.min(), img3d.max()
        if lowest < highest:
            img3d = (img3d - lowest) / (highest - lowest)

        return img3d

    def crop_center_square(self, frame, scale=config['scale']):
        """Return the central region of `frame` covering `scale` of its height and width."""
        y, x = frame.shape[0:2]
        center_x, center_y = x / 2, y / 2
        width_scaled, height_scaled = x * scale, y * scale
        left_x, right_x = center_x - width_scaled / 2, center_x + width_scaled / 2
        top_y, bottom_y = center_y - height_scaled / 2, center_y + height_scaled / 2
        return frame[int(top_y):int(bottom_y), int(left_x):int(right_x)]

    def read_mri(self, path, voi_lut=True, fix_monochrome=True, angle=None, img_size=config['img_size']):
        """Read one DICOM slice as a uint8 array of shape (img_size, img_size, channels).

        Args:
            path: Path of the .dcm file.
            voi_lut: Apply the file's VOI LUT / windowing to the pixel data.
            fix_monochrome: Invert MONOCHROME1 images.
            angle: Rotation in degrees. `None` draws a random angle in [0, 20)
                for this slice; 0 skips the rotation.
            img_size: Side length the slice is resized to.
        """
        # Original from: https://www.kaggle.com/raddar/convert-dicom-to-np-array-the-correct-way
        dicom = pydicom.dcmread(path)
        if voi_lut:
            data = apply_voi_lut(dicom.pixel_array, dicom)
        else:
            data = dicom.pixel_array
        if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
            data = np.amax(data) - data
        data = data - np.min(data)
        peak = np.max(data)
        # A constant slice has peak 0 after the shift; leave it at zero
        # instead of dividing by zero.
        if peak > 0:
            data = data / peak
        data = (data * 255).astype(np.uint8)
        if angle is None:
            angle = np.random.randint(0, 20)
        if angle:
            data = self.rotate_image(data, angle)
        data = self.crop_center_square(data)
        data = cv2.resize(data, (img_size, img_size))
        # Same channel count as the zero padding in load_dicom_images_3d.
        data = np.repeat(data[..., np.newaxis], config['channels'], -1)
        return data

    def get_img_path_3d(self, scan_id, mri_type, num_imgs=config['frames_per_seq']):
        """Return the paths of the central slices of one modality, in natural filename order.

        At most `2 * (num_imgs // 2)` paths are returned; fewer when the
        folder holds fewer files, and an empty list when it holds none.
        """
        modality_path = os.path.join(scan_id, mri_type)
        # Natural sort so that e.g. "...-10.dcm" comes after "...-9.dcm".
        files = sorted(glob.glob(f"{modality_path}/*.dcm"),
                       key=lambda var: [int(x) if x.isdigit() else x for x in re.findall(r'[^0-9]|[0-9]+', var)])
        mid_num = len(files) // 2
        num_3d2 = num_imgs // 2
        start_idx = max(0, mid_num - num_3d2)
        end_idx = min(len(files), mid_num + num_3d2)
        return files[start_idx:end_idx]

    def on_epoch_end(self):
        """Reshuffle the samples between epochs, keeping ids, paths and labels aligned."""
        if self.shuffle and self.is_train:
            # A single permutation applied to every parallel array; shuffling
            # only some of them would pair volumes with the wrong labels.
            order = np.random.permutation(len(self.paths))
            self.idx = self.idx[order]
            self.paths = self.paths[order]
            if self.y is not None:
                self.y = self.y[order]

In [None]:
# Sanity check: fetch the first two batches and display the first slice of the
# first volume in each one.
train_dataset = Dataset(df_data, batch_size=config['batch_size'])

for i in range(2):
    images, label = train_dataset[i]
    # The notebook's data are MRI series, not CT.
    print("Dimension of the MRI batch is:", images.shape)
    print("label=", label.shape)
    plt.imshow(images[0, 0, :, :, 0], cmap="gray")
    plt.title(f"Batch {i}: first slice of first volume")
    plt.show()

In [None]:
def get_3d_model(width=config['img_size'], height=config['img_size'], depth=config['frames_per_seq'], model_arch='custom'):
    """Build a 3D convolutional binary classifier.

    Both variants take tensors laid out as (depth, width, height, channels)
    with `config['channels']` channels and end in a single sigmoid unit.

    Args:
        width: Slice width in pixels.
        height: Slice height in pixels.
        depth: Number of slices per volume.
        model_arch: "custom" for the small four-stage CNN defined here; any
            other value is looked up with `Classifiers.get` and instantiated
            with `weights='imagenet'`.

    Returns:
        An uncompiled `tf.keras.Model`.

    Note:
        The custom network halves every spatial dimension four times, so
        `width`, `height` and `depth` must each be at least 16.
    """
    input_shape = (depth, width, height)
    inputs = tf.keras.layers.Input((*input_shape, config['channels']), name='inputs')
    if model_arch == "custom":
        x = inputs
        # Each stage must consume the previous stage's output; feeding
        # `inputs` again would silently discard the earlier stages.
        for filters in (64, 64, 128, 256):
            x = Conv3D(filters=filters, kernel_size=3, padding='same', activation="relu")(x)
            x = MaxPool3D(pool_size=2)(x)
            x = BatchNormalization()(x)

        x = GlobalAveragePooling3D()(x)
        x = Dense(units=512, activation="relu")(x)
        x = Dropout(0.08)(x)

        outputs = Dense(units=1, activation="sigmoid")(x)
        model = tf.keras.Model(inputs, outputs, name="3dcnn")
    else:
        # Learnable stem that maps the input to the 3 channels the backbone is built for.
        x = Conv3D(filters=3, kernel_size=3, strides=(1, 1, 1), padding='same', use_bias=True)(inputs)
        net, _ = Classifiers.get(model_arch)
        x = net(input_shape=(*input_shape, 3), include_top=False, weights='imagenet')(x)
        x = GlobalAveragePooling3D()(x)
        x = Dropout(rate=0.5)(x)
        outputs = Dense(1, activation='sigmoid', dtype='float32')(x)
        model = tf.keras.Model(inputs, outputs, name=model_arch)
    return model

model = get_3d_model(model_arch='seresnet50')
model.summary()


In [None]:
def plot(history):
    """Draw train/validation curves for accuracy, loss and AUC, one panel per metric.

    Args:
        history: Mapping from a run label to an object whose `.history` dict
            holds the lists "accuracy", "loss", "auc" and their "val_"
            counterparts. Every run is overlaid on the same three panels.
    """
    figure, axes = plt.subplots(1, 3, figsize=(20, 7))
    panels = axes.ravel()
    metric_names = ["accuracy", "loss", "auc"]
    for fold in history:
        curves = history[fold].history
        train_label = "train " + str(fold)
        val_label = "val " + str(fold)
        for panel, metric in zip(panels, metric_names):
            panel.plot(curves[metric], label=train_label)
            panel.plot(curves["val_" + metric], linestyle="dotted", label=val_label)
            panel.set_title("Model {}".format(metric))
            panel.set_xlabel("epochs")
            panel.set_ylabel(metric)
            panel.legend()
    
class _ModalityDataset(Dataset):
    """`Dataset` pinned to one MRI modality, whatever modality the base loader defaults to."""

    def __init__(self, df, modality, **kwargs):
        super().__init__(df, **kwargs)
        self._modality = modality

    def load_dicom_images_3d(self, scan_id, **kwargs):
        # Force the modality on every load issued by __getitem__.
        kwargs['mri_type'] = self._modality
        return super().load_dicom_images_3d(scan_id, **kwargs)


def train_each_mri_type(mri_types, model_arch, val_fold=0):
    """Train one model per MRI modality and plot all learning curves.

    For every modality a fresh model is built, trained on the rows of
    `df_data` whose `fold` differs from `val_fold`, validated on the rows
    whose `fold` equals it, and the best checkpoint (by validation loss) is
    written to `{model_arch}_{m_type}.h5`.

    Args:
        mri_types: Iterable of modality folder names.
        model_arch: Architecture name forwarded to `get_3d_model`.
        val_fold: Value of `df_data.fold` held out for validation.

    Raises:
        ValueError: If no row of `df_data` belongs to `val_fold`.
    """
    # Hold one fold out; validating on the training rows would make
    # val_loss (which drives all three callbacks) meaningless.
    train_df = df_data[df_data.fold != val_fold]
    valid_df = df_data[df_data.fold == val_fold]
    if len(valid_df) == 0:
        raise ValueError(f"no rows found for validation fold {val_fold!r}")

    history = {}
    for m_type in mri_types:
        model = get_3d_model(model_arch=model_arch)
        print(f"Training for {m_type}")
        print('*'*100)
        train_dataset = _ModalityDataset(train_df, m_type, batch_size=config['batch_size'])
        # NOTE: Dataset's random rotation is applied to validation samples as well.
        valid_dataset = _ModalityDataset(valid_df, m_type, batch_size=config['batch_size'], shuffle=False)
        optimizer = tf.keras.optimizers.Adam(learning_rate=config['learning_rate'])
        early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, verbose=0, mode='min', restore_best_weights=True)
        model_checkpoint = tf.keras.callbacks.ModelCheckpoint(f'{model_arch}_{m_type}.h5', save_best_only=True, save_weights_only=False)
        # Patience must be shorter than early stopping's, otherwise training
        # stops in the same epoch the learning rate would first be reduced.
        LR = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=2, min_lr=0.000001, verbose=1, mode='min')
        model.compile(optimizer=optimizer,loss='binary_crossentropy', metrics=['accuracy', tf.keras.metrics.AUC(name='auc')])
        log =  model.fit(train_dataset,validation_data=valid_dataset,epochs=config['num_epochs'],
                         shuffle=True, callbacks=[LR, early_stopping, model_checkpoint])
        history[m_type] = log
    plot(history)
    
train_each_mri_type(mri_types, "seresnet50")