In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns 
import matplotlib.pyplot as plt
import pydicom # A library that loads dicom(dcm) files 
import os
import glob
from IPython.display import Markdown # we will require this to print Markdown in the console
from tensorflow.keras.applications.resnet50 import ResNet50 

from tensorflow.keras.models import Model,Sequential
from tensorflow.keras.layers import Input, GRU
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPool2D, MaxPooling2D,BatchNormalization, TimeDistributed, LSTM
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping

import datetime

from pathlib import Path
import re
from imageio import imread
import cv2
from sklearn.model_selection import train_test_split


In [None]:
# Locations of the competition's label table and the sample submission file.
_COMPETITION_DIR = "../input/rsna-miccai-brain-tumor-radiogenomic-classification"
input_df = _COMPETITION_DIR + "/train_labels.csv"
sample = _COMPETITION_DIR + "/sample_submission.csv"
 

### This code was inspired by https://keeganfdes03.medium.com/making-an-eda-on-medical-images-b823693a517a

In [None]:
!pip install celluloid

In [None]:
from celluloid import Camera

In [None]:
# Load the label table from the CSV at `input_df` and display it.
train_df = pd.read_csv(input_df)
train_df

In [None]:
# Load the sample submission from the CSV at `sample` and display it.
sample_df = pd.read_csv(sample)
sample_df

In [None]:
def load_dicom(path):
    """Read one DICOM file and return its pixels min-max rescaled to 8 bits.

    Parameters
    ----------
    path : str or path-like
        Location of the ``.dcm`` file.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array with the same shape as the stored pixel array. The
        minimum maps to 0 and the maximum to 255; a constant image comes back
        as all zeros.
    """
    # dcmread is the documented reader; read_file is a deprecated alias of it.
    dicom = pydicom.dcmread(path)
    # Subtract the minimum in floating point: doing it in the stored integer
    # dtype can wrap around for signed 16-bit data with a wide value range.
    data = dicom.pixel_array.astype(np.float64)
    data = data - np.min(data)
    peak = np.max(data)
    if peak != 0:
        data = data / peak
    return (data * 255).astype(np.uint8)

In [None]:
def visualize_sample(ID,path, type_="flair"):
    """Plot a single DICOM slice titled with its patient ID, scan type and label.

    ID    -- value looked up in the ``BraTS21ID`` column of the global ``train_df``
             (exactly one row must match, otherwise ``.item()`` raises).
    path  -- DICOM file to draw, read through ``load_dicom``.
    type_ -- text shown in the title between the ID and the label.
    """
    plt.figure(figsize=(16, 5))
    pixels = load_dicom(path)
    plt.imshow(pixels)
    is_patient = train_df['BraTS21ID'] == ID
    mgmt = train_df.loc[is_patient, "MGMT_value"].item()
    caption = "".join([str(ID), " ", type_, " MGMT_value: ", str(mgmt)])
    plt.title(caption)
    plt.axis("off")

In [None]:
# Draw one FLAIR slice of patient 0 as a quick sanity check of the loader.
_example_slice = "../input/rsna-miccai-brain-tumor-radiogenomic-classification/train/00000/FLAIR/Image-104.dcm"
visualize_sample(0, _example_slice, type_="FLAIR")

In [None]:
# Display the label table as loaded, before any patients are excluded.
train_df

In [None]:
# Patients dropped from the label table.
# NOTE(review): the reason for excluding these three IDs is not recorded here - confirm and document.
_excluded_ids = [109, 123, 709]
_is_excluded = train_df['BraTS21ID'].isin(_excluded_ids)
train_df = train_df[~_is_excluded]

In [None]:
# Display the label table after the excluded patients were removed.
train_df

In [None]:
# 80/20 split of patient IDs, stratified on the label, with a fixed seed.
_patient_ids = train_df['BraTS21ID']
_mgmt_labels = train_df['MGMT_value']
X_train, X_valid, y_train, y_valid = train_test_split(
    _patient_ids,
    _mgmt_labels,
    test_size=0.2,
    random_state=42,
    stratify=_mgmt_labels,
)

In [None]:
# Check which container type train_test_split returned for the IDs.
type(X_train)

In [None]:
# Display the training labels.
y_train

In [None]:
# Number of patients in the training split.
len(X_train)

In [None]:
# Shape of the validation ID container.
X_valid.shape

In [None]:
# Display the filtered label table once more (the split above does not modify it).
train_df

In [None]:
from matplotlib import animation, rc
# Set matplotlib's `animation.html` rc option to 'jshtml' for the animations created below.
rc('animation', html='jshtml')


def create_animation(ims):
    """Build a matplotlib animation that steps through ``ims`` frame by frame.

    ``ims`` must be a non-empty, indexable sequence of 2-D images; the first
    frame initialises the grayscale image artist that later frames overwrite.
    Returns the ``FuncAnimation`` object (one frame per image, 1000//24 ms apart).
    """
    frame_interval_ms = 1000 // 24
    fig = plt.figure(figsize=(6, 6))
    plt.axis('off')
    artist = plt.imshow(ims[0], cmap="gray")

    def show_frame(index):
        # Swap the displayed pixels in place instead of redrawing the axes.
        artist.set_array(ims[index])
        return [artist]

    return animation.FuncAnimation(
        fig, show_frame, frames=len(ims), interval=frame_interval_ms
    )

In [None]:
def load_dicom_line(path):
    """Load every slice in a series folder, ordered by slice number.

    Files are ordered by the integer that follows the last ``-`` in the name
    once the final four characters (the extension) are stripped, e.g.
    ``Image-104.dcm`` -> 104. Slices that are entirely zero after
    ``load_dicom`` are left out.

    Returns a list of arrays as produced by ``load_dicom``.
    """
    def slice_number(filename):
        return int(filename[:-4].split("-")[-1])

    ordered = sorted(glob.glob(os.path.join(path, "*")), key=slice_number)
    frames = (load_dicom(name) for name in ordered)
    return [frame for frame in frames if frame.max() != 0]

In [None]:
# images = load_dicom_line("../input/rsna-miccai-brain-tumor-radiogenomic-classification/train/00000/T2w")
# create_animation(images)

In [None]:
# Edge length in pixels read by crop_resize_image() and by the model's input_shape.
# NOTE(review): the 3-D loader uses a separate constant SIZE = 256 - confirm the two are meant to differ.
size = 224

In [None]:
def crop_resize_image(img):
    """Resize ``img`` to a square of ``size`` x ``size`` pixels (module-level ``size``).

    Despite the name, nothing is cropped; the image is only resized with
    ``cv2.resize`` using its default interpolation.
    """
    target_shape = (size, size)
    return cv2.resize(img, target_shape)

In [None]:
def normalize(x):
    """Load a DICOM slice, resize it and map its intensities to roughly [-1, 1).

    Parameters
    ----------
    x : str or path-like
        Path of the DICOM file.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(size, size)`` computed as ``(pixel - 128) / 128``
        on the 8-bit rescaled image.
    """
    # The (v - 128) / 128 centring only makes sense for values on a 0..255
    # scale. Raw DICOM intensities are not on that scale, so go through
    # load_dicom(), which min-max rescales the slice to uint8 first.
    data = load_dicom(x)
    data = crop_resize_image(data)
    normalised_data = (data.astype(float) - 128) / 128
    return normalised_data

In [None]:
 
# Edge length (pixels) each slice is resized to by the 3-D loader below.
SIZE = 256
# Number of slices kept per scan; doubles as the last axis of each loaded volume.
NUM_IMAGES = 64
# Root folder of the competition data (contains the per-split scan folders).
data_directory = '../input/rsna-miccai-brain-tumor-radiogenomic-classification'
from pydicom.pixel_data_handlers.util import apply_voi_lut

def load_dicom_image(path, img_size=SIZE, voi_lut=True, rotate=0):
    """Read one DICOM slice and return it resized to ``img_size`` x ``img_size``.

    Parameters
    ----------
    path : str or path-like
        DICOM file to read.
    img_size : int
        Edge length of the returned square image.
    voi_lut : bool
        When true, the pixel data is passed through ``apply_voi_lut``.
    rotate : int
        0 leaves the image as is; 1, 2 and 3 select 90 degrees clockwise,
        90 degrees counter-clockwise and 180 degrees respectively. Other
        positive values raise ``IndexError``.

    Returns
    -------
    numpy.ndarray
        The resized 2-D image; values are not rescaled here.
    """
    # dcmread is the documented reader; read_file is a deprecated alias of it.
    dicom = pydicom.dcmread(path)
    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array

    if rotate > 0:
        # Index 0 is a placeholder so that `rotate` can index the list directly.
        rot_choices = [0, cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE, cv2.ROTATE_180]
        data = cv2.rotate(data, rot_choices[rotate])

    return cv2.resize(data, (img_size, img_size))

def load_dicom_images_3d(scan_id, num_imgs=NUM_IMAGES, img_size=SIZE, mri_type="FLAIR", split="train", rotate=0):
    """Load the central slices of one MRI series as a normalised 3-D volume.

    Parameters
    ----------
    scan_id : str
        Folder name of the scan (e.g. ``"00000"``) below ``data_directory/split``.
    num_imgs : int
        Size of the last axis of the result. About ``num_imgs`` slices centred
        on the middle of the series are read; missing ones are zero-padded.
    img_size : int
        Edge length every slice is resized to.
    mri_type : str
        Series sub-folder, e.g. ``"FLAIR"``.
    split : str
        Data split sub-folder, e.g. ``"train"``.
    rotate : int
        Forwarded to ``load_dicom_image``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, img_size, img_size, num_imgs)``, scaled to [0, 1]
        unless the volume is constant.

    Raises
    ------
    ValueError
        If the series folder contains no ``.dcm`` files.
    """
    # Natural sort, so that Image-2 comes before Image-10.
    files = sorted(glob.glob(f"{data_directory}/{split}/{scan_id}/{mri_type}/*.dcm"),
               key=lambda var:[int(x) if x.isdigit() else x for x in re.findall(r'[^0-9]|[0-9]+', var)])
    if not files:
        raise ValueError(
            f"no DICOM slices found for scan {scan_id!r} ({split}/{mri_type})"
        )

    # Take a window of num_imgs slices around the middle of the series.
    middle = len(files) // 2
    half_window = num_imgs // 2
    p1 = max(0, middle - half_window)
    p2 = min(len(files), middle + half_window)

    # img_size must be forwarded: the zero padding below is built with it, so
    # the slices have to be resized to the same edge length.
    slices = [load_dicom_image(f, img_size=img_size, rotate=rotate) for f in files[p1:p2]]
    # np.stack gives (n_slices, rows, cols); .T reverses all axes, so the
    # result is (cols, rows, n_slices) and each slice is transposed in-plane.
    img3d = np.stack(slices).T
    if img3d.shape[-1] < num_imgs:
        n_zero = np.zeros((img_size, img_size, num_imgs - img3d.shape[-1]))
        img3d = np.concatenate((img3d, n_zero), axis=-1)

    # Min-max scale the whole volume; a constant volume is left untouched.
    if np.min(img3d) < np.max(img3d):
        img3d = img3d - np.min(img3d)
        img3d = img3d / np.max(img3d)

    return np.expand_dims(img3d, 0)

#a = load_dicom_images_3d("00000")
#print(a)
#print(a.shape)
#print(np.min(a), np.max(a), np.mean(a), np.median(a))

In [None]:
def generator(source_path, batch_size,y_data):
    """Endlessly yield ``(batch_data, batch_label)`` pairs for Keras training.

    Each patient contributes four samples, one per MRI type (FLAIR, T1w,
    T1wCE, T2w), and every one of them carries the patient's label.

    Parameters
    ----------
    source_path : iterable of int
        Patient IDs; each is zero-padded to five digits to form the scan folder name.
    batch_size : int
        Number of patients per batch.
    y_data : sequence
        Labels aligned position by position with ``source_path``.

    Yields
    ------
    batch_data : numpy.ndarray
        Shape ``(batch_size * 4, SIZE, SIZE, NUM_IMAGES)``.
    batch_label : numpy.ndarray
        Shape ``(batch_size * 4,)``; row ``k`` is the label of the patient
        that row ``k`` of ``batch_data`` was loaded from.

    Patients left over after the last full batch are skipped so that every
    batch has the same shape; iteration then restarts from the first patient.

    Raises
    ------
    ValueError
        On the first ``next()`` if IDs and labels differ in length or there
        are fewer patients than ``batch_size``.
    """
    types = ["FLAIR", "T1w" , "T1wCE" , "T2w"]

    scan_ids = list(source_path)
    labels = np.asarray(y_data)
    if len(scan_ids) != len(labels):
        raise ValueError("source_path and y_data must have the same length")
    full_batches = len(scan_ids) // batch_size
    if full_batches == 0:
        # Without this guard the loop below would spin forever without yielding.
        raise ValueError("need at least batch_size patients to build a batch")

    rows_per_batch = batch_size * len(types)
    while True:
        for batch_index in range(full_batches):
            batch_data = np.zeros((rows_per_batch, SIZE, SIZE, NUM_IMAGES))
            batch_label = np.zeros(rows_per_batch)
            row = 0
            first = batch_index * batch_size
            for position in range(first, first + batch_size):
                folder_name = str(scan_ids[position]).zfill(5)
                for t in types:
                    volume = load_dicom_images_3d(folder_name, mri_type=t)
                    batch_data[row, :, :, :] = volume
                    # Keep data and label rows aligned: one label per volume.
                    batch_label[row] = labels[position]
                    row += 1
            yield batch_data, batch_label
                
            
                
             


In [None]:
# Batch generators over the training and validation patients, 5 patients per batch.
train_generator = generator(X_train, 5, y_train)
val_generator = generator(X_valid, 5, y_valid)

In [None]:
# generator() never stops, so looping over it would never finish. Pull a single
# batch from a throw-away generator instead, which also leaves train_generator
# untouched for training.
t, s = next(generator(X_train, 5, y_train))
print(t.shape)

In [None]:
# Number of training epochs passed to the fit call.
num_epochs = 1

In [None]:
# resnet = ResNet50(include_top=False, weights='imagenet', input_shape=(size,size,3))  
# #cnn = Sequential([resnet])
# cnn = Sequential()
# cnn.add(Conv2D(16, 3, input_shape=(size,size, 64)))
# cnn.add(Conv2D(16,(2,2), strides=(1,1)))
# cnn.add(BatchNormalization())

# cnn.add(Conv2D(32,(2,2), strides=(1,1)))
# cnn.add(BatchNormalization()) 

# cnn.add(Conv2D(64,(2,2), strides=(1,1)))
# cnn.add(BatchNormalization()) 

# cnn.add(Flatten())
# cnn.add(Dropout(0.5))

# model= Sequential()
# model.add(TimeDistributed(cnn, input_shape=(20,size,size,64)))
# model.add(GRU(16,input_shape=(None,30,256), return_sequences=True))
# model.add(GRU(8))
# model.add(Dense(2, activation='softmax')) 
# #model = Model(inputs=input_tensor,outputs=out)


In [None]:
# Use tensorflow.keras throughout, matching the imports in the first cell, and
# import the layers from the public `layers` namespace instead of the legacy
# `keras.layers.convolutional` module path.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GRU, Dropout, Flatten, BatchNormalization, Activation
from tensorflow.keras.layers import Conv3D, MaxPooling3D
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras import optimizers

# Four Conv3D -> BatchNorm -> ELU -> MaxPool stages followed by a dense head
# with a 2-way softmax output.
# NOTE(review): generator() yields arrays of shape
# (batch_size*4, SIZE, SIZE, NUM_IMAGES), whereas input_shape below declares a
# per-sample shape of (20, size, size, 64). The leading 20 looks like the batch
# dimension and size (224) differs from SIZE (256) - confirm the intended layout.
model = Sequential()
model.add(Conv3D(64, (3,3,3), strides=(1,1,1), padding='same', input_shape=(20,size,size,64)))
model.add(BatchNormalization())
model.add(Activation('elu'))
# The first pooling stage leaves the third axis at full resolution.
model.add(MaxPooling3D(pool_size=(2,2,1), strides=(2,2,1)))

model.add(Conv3D(128, (3,3,3), strides=(1,1,1), padding='same'))
model.add(BatchNormalization())
model.add(Activation('elu'))
model.add(MaxPooling3D(pool_size=(2,2,2), strides=(2,2,2)))

# model.add(Dropout(0.25))

model.add(Conv3D(256, (3,3,3), strides=(1,1,1), padding='same'))
model.add(BatchNormalization())
model.add(Activation('elu'))
model.add(MaxPooling3D(pool_size=(2,2,2), strides=(2,2,2)))

# model.add(Dropout(0.25))

model.add(Conv3D(256, (3,3,3), strides=(1,1,1), padding='same'))
model.add(BatchNormalization())
model.add(Activation('elu'))
model.add(MaxPooling3D(pool_size=(2,2,2), strides=(2,2,2)))

# Classification head.
model.add(Flatten())
model.add(Dropout(0.5))
model.add(Dense(512, activation='elu'))
model.add(Dropout(0.5))
model.add(Dense(2, activation='softmax'))

In [None]:
from tensorflow.keras.metrics import SparseCategoricalAccuracy

# `learning_rate` is the current keyword; `lr` is a deprecated alias.
# NOTE(review): newer Keras optimizers no longer accept `decay` - confirm against the installed version.
sgd = optimizers.SGD(learning_rate=0.001, decay=1e-6, momentum=0.7, nesterov=True)

# The generators yield integer class ids (0/1), not one-hot vectors, so the
# sparse loss and metric are required for the 2-unit softmax output. The metric
# keeps the name 'categorical_accuracy' so the log keys used in the checkpoint
# filename ({categorical_accuracy}, {val_categorical_accuracy}) stay valid.
model.compile(
    optimizer=sgd,
    loss='sparse_categorical_crossentropy',
    metrics=[SparseCategoricalAccuracy(name='categorical_accuracy')],
)
# summary() prints by itself and returns None, so it is not wrapped in print().
model.summary()

In [None]:
# Timestamp used to build a unique checkpoint directory name.
curr_dt_time = datetime.datetime.now()

In [None]:
# Per-run checkpoint directory, named after the timestamp captured earlier.
model_name = 'model_init' + '_' + str(curr_dt_time).replace(' ','').replace(':','_') + '/'

# exist_ok avoids the separate existence check and is safe when the cell is re-run.
os.makedirs(model_name, exist_ok=True)

# One file per epoch; the name records the training and validation loss and accuracy.
filepath = model_name + 'model-{epoch:05d}-{loss:.5f}-{categorical_accuracy:.5f}-{val_loss:.5f}-{val_categorical_accuracy:.5f}.h5'

# save_freq='epoch' replaces the deprecated `period=1` (save after every epoch).
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=False, save_weights_only=False, mode='auto', save_freq='epoch')

# `min_delta` is the current name of the deprecated `epsilon` argument.
# Halve the learning rate after 2 epochs without val_loss improvement, down to 1e-5.
LR = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, verbose=1, mode='min', min_delta=0.0001, cooldown=0, min_lr=0.00001)
callbacks_list = [checkpoint, LR]

In [None]:
# The generators loop forever, so Keras has to be told how many batches make up
# one epoch and one validation pass; otherwise the first epoch never ends.
_patients_per_batch = 5  # must match the batch size the generators were created with
# Model.fit accepts generators directly; fit_generator is deprecated.
model.fit(train_generator, epochs=num_epochs, verbose=1,
          callbacks=callbacks_list, validation_data=val_generator,
          steps_per_epoch=len(X_train) // _patients_per_batch,
          validation_steps=len(X_valid) // _patients_per_batch,
          class_weight=None, initial_epoch=0)