In [None]:
import collections
import glob
import json
import os
import random

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pydicom
import seaborn as sns
import tensorflow as tf
from keras.optimizers import SGD
from pydicom.pixel_data_handlers.util import apply_voi_lut
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GroupShuffleSplit
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical
from tqdm.notebook import tqdm


# MRI series names accepted by get_all_image_paths (one sub-folder per series).
TYPES = ["FLAIR", "T1w", "T2w", "T1wCE"]
WHITE_THRESHOLD = 10 # out of 255
# Patient IDs dropped from the training labels.
# NOTE(review): presumably the cases flagged as unusable by the organisers - confirm.
EXCLUDE = [109, 123, 709]


# Training labels with the excluded patients filtered out in the same expression.
train_df = pd.read_csv(
    "../input/rsna-miccai-brain-tumor-radiogenomic-classification/train_labels.csv"
).loc[lambda labels: ~labels["BraTS21ID"].isin(EXCLUDE)]
# The sample submission doubles as the list of test patient IDs.
test_df = pd.read_csv(
    '../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv'
)

In [None]:
def load_dicom(path, size = 224):
    '''
    Reads a DICOM image and returns it as a (size, size) uint8 array.

    The pixel data is divided by its maximum value (the minimum is NOT
    subtracted, and no VOI LUT / windowing is applied), multiplied by 255,
    truncated to uint8 and finally resized with cv2.resize. An image whose
    maximum is 0 is left as all zeros instead of being divided by zero.

    Not super sure if this kind of scaling is appropriate, but everyone seems to do it.

    path: location of the DICOM file.
    size: side length, in pixels, of the square output image.
    '''
    # dcmread is the supported reader; pydicom.read_file is a deprecated alias
    # that newer pydicom releases no longer provide.
    dicom = pydicom.dcmread(path)
    data = dicom.pixel_array
    peak = np.max(data)
    if peak != 0:
        data = data / peak
    data = (data * 255).astype(np.uint8)
    return cv2.resize(data, (size, size))

def get_all_image_paths(brats21id, image_type, folder='train'): 
    '''
    Returns an array with a sub-sample of the slice paths of one series of one patient.

    The files found under <folder>/<zero-padded id>/<image_type>/ are ordered by
    the integer that follows the last "-" in the path (after dropping the final
    4 characters, i.e. the extension). Only the middle half of that ordering is
    kept (from 25% to 75% of the slice count) and, when the series has 10 or
    more files, only every third path of that range is returned.
    '''
    assert image_type in TYPES

    series_dir = os.path.join(
        "../input/rsna-miccai-brain-tumor-radiogenomic-classification/%s/" % folder,
        str(brats21id).zfill(5),
        image_type,
    )

    def slice_number(path):
        # "<...>-<n>.<ext>" -> n
        return int(path[:-4].split("-")[-1])

    ordered = glob.glob(os.path.join(series_dir, "*"))
    ordered.sort(key=slice_number)

    total = len(ordered)
    # Short series are kept at full density; longer ones are thinned out.
    step = 1 if total < 10 else 3
    first, last = int(total * 0.25), int(total * 0.75)

    return np.array(ordered[first:last:step])

def get_all_images(brats21id, image_type, folder='train', size=225):
    '''
    Loads every slice selected by get_all_image_paths for one patient/series.

    Returns a list with one load_dicom(path, size) result per selected path,
    in the order the paths are returned.
    '''
    loaded = []
    for slice_path in get_all_image_paths(brats21id, image_type, folder):
        loaded.append(load_dicom(slice_path, size))
    return loaded


In [None]:
# Side length, in pixels, passed as `size` to get_all_images by the train/test loaders.
IMAGE_SIZE = 128

def get_all_data_for_train(image_type):
    '''
    Builds the slice-level training arrays for one MRI series type.

    Walks the rows of the module-level train_df and loads the selected slices
    of each patient at IMAGE_SIZE. Returns three arrays of equal length: the
    slices, the patient's MGMT_value repeated once per slice, and the patient's
    BraTS21ID repeated once per slice.
    '''
    slices, labels, patient_ids = [], [], []

    rows = zip(train_df['BraTS21ID'].to_numpy(), train_df['MGMT_value'].to_numpy())
    for raw_id, label in tqdm(rows, total=len(train_df)):
        patient_id = int(raw_id)
        patient_slices = get_all_images(patient_id, image_type, 'train', IMAGE_SIZE)

        slices.extend(patient_slices)
        labels.extend([label] * len(patient_slices))
        patient_ids.extend([patient_id] * len(patient_slices))
        assert len(slices) == len(labels)

    return np.array(slices), np.array(labels), np.array(patient_ids)

def get_all_data_for_test(image_type):
    '''
    Builds the slice-level test arrays for one MRI series type.

    Walks the rows of the module-level test_df and loads the selected slices of
    each patient at IMAGE_SIZE. Returns two arrays of equal length: the slices
    and the owning BraTS21ID repeated once per slice.
    '''
    slices, patient_ids = [], []

    for raw_id in tqdm(test_df['BraTS21ID'].to_numpy(), total=len(test_df)):
        patient_id = int(raw_id)
        patient_slices = get_all_images(patient_id, image_type, 'test', IMAGE_SIZE)
        slices.extend(patient_slices)
        patient_ids.extend([patient_id] * len(patient_slices))

    return np.array(slices), np.array(patient_ids)

In [None]:
# Only one MRI series is used; train and test must be loaded from the same one.
MRI_TYPE = 'T1wCE'

X, y, trainidt = get_all_data_for_train(MRI_TYPE)
X_test, testidt = get_all_data_for_test(MRI_TYPE)

# Slices, labels and patient IDs must all have the same leading dimension.
(X.shape, y.shape, trainidt.shape)

In [None]:
# Display the shapes of the slice array and the label array.
X.shape, y.shape

In [None]:
# Split by PATIENT, not by slice. A plain train_test_split shuffles individual
# slices, so slices of the same patient (which all share one label and look
# nearly identical) end up on both sides and the validation score is inflated.
# GroupShuffleSplit keeps every patient entirely in train or entirely in
# validation; test_size is therefore the fraction of patients held out.
splitter = GroupShuffleSplit(n_splits=1, test_size=0.1, random_state=140)
train_idx, valid_idx = next(splitter.split(X, y, groups=trainidt))

X_train, X_valid = X[train_idx], X[valid_idx]
y_train, y_valid = y[train_idx], y[valid_idx]
trainidt_train, trainidt_valid = trainidt[train_idx], trainidt[valid_idx]

# Guard against patient leakage between the two sets.
assert np.intersect1d(trainidt_train, trainidt_valid).size == 0

# Add the trailing channel axis expected by Conv2D: (n, H, W) -> (n, H, W, 1).
X_train = tf.expand_dims(X_train, axis=-1)
X_valid = tf.expand_dims(X_valid, axis=-1)

# Pin the number of classes so both sets are one-hot encoded with 2 columns
# even if one of them happens to contain a single class.
y_train = to_categorical(y_train, num_classes=2)
y_valid = to_categorical(y_valid, num_classes=2)

X_train.shape, y_train.shape, X_valid.shape, y_valid.shape, trainidt_train.shape, trainidt_valid.shape

In [None]:
# Stand-alone global-average-pooling layer instance.
global_average_layer = keras.layers.GlobalAveragePooling2D()

In [None]:
# Augmentation pipeline: random horizontal flip followed by a random rotation
# with factor 0.2.
data_augmentation = tf.keras.Sequential()
data_augmentation.add(
    tf.keras.layers.experimental.preprocessing.RandomFlip(mode='horizontal')
)
data_augmentation.add(
    tf.keras.layers.experimental.preprocessing.RandomRotation(factor=0.2)
)

In [None]:
# Seed NumPy, Python and TensorFlow before any layer is created so weight
# initialisation and shuffling are repeatable.
np.random.seed(0)
random.seed(12)
tf.random.set_seed(12)

inpt = keras.Input(shape=X_train.shape[1:])

# Scale the uint8 pixels from [0, 255] to [0, 1] exactly once; both branches
# below read this tensor.
pre = keras.layers.experimental.preprocessing.Rescaling(1./255)(inpt)

# ---- Branch 1: stride-1 convolutions with 2x2 max pooling ----
h = keras.layers.Conv2D(32, kernel_size=(3, 3), strides=(1, 1), activation="relu", name="Conv_1", padding="valid")(pre)
h = tf.keras.layers.BatchNormalization(axis=-1)(h)
h = keras.layers.Conv2D(32, kernel_size=(3, 3), strides=(1, 1), activation="relu", name="Conv_1b", padding="same")(h)
h = tf.keras.layers.BatchNormalization(axis=-1)(h)
h = keras.layers.MaxPool2D(pool_size=(2, 2))(h)
h = keras.layers.Conv2D(64, kernel_size=(3, 3), strides=(1, 1), activation="relu", name="Conv_2", padding="valid")(h)
h = tf.keras.layers.BatchNormalization(axis=-1)(h)
h = keras.layers.MaxPool2D(pool_size=(2, 2))(h)
h = tf.keras.layers.BatchNormalization(axis=-1)(h)
h = keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu", name="Conv_3", padding="same")(h)
h = tf.keras.layers.BatchNormalization(axis=-1)(h)
h = keras.layers.Dropout(0.4)(h)
h = keras.layers.Flatten()(h)

# ---- Branch 2: coarse view of the same image ----
# This branch starts directly from `pre`. It used to apply a second
# Rescaling(1/255) on top of `pre`, which shrank its inputs to [0, 1/65025]
# and left the branch with almost no signal.
# MaxPool2D keeps its default 2x2 window but moves it in steps of 5, so each
# pooling layer down-samples aggressively.
h1 = keras.layers.MaxPool2D(strides=(5, 5))(pre)
h1 = keras.layers.Conv2D(32, kernel_size=(3, 3), activation="swish", name="Conv_4b")(h1)
h1 = tf.keras.layers.BatchNormalization(axis=-1)(h1)
h1 = keras.layers.MaxPool2D(strides=(5, 5))(h1)
h1 = keras.layers.Dropout(0.3)(h1)
h1 = keras.layers.Conv2D(32, kernel_size=(3, 3), activation="swish", name="Conv_5b")(h1)
h1 = tf.keras.layers.BatchNormalization(axis=-1)(h1)
h1 = keras.layers.MaxPool2D(strides=(5, 5))(h1)
h1 = keras.layers.Dropout(0.3)(h1)
h1 = keras.layers.Flatten()(h1)

# ---- Head: concatenate both branches and classify into the 2 classes ----
merge = keras.layers.Concatenate()([h, h1])
out = keras.layers.Dropout(0.4)(merge)
out = keras.layers.Dense(150, activation='relu')(out)
output = keras.layers.Dense(2, activation="softmax")(out)

model = keras.Model(inpt, output)

# Keep only the epoch with the best validation AUC (full model, not just weights).
checkpoint_filepath = 'best_model.h5'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=False,
    monitor='val_auc',
    mode='max',
    save_best_only=True,
    save_freq='epoch')

# The metric is named explicitly: an unnamed AUC() is auto-numbered by Keras
# ("auc_1", "auc_2", ...) whenever this cell is re-run in the same kernel, and
# the checkpoint would then never find "val_auc" and never save a model.
model.compile(loss='categorical_crossentropy',
              optimizer=tf.keras.optimizers.SGD(learning_rate=0.0001),
              metrics=[tf.keras.metrics.AUC(name='auc')])

history = model.fit(x=X_train, y=y_train, epochs=95, callbacks=[model_checkpoint_callback], validation_data=(X_valid, y_valid))

In [None]:
# Reload the model file written by the checkpoint callback.
model_best = keras.models.load_model(checkpoint_filepath)

In [None]:
# Patient-level validation AUC.
y_pred = model_best.predict(X_valid)

# Use the softmax probability of class 1 rather than the argmax label: AUC is
# a ranking metric, and hard 0/1 votes throw away the model's confidence.
pred = y_pred[:, 1]

result = pd.DataFrame({'BraTS21ID': trainidt_valid, 'MGMT_value': pred})

# One score per patient: the mean class-1 probability over that patient's slices.
result2 = result.groupby('BraTS21ID', as_index=False).mean()

# After the merge MGMT_value_x is the prediction and MGMT_value_y the true label.
result2 = result2.merge(train_df, on='BraTS21ID')
roc_auc_score(result2.MGMT_value_y, result2.MGMT_value_x)

In [None]:
# Number of rows (one per patient) in the aggregated result frame.
result2.shape[0]

In [None]:
# Build submission.csv with one row per patient listed in the sample submission.
sample = pd.read_csv('../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv')

y_pred = model_best.predict(X_test)

# Class-1 softmax probability per slice (not the argmax label), so the
# per-patient mean is a usable ranking score. The score is deliberately not
# rounded: rounding only creates ties under a rank-based metric.
pred = y_pred[:, 1]

result = pd.DataFrame({'BraTS21ID': testidt, 'MGMT_value': pred})
result2 = result.groupby('BraTS21ID', as_index=False).mean()

# Align on the ID instead of overwriting the ID column positionally: the
# grouped frame is sorted by ID and lacks any patient for which no slice was
# loaded, so a positional overwrite can attach scores to the wrong patients
# and drop rows. Patients without a prediction fall back to the neutral 0.5.
result2 = (
    sample[['BraTS21ID']]
    .merge(result2, on='BraTS21ID', how='left')
    .fillna({'MGMT_value': 0.5})
)

result2.to_csv('submission.csv', index=False)
result2