In [None]:
import os
import pandas as pd
import seaborn as sns
import json
import ast
import glob
from tqdm import tqdm
import gc

import  matplotlib.pyplot as plt
import matplotlib.image as mpimg

import pydicom
import cv2
from pydicom.pixel_data_handlers.util import apply_voi_lut

In [None]:
# Seed the Python and NumPy global RNGs for reproducibility.
# TensorFlow is not imported yet at this point; get_model03 seeds it
# (and re-seeds these two generators with its own values) before building the model.
import numpy as np
import random
seed = 1234
random.seed(seed)
np.random.seed(seed)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical

In [None]:
# Root folder of the competition dataset, relative to the notebook.
data_dir = "../input/rsna-miccai-brain-tumor-radiogenomic-classification"

# MRI sequence names accepted by get_all_image_paths (used as sub-folder names).
mri_types = [
    "FLAIR",
    "T1w",
    "T2w",
    "T1wCE",
]

In [None]:
# Training labels (the BraTS21ID and MGMT_value columns are read later on).
# The path is built from data_dir so the dataset location is defined in one place.
train_df = pd.read_csv(os.path.join(data_dir, "train_labels.csv"))
train_df.head()

In [None]:
# The sample submission supplies the BraTS21ID values of the test patients.
# The path is built from data_dir so the dataset location is defined in one place.
test_df = pd.read_csv(os.path.join(data_dir, "sample_submission.csv"))

print(f"train data: Rows={train_df.shape[0]}, Columns={train_df.shape[1]}")

In [None]:
def load_dicom(path, size = 224):
    """Read one DICOM file and return it as a resized 8-bit image.

    Args:
        path: Path of the DICOM file to read.
        size: Width and height, in pixels, of the returned image.

    Returns:
        A ``uint8`` array resized to ``size`` x ``size``. Pixel values are
        divided by the slice maximum and multiplied by 255, so the brightest
        pixel maps to 255; a slice whose maximum is 0 is left unscaled.
    """
    # dcmread is the supported reader; read_file is only a deprecated alias of it.
    dicom = pydicom.dcmread(path)
    data = dicom.pixel_array
    peak = np.max(data)
    # Skip the division when the maximum is 0 to avoid dividing by zero.
    if peak != 0:
        data = data / peak
    data = (data * 255).astype(np.uint8)
    return cv2.resize(data, (size, size))

In [None]:
def get_all_image_paths(brats21id, image_type, folder='train'):
    """Return the sub-sampled slice paths of one patient and MRI sequence.

    The files found under ``<data_dir>/<folder>/<zero-padded id>/<image_type>/``
    are ordered by the number that follows the last ``-`` in the file name.
    Only the middle half of that ordering (from 25% to 75%) is kept, taking
    every third file, or every file when fewer than 10 files exist.

    Args:
        brats21id: Patient identifier; zero-padded to 5 characters.
        image_type: One of the names listed in ``mri_types``.
        folder: Dataset split sub-folder, ``'train'`` by default.

    Returns:
        A NumPy array of the selected file paths (empty if nothing matched).

    Raises:
        AssertionError: If ``image_type`` is not in ``mri_types``.
        ValueError: If a file name does not end in ``-<integer>`` before its
            extension.
    """
    assert image_type in mri_types

    # Build the path from the shared data_dir rather than a hard-coded copy.
    patient_path = os.path.join(data_dir, folder, str(brats21id).zfill(5))

    def slice_number(path):
        # Only look at the file name, so hyphens in directory names or an
        # extension of a different length cannot corrupt the sort key.
        stem = os.path.splitext(os.path.basename(path))[0]
        return int(stem.rsplit("-", 1)[-1])

    paths = sorted(
        glob.glob(os.path.join(patient_path, image_type, "*")),
        key=slice_number,
    )

    num_images = len(paths)
    start = int(num_images * 0.25)
    end = int(num_images * 0.75)

    # Short series keep every slice of the middle half; longer ones every third.
    interval = 1 if num_images < 10 else 3

    return np.array(paths[start:end:interval])

def get_all_images(brats21id, image_type, folder='train', size=225):
    """Load the selected slices of one patient and sequence as images.

    Args:
        brats21id: Patient identifier forwarded to ``get_all_image_paths``.
        image_type: MRI sequence name forwarded to ``get_all_image_paths``.
        folder: Dataset split sub-folder, ``'train'`` by default.
        size: Target image side length forwarded to ``load_dicom``.

    Returns:
        A list with one ``load_dicom`` result per selected path, in path order.
    """
    # NOTE(review): the default of 225 differs from load_dicom's default of 224;
    # it looks unintentional, but every caller in this notebook passes size explicitly.
    images = []
    for image_path in get_all_image_paths(brats21id, image_type, folder):
        images.append(load_dicom(image_path, size))
    return images

In [None]:
def get_all_data_for_train(image_type, image_size=32):
    """Collect every selected training slice with its label and patient ID.

    Iterates over the rows of the module-level ``train_df`` and loads the
    slices of each patient through ``get_all_images``.

    Args:
        image_type: MRI sequence name to load.
        image_size: Side length the slices are resized to.

    Returns:
        A tuple ``(X, y, train_ids)`` of NumPy arrays holding, per slice, the
        image, the patient's ``MGMT_value`` and the patient's ``BraTS21ID``.
    """
    images_all, labels_all, ids_all = [], [], []

    for row_idx in tqdm(train_df.index):
        row = train_df.loc[row_idx]
        patient_id = int(row['BraTS21ID'])
        slices = get_all_images(patient_id, image_type, 'train', image_size)
        n_slices = len(slices)

        # Repeat the patient-level label and ID once per slice.
        images_all.extend(slices)
        labels_all.extend([row['MGMT_value']] * n_slices)
        ids_all.extend([patient_id] * n_slices)
        assert len(images_all) == len(labels_all)

    return np.array(images_all), np.array(labels_all), np.array(ids_all)

In [None]:
def get_all_data_for_test(image_type, image_size=32):
    """Collect every selected test slice together with its patient ID.

    Iterates over the rows of the module-level ``test_df`` and loads the
    slices of each patient from the ``'test'`` folder.

    Args:
        image_type: MRI sequence name to load.
        image_size: Side length the slices are resized to.

    Returns:
        A tuple ``(X, test_ids)`` of NumPy arrays holding, per slice, the image
        and the patient's ``BraTS21ID``.
    """
    images_all, ids_all = [], []

    for row_idx in tqdm(test_df.index):
        patient_id = int(test_df.loc[row_idx]['BraTS21ID'])
        slices = get_all_images(patient_id, image_type, 'test', image_size)

        # Repeat the patient ID once per slice.
        images_all.extend(slices)
        ids_all.extend([patient_id] * len(slices))

    return np.array(images_all), np.array(ids_all)

In [None]:
# Load the selected FLAIR slices of both splits, resized to 224x224.
X, y, trainidt = get_all_data_for_train(image_type='FLAIR', image_size=224)
X_test, testidt = get_all_data_for_test(image_type='FLAIR', image_size=224)

In [None]:
# Sanity check: images, labels and patient IDs should have the same length.
tuple(arr.shape for arr in (X, y, trainidt))

In [None]:
# Split at patient level: all slices of a patient go to the same side.
# A slice-level split puts slices of the same patient in both sets, which
# leaks information and inflates the validation scores.
train_patients, valid_patients = train_test_split(
    np.unique(trainidt), test_size=0.2, random_state=42
)
valid_mask = np.isin(trainidt, valid_patients)

# Rows keep their original order here; model.fit shuffles array inputs by default.
X_train, X_valid = X[~valid_mask], X[valid_mask]
y_train, y_valid = y[~valid_mask], y[valid_mask]
trainidt_train, trainidt_valid = trainidt[~valid_mask], trainidt[valid_mask]

In [None]:
# Append a trailing axis of length 1 to both image sets.
# Note: re-running this cell appends yet another axis.
X_train, X_valid = [
    tf.expand_dims(images, axis=-1) for images in (X_train, X_valid)
]

In [None]:
# One-hot encode the labels. The class count is pinned to 2 so both arrays
# always match the 2-unit softmax head, even if one split happens to contain
# a single class (to_categorical would otherwise infer the width from the data).
y_train = to_categorical(y_train, num_classes=2)
y_valid = to_categorical(y_valid, num_classes=2)

In [None]:
def get_model03():
    """Build and compile the CNN slice classifier.

    The network rescales its input by 1/255, applies three Conv2D + MaxPool2D
    stages (32, 64 and 128 filters), flattens, and ends in a 512-unit dense
    layer followed by a 2-unit softmax. The input shape is taken from the
    module-level ``X_train``, which must therefore exist before the call.

    Side effects:
        Re-seeds the NumPy, ``random`` and TensorFlow global generators.

    Returns:
        A compiled ``keras.Model`` trained with categorical cross-entropy and
        reporting the metrics ``roc_auc`` and ``accuracy``.
    """
    np.random.seed(0)
    random.seed(12)
    tf.random.set_seed(12)

    inpt = keras.Input(shape=X_train.shape[1:])

    h = keras.layers.experimental.preprocessing.Rescaling(1.0 / 255)(inpt)

    # Three identical convolution + pooling stages with growing filter counts.
    for filters, layer_name in ((32, "Conv_1"), (64, "Conv_3"), (128, "Conv_5")):
        h = keras.layers.Conv2D(
            filters, kernel_size=(3, 3), activation="relu", name=layer_name
        )(h)
        h = keras.layers.MaxPool2D(pool_size=(2, 2))(h)

    h = keras.layers.Flatten()(h)
    h = keras.layers.Dropout(0.1)(h)
    h = keras.layers.Dense(
        512, activation="relu", kernel_initializer=tf.keras.initializers.HeNormal()
    )(h)
    h = keras.layers.Dropout(0.1)(h)

    output = keras.layers.Dense(
        2, activation="softmax", kernel_initializer=tf.keras.initializers.HeNormal()
    )(h)
    model = keras.Model(inpt, output)

    # https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/schedules/ExponentialDecay
    # The schedule was previously built but never handed to the optimizer.
    # It starts at the same 1e-4 rate and only decays every 100000 steps.
    initial_learning_rate = 0.0001
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate,
        decay_steps=100000,
        decay_rate=0.96,
        staircase=True,
    )

    roc_auc = tf.keras.metrics.AUC(name='roc_auc', curve='ROC')
    # metrics.Accuracy tests exact equality between labels and predictions,
    # which is almost never true for softmax outputs; CategoricalAccuracy
    # compares the argmax of the one-hot label and of the prediction instead.
    # The metric name is kept so "val_accuracy" stays available to callbacks.
    accuracy = tf.keras.metrics.CategoricalAccuracy(name="accuracy")
    model.compile(
        loss="categorical_crossentropy",
        optimizer=keras.optimizers.Adam(learning_rate=lr_schedule),
        metrics=[roc_auc, accuracy],
    )
    return model

In [None]:
# File that receives the best full model seen during training.
checkpoint_filepath = "best_model.h5"

# Save the whole model (not only weights) after each epoch in which the
# validation ROC AUC reaches a new maximum.
model_checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor="val_roc_auc",
    mode="max",
    save_best_only=True,
    save_weights_only=False,
    save_freq="epoch",
    verbose=1,
)

# Stop once validation accuracy has not improved for 3 consecutive epochs.
early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    monitor="val_accuracy",
    mode='max',
    patience=3,
)

In [None]:
# roc_auc = tf.keras.metrics.AUC(name='roc_auc', curve='ROC')
# accuracy  = tf.keras.metrics.Accuracy(name = "accuracy")
# model.compile(optimizer="adam", loss="binary_crossentropy", metrics=[accuracy,roc_auc])

In [None]:
# Build the model and print its layer summary.
# Earlier variant kept for reference; get_model02 does not appear in the
# visible part of this notebook, so the line below cannot simply be re-enabled:
# model = get_model02() # LB score 0.676
model = get_model03() # LB score 0.5
model.summary()

In [None]:
# Train for a fixed 100 epochs; the checkpoint callback keeps the best model.
# NOTE(review): early_stopping_cb is defined in this notebook but is not passed
# here, so training always runs for the full epoch count.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=100,
    callbacks=[model_checkpoint_cb],
)

In [None]:
# Reload the model stored by the checkpoint callback.
model_best = keras.models.load_model(checkpoint_filepath)

In [None]:
# Predict a hard class (argmax over the two softmax outputs) for every validation slice.
y_pred = model_best.predict(X_valid)
pred = np.argmax(y_pred, axis=1)

# One row per slice: patient ID and predicted class.
result = pd.DataFrame({"BraTS21ID": trainidt_valid, "MGMT_value": pred})

# Average the slice predictions per patient, then attach the true labels.
# After the merge the prediction is MGMT_value_x and the label is MGMT_value_y.
result2 = (
    result.groupby("BraTS21ID", as_index=False)
    .mean()
    .merge(train_df, on="BraTS21ID")
)

auc = roc_auc_score(y_true=result2.MGMT_value_y, y_score=result2.MGMT_value_x)
print(f"Validation AUC={auc}")

In [None]:
# Predict a hard class (argmax over the two softmax outputs) for every test slice.
y_pred = model_best.predict(X_test)
pred = np.argmax(y_pred, axis=1)

# One row per slice: column 0 holds the patient ID, column 1 the predicted class.
result = pd.DataFrame({0: testidt, 1: pred})
pred

In [None]:
# Template that fixes the set and order of patient IDs in the submission file.
# The path is built from data_dir so the dataset location is defined in one place.
sample_submission = pd.read_csv(os.path.join(data_dir, "sample_submission.csv"))

In [None]:
result.columns=['BraTS21ID','MGMT_value']

# Average the slice-level predictions of each patient.
result2 = result.groupby('BraTS21ID',as_index=False).mean()

# Align predictions with the template by ID. Overwriting the ID column
# positionally would attach predictions to the wrong patients whenever the
# template order differs from the sorted group order or a patient produced
# no slices. Patients without any prediction get the neutral value 0.5.
result2 = sample_submission[['BraTS21ID']].merge(result2, on='BraTS21ID', how='left')
result2['MGMT_value'] = result2['MGMT_value'].fillna(0.5)

# Rounding... 0.907866 -> 0.9
result2['MGMT_value'] = result2['MGMT_value'].apply(lambda x:round(x*10)/10)
# result2['MGMT_value'] = result2['MGMT_value'] # No rounding
result2.to_csv('submission.csv',index=False)
result2