In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

# import numpy as np # linear algebra
# import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         pass
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

**Importing required libraries**

In [None]:
import pandas as pd
import numpy as np
from tensorflow import keras
from tensorflow.keras.models import Sequential, Model
import cv2
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dense, Flatten, Dropout, Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.metrics import AUC
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
import os
from matplotlib import pyplot as plt
import pydicom
import csv
import gc
from tqdm import tqdm
import tensorflow as tf
import albumentations
import time

### Defining a custom sequential-style model (subclassed Keras `Model`)

In [None]:
class CustomSequentialModel(Model):
    """Subclassed Keras model: conv -> batch norm -> flatten -> dense -> dropout -> 2-unit output.

    Args:
        units: Accepted for interface compatibility; no layer reads it.
        activation: Activation used by the convolution and the hidden dense layer.
        **kwargs: Forwarded unchanged to ``Model.__init__``.
    """

    def __init__(self, units=30, activation='relu', **kwargs):
        super().__init__(**kwargs)
        # Feature extraction on 64x64 single-channel inputs.
        self.layer1 = Conv2D(
            32,
            kernel_size=(3, 3),
            activation=activation,
            input_shape=(64, 64, 1),
        )
        self.layer2 = BatchNormalization()
        # Classification head.
        self.layer3 = Flatten()
        self.layer4 = Dense(32, kernel_initializer='he_normal', activation=activation)
        self.layer5 = Dropout(0.15)
        self.model_output = Dense(2, kernel_initializer='glorot_uniform', activation='sigmoid')

    def call(self, model_input):
        """Run the forward pass on ``model_input`` after casting it to float32."""
        features = tf.cast(model_input, tf.float32)
        hidden_stages = (self.layer1, self.layer2, self.layer3, self.layer4, self.layer5)
        for stage in hidden_stages:
            features = stage(features)
        return self.model_output(features)

### Organising the folder paths and removing unnecessary contents

In [None]:
# Root of the competition data set (read-only Kaggle input directory).
base_folder = "../input/rsna-miccai-brain-tumor-radiogenomic-classification"

# Per-patient labels; the columns used later are "BraTS21ID" and "MGMT_value".
train_data = pd.read_csv(os.path.join(base_folder, "train_labels.csv"))

# Patient folders that are skipped when the image tables are built.
# NOTE(review): presumably cases known to be problematic - confirm against the competition notes.
excluded_patients = ["00109", "00123", "00709"]

# Sub-folders that may be read for each patient: "T1wCE", "FLAIR", "T1w", "T2w".
# Only "T1w" is enabled for this run. The full list used to be assigned here and then
# immediately overwritten, which was a dead store.
image_types = ["T1w"]

### Constructing an image dataframe with file_path and MGMT value (the label)

In [None]:
def construct_image_df(test_type):
    """Build a table with one row per image file found under ``base_folder/test_type``.

    Patient folders listed in ``excluded_patients`` are skipped. For every remaining
    patient, each sub-folder named in ``image_types`` is listed and every file in it
    becomes one row carrying that patient's ``MGMT_value`` from ``train_data``.

    Directory listings are sorted so the row order is the same on every run; the
    seeded train/validation split that follows depends on that order.

    Args:
        test_type: Name of the sub-folder of ``base_folder`` to scan (e.g. ``"train"``).

    Returns:
        DataFrame with columns ``file_path``, ``value`` (the label as a string),
        ``patient_id`` and ``image_type``.

    Raises:
        ValueError: If a patient folder has no matching row in ``train_data``.
    """
    # Build the id -> label lookup once instead of filtering train_data per patient.
    labels_by_patient = dict(
        zip(train_data["BraTS21ID"].tolist(), train_data["MGMT_value"].tolist())
    )
    split_dir = os.path.join(base_folder, test_type)

    image_list = []
    for patient_id in sorted(os.listdir(split_dir)):
        if patient_id in excluded_patients:
            continue
        # Folder names are zero-padded strings; train_data is matched on the integer id.
        patient_key = int(patient_id)
        if patient_key not in labels_by_patient:
            raise ValueError(f"No MGMT_value found in train_data for patient {patient_id!r}")
        patient_record_value = str(labels_by_patient[patient_key])
        for image_type in image_types:
            folder_dir = os.path.join(split_dir, patient_id, image_type)
            for file_name in sorted(os.listdir(folder_dir)):
                image_list.append({
                    "file_path": os.path.join(folder_dir, file_name),
                    "value": patient_record_value,
                    "patient_id": patient_id,
                    "image_type": image_type,
                })
    return pd.DataFrame(image_list)

In [None]:
image_df = construct_image_df("train")

# Split by patient, not by image. All slices of one patient share the same label, so an
# image-level split puts the same patient in both sets and inflates the validation score.
patient_ids = sorted(image_df["patient_id"].unique())
train_patient_ids, valid_patient_ids = train_test_split(patient_ids, test_size=0.2, random_state=7)

# Shuffle the rows so that a batch mixes patients instead of following folder order.
train_df = image_df[image_df["patient_id"].isin(train_patient_ids)].sample(frac=1, random_state=7)
# `test_df` is the held-out validation split here (the name is kept for the cells below).
test_df = image_df[image_df["patient_id"].isin(valid_patient_ids)].sample(frac=1, random_state=7)

assert set(train_df["patient_id"]).isdisjoint(test_df["patient_id"]), "patient leakage between splits"
assert len(train_df) + len(test_df) == len(image_df)
test_df.head()

### Processing images in batches, since loading them all at once exhausts the available RAM

In [None]:
class CustomDataGen(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that reads DICOM files in batches and yields ``(X, y)`` pairs.

    Each batch is built from a slice of ``df``: every file in ``file_path`` is read,
    images whose maximum pixel value is 0 are dropped, the rest are resized to 64x64
    and paired with the one-hot encoding of the ``value`` column.

    Args:
        batch_size: Number of dataframe rows per batch (a batch may contain fewer
            images because all-zero images are dropped).
        df: DataFrame with at least the columns ``file_path`` and ``value``.
        shuffle: When true (the default), row order is reshuffled after every epoch.
        crop_pipeline: Optional callable invoked as ``crop_pipeline(image=array)["image"]``
            by the cropping helper; when ``None`` that helper only resizes.
    """

    IMAGE_SIZE = (64, 64)
    NUM_CLASSES = 2

    def __init__(self, batch_size, df, shuffle=True, crop_pipeline=None):
        # Newer Keras versions expect the base-class initialiser to run.
        super().__init__()
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.df = df
        self.n = len(self.df)
        # Previously read by the cropping helper without ever being assigned.
        self.crop_pipeline = crop_pipeline
        # Positional row order; permuted in on_epoch_end when shuffling is enabled.
        self.indices = np.arange(self.n)

    def __len__(self):
        """Number of batches per epoch (ceiling of rows / batch_size)."""
        return (self.n + self.batch_size - 1) // self.batch_size

    def on_epoch_end(self):
        """Reshuffle the row order between epochs so the ``shuffle`` flag takes effect."""
        if self.shuffle:
            np.random.shuffle(self.indices)

    def __get_resized_image(self, image):
        """Return the dataset's pixel array resized to ``IMAGE_SIZE``."""
        return cv2.resize(image.pixel_array, self.IMAGE_SIZE)

    def __get_output(self, label, classes):
        """One-hot encode ``label`` into ``classes`` columns."""
        return to_categorical(label, num_classes=classes)

    def __get_cropped_image(self, image):
        """Apply ``crop_pipeline`` (if one was given) and resize to ``IMAGE_SIZE``."""
        pixels = image.pixel_array
        if self.crop_pipeline is not None:
            pixels = self.crop_pipeline(image=pixels)["image"]
        return cv2.resize(pixels, self.IMAGE_SIZE)

    def __get_data(self, batches):
        """Load the images and labels for the rows in ``batches``."""
        X_batch, y_batch = [], []
        for file_path, label in zip(batches['file_path'], batches['value']):
            # dcmread is the supported name; read_file is a deprecated alias.
            image = pydicom.dcmread(file_path)
            # Skip images that are entirely zero.
            if np.amax(image.pixel_array) != 0:
                X_batch.append(self.__get_resized_image(image))
                # Labels are stored as strings; convert before encoding.
                y_batch.append(self.__get_output(int(label), self.NUM_CLASSES))
        # Add the trailing channel axis expected by Conv2D.
        return np.expand_dims(X_batch, axis=-1), np.array(y_batch)

    def __getitem__(self, index):
        """Return batch number ``index`` as ``(X, y)``."""
        start = index * self.batch_size
        batch_rows = self.indices[start:start + self.batch_size]
        return self.__get_data(self.df.iloc[batch_rows])

In [None]:
# Batched loaders over the two halves of the image_df split; both read 512 rows per batch.
train_datagen = CustomDataGen(df=train_df, batch_size=512)
valid_datagen = CustomDataGen(df=test_df, batch_size=512)

### Building a Deep Learning Network

In [None]:
# Small CNN for 64x64 single-channel slices. CustomSequentialModel (defined earlier in
# this notebook) is an alternative architecture that is not used here.
model = Sequential([
    Conv2D(16, kernel_size=(4, 4), activation='relu', input_shape=(64, 64, 1)),
    BatchNormalization(),
    MaxPooling2D(pool_size=4, strides=4),

    Conv2D(16, kernel_size=(4, 4), activation='relu'),
    MaxPooling2D(pool_size=2, strides=2),

    Conv2D(16, kernel_size=(4, 4), activation='relu'),
    MaxPooling2D(pool_size=2, strides=2),
    Conv2D(16, kernel_size=(1, 1), activation='relu'),
    # Dense applied before Flatten acts on the channel axis only.
    Dense(8, activation='relu'),
    Flatten(),
    Dense(16, activation='relu', kernel_initializer='he_normal'),
    Dropout(0.25),

    # Softmax is the matching output for categorical_crossentropy on one-hot labels: the two
    # outputs sum to 1, so column 1 can be read directly as P(MGMT_value == 1).
    # The previous sigmoid output produced two independent scores.
    Dense(2, activation='softmax'),
])

model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=[AUC()])
model.summary()

In [None]:
# The generators are Keras Sequences, so the number of steps per epoch comes from their
# __len__. The previous explicit value was a float and repeated the batch size (512).
history = model.fit(train_datagen, validation_data=valid_datagen, epochs=10)

### Constructing a test dataframe

In [None]:
def construct_test_df(test_type="test"):
    """List every image file under ``base_folder/test_type`` as a DataFrame.

    Patient folders named in ``excluded_patients`` are skipped; for the others, each
    sub-folder named in ``image_types`` contributes one row per file.

    Args:
        test_type: Sub-folder of ``base_folder`` to scan.

    Returns:
        DataFrame with columns ``file_path``, ``patient_id`` and ``image_type``.
    """
    split_root = os.path.join(base_folder, test_type)
    rows = []
    for case_id in os.listdir(split_root):
        if case_id in excluded_patients:
            continue
        for modality in image_types:
            series_dir = os.path.join(split_root, case_id, modality)
            rows.extend(
                {
                    "file_path": os.path.join(series_dir, dicom_name),
                    "patient_id": case_id,
                    "image_type": modality,
                }
                for dicom_name in os.listdir(series_dir)
            )
    return pd.DataFrame(rows)

In [None]:
# One row per file under the "test" folder. From here on `test_df` refers to this table,
# no longer to the validation split created earlier.
test_df = construct_test_df(test_type="test")
test_df.head(5)

In [None]:
# Row counts per image type, reported for every remaining column.
test_df.groupby(by=["image_type"]).agg(["count"])

### Processing test images by batch

In [None]:
class TestDataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` yielding unlabeled image batches for prediction.

    Every row of ``df`` produces exactly one image (nothing is filtered out), so the
    concatenated predictions line up with the rows of ``df``.

    Args:
        df: DataFrame with at least a ``file_path`` column.
        batch_size: Number of rows per batch.
    """

    IMAGE_SIZE = (64, 64)

    def __init__(self, df, batch_size):
        # Newer Keras versions expect the base-class initialiser to run.
        super().__init__()
        self.df = df
        self.batch_size = batch_size

    def __get_input_data(self, batches):
        """Read and resize every file listed in ``batches``; returns an array with a trailing channel axis."""
        img_arr = [
            # dcmread is the supported name; read_file is a deprecated alias.
            cv2.resize(pydicom.dcmread(file_path).pixel_array, self.IMAGE_SIZE)
            for file_path in batches['file_path']
        ]
        return np.expand_dims(img_arr, axis=-1)

    def __len__(self):
        """Number of batches (ceiling of rows / batch_size)."""
        return (len(self.df) + self.batch_size - 1) // self.batch_size

    def __getitem__(self, index):
        """Return the images of batch number ``index``."""
        start = index * self.batch_size
        return self.__get_input_data(self.df.iloc[start:start + self.batch_size])

In [None]:
# Run the trained network over every test file, 256 rows per batch.
test_datagen = TestDataGenerator(df=test_df, batch_size=256)
output = model.predict(x=test_datagen)

In [None]:
# Column 1 of the one-hot output is the score for MGMT_value == 1, which is what the
# submission needs. Taking the row-wise maximum instead returned the score of whichever
# class won, so a confident "0" was reported as a high MGMT score.
modified_output = output[:, 1]

In [None]:
# Store the per-file score next to the file it was computed from.
test_df.loc[:, 'MGMT_value'] = modified_output

In [None]:
# Inspect the per-file table, which now carries the MGMT_value column assigned above.
test_df

In [None]:
# Average the per-file scores of each patient; the patient id becomes the BraTS21ID column.
result_df = (
    test_df.groupby('patient_id')['MGMT_value']
    .mean()
    .rename_axis('BraTS21ID')
    .reset_index()
)
# Running row number kept as the leading 'id' column.
result_df.insert(0, 'id', range(len(result_df)))
result_df = result_df.astype({'BraTS21ID': 'string'})

In [None]:
# First rows of the two submission columns.
result_df.loc[:, ["BraTS21ID", "MGMT_value"]].head(5)

In [None]:
# Work on a copy so that the rounding applied later does not alter result_df.
mod_result_df = result_df.copy()

# A notebook only displays a cell's last expression, so the preview has to come last.
# It used to sit above the copy and was never shown.
result_df[["BraTS21ID", "MGMT_value"]].tail()

In [None]:
# Round the per-patient score to one decimal place.
mod_result_df = mod_result_df.assign(
    MGMT_value=lambda frame: frame['MGMT_value'].round(decimals=1)
)

### Submitting the result to Kaggle for evaluation

In [None]:
# Write only the two submission columns, without the row index.
mod_result_df.to_csv(
    'submission.csv',
    columns=['BraTS21ID', 'MGMT_value'],
    index=False,
)

In [None]:
# Dimensions of the per-patient table as (rows, columns).
mod_result_df.shape

In [None]:
# Preview the first rows of mod_result_df after rounding.
mod_result_df.head()