In [None]:
# importing the necessary libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
#from PIL import Image 
import seaborn as sns
import os
import pydicom as dicom
#from pympler import asizeof
from sklearn.model_selection import train_test_split
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPool2D, Flatten, Dropout, BatchNormalization
from keras import layers
from keras.layers.experimental import preprocessing
from keras.preprocessing.image import ImageDataGenerator
import cv2
from random import randrange
from skimage.transform import resize
from keras.applications.inception_v3 import InceptionV3
from sklearn.metrics import roc_auc_score

In [None]:
# Root folder that holds one sub-folder per test case.
test = "../input/rsna-miccai-brain-tumor-radiogenomic-classification/test"

# Label table; its MGMT_value column is read by the training loader.
train_labels = pd.read_csv(
    filepath_or_buffer="../input/rsna-miccai-brain-tumor-radiogenomic-classification/train_labels.csv"
)

In [None]:
# defining a function to read images for training

def load_train_images(path_train):
    """Load, filter and normalise training slices from every case folder.

    For each case directory under ``path_train`` the first modality folder
    (in sorted order) is read, and only the first ninth of its slice files
    (in sorted path order) is considered. A slice is kept when its raw pixel
    sum exceeds 100000 and, after resizing to 150x150, replicating to three
    channels and scaling by its own maximum, its sum exceeds 3000.

    Parameters
    ----------
    path_train : str
        Directory containing one sub-directory per training case.

    Returns
    -------
    array : numpy.ndarray
        Kept images stacked along the first axis, each of shape
        (150, 150, 3), divided by the global maximum.
    label : list
        One entry per kept image, read from ``train_labels.MGMT_value``.

    Raises
    ------
    ValueError
        If no slice passes the filters.
    """
    IMG_PX_SIZE = 150
    cases_to_be_excluded = ['../input/rsna-miccai-png/train/00109',
                            '../input/rsna-miccai-png/train/00123',
                            '../input/rsna-miccai-png/train/00709']
    # The exclusion list is rooted at a different dataset folder than
    # path_train, so cases are matched on their folder name instead of on the
    # full path. (The previous check compared the whole list of paths against
    # a string and therefore never excluded anything.)
    excluded_ids = {os.path.basename(p) for p in cases_to_be_excluded}

    array = []
    label = []
    path_cases = sorted(f.path for f in os.scandir(path_train))
    for i, case_path in enumerate(path_cases):
        if os.path.basename(case_path) in excluded_ids:
            continue

        mri_type = sorted(f.path for f in os.scandir(case_path))
        img_path = sorted(f.path for f in os.scandir(mri_type[0]))  # 0 for flair images

        # Only the first ninth of the slice files is read (//10 gives ~6.4k images).
        for slice_path in img_path[:len(img_path) // 9]:
            img = dicom.dcmread(slice_path)
            # Skip slices whose raw intensities sum to (almost) nothing.
            if img.pixel_array.sum() <= 100000:
                continue

            resized_img = np.array(resize(img.pixel_array, (IMG_PX_SIZE, IMG_PX_SIZE)))
            # Replicate the single channel three times to get an RGB-shaped input.
            stacked_img = np.stack((resized_img,) * 3, axis=-1)
            stacked_img_normalize = stacked_img / np.max(stacked_img)

            if stacked_img_normalize.sum() > 3000:
                array.append(stacked_img_normalize)
                # NOTE(review): the label is looked up by the case's position in
                # the sorted directory listing; this assumes train_labels rows
                # are in that same order -- confirm against the CSV.
                label.append(train_labels.MGMT_value[i])

    if not array:
        raise ValueError(f"No usable images found under {path_train!r}")

    # Explicit conversion: dividing a Python list only worked through NumPy's
    # reflected operator.
    array = np.asarray(array)
    array = array / np.max(array)
    return array, label

In [None]:
# Load the training slices (about 7 thousand images) together with their labels.

train_path = "../input/rsna-miccai-brain-tumor-radiogenomic-classification/train"
pixels, y = load_train_images(train_path)

# Both counts are expected to match: one label per kept image.
n_images, n_labels = len(pixels), len(y)
print("Number of images loaded are ", n_images)
print("Number of labels loaded are ", n_labels)

In [None]:
# Class-balance check: bar chart of how many loaded slices carry each label.

fig, ax = plt.subplots(figsize=(15, 7))
sns.countplot(x=y, ax=ax)

In [None]:
# Visualizing 6 random images (3 x 2 grid), each titled with its label.

plt.figure(figsize=(18,12))
for i in range(6):
    plt.subplot(3,2,i+1)
    # Draw from the whole loaded set: a fixed upper bound of 500 would raise an
    # IndexError on smaller loads and would only ever show the first 500 slices.
    random_number = randrange(len(pixels))
    plt.imshow(pixels[random_number])
    plt.title(y[random_number])
    plt.axis("off")

In [None]:
# Data augmentation to reduce overfitting.
#
# Location plays a major role in detecting the promoter, therefore only minor
# manipulations are applied: shifts and flips are deliberately left disabled.

datagen = ImageDataGenerator(
        rotation_range = 10,  # randomly rotate images by up to 10 degrees
        zoom_range = 0.30,    # randomly zoom images by up to 30%
        )

# No datagen.fit(...) call is needed here: fitting only computes statistics for
# featurewise_center, featurewise_std_normalization and zca_whitening, none of
# which are enabled. Calling it anyway copies the whole image array for nothing.
# If one of those options is switched on later, fit on the training split only
# so that no hold-out statistics leak into training.

In [None]:
# Stratified 80/20 split. random_state pins the shuffle so that the hold-out set,
# and therefore the reported AUC, is the same on every run.
# NOTE(review): the split is made per image, so slices that come from the same
# case can land in both train and test; a per-case (grouped) split would give a
# more honest estimate -- the loader would have to return case ids for that.
train_x, test_x, train_y, test_y = train_test_split(pixels, y, test_size = 0.20,
                                                    stratify = y, random_state = 42)

In [None]:
# Report how many samples ended up in each part of the split.
for caption, split in (
    ("Size of train_x", train_x),
    ("Size of train_y", train_y),
    ("Size of test_x", test_x),
    ("Size of test_y", test_y),
):
    print(caption, len(split))

In [None]:
# defining a function to create a model

def RSNA_model():
    """Build and compile the CNN that classifies 150x150x3 slices into two classes.

    The network is seven Conv2D(kernel_size=2, padding="same", relu) +
    MaxPooling2D stages with 16, 32, 64, 64, 128, 128 and 512 filters,
    followed by Flatten, Dense(20), Dense(10) and a 2-unit output layer.

    The output layer uses softmax: the two units stand for mutually exclusive
    classes and the model is trained with sparse_categorical_crossentropy,
    which expects a probability distribution over the classes. Independent
    sigmoid units do not produce one.

    Returns
    -------
    keras.Sequential
        The compiled model (Adam optimizer, accuracy metric). Column 1 of its
        predictions is the probability assigned to label 1.
    """
    # Designing our model
    model = keras.Sequential([

        layers.Conv2D(filters=16,kernel_size=2,padding="same",activation="relu",input_shape=(150, 150, 3)),
        layers.MaxPooling2D(),

        layers.Conv2D(filters=32,kernel_size=2,padding="same",activation="relu"),
        layers.MaxPooling2D(),

        layers.Conv2D(filters=64,kernel_size=2,padding="same",activation="relu"),
        layers.MaxPooling2D(),

        layers.Conv2D(filters=64,kernel_size=2,padding="same",activation="relu"),
        layers.MaxPooling2D(),

        layers.Conv2D(filters=128,kernel_size=2,padding="same",activation="relu"),
        layers.MaxPooling2D(),

        layers.Conv2D(filters=128,kernel_size=2,padding="same",activation="relu"),
        layers.MaxPooling2D(),

        layers.Conv2D(filters=512,kernel_size=2,padding="same",activation="relu"),
        layers.MaxPooling2D(),

        layers.Flatten(),

        layers.Dense(20, activation='relu'),

        layers.Dense(10, activation='relu'),

        # One unit per class; softmax makes the two outputs sum to 1.
        layers.Dense(2, activation='softmax')
    ])

    # Compiling our model; labels are integer class ids, hence the sparse loss.
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

In [None]:
# Build the compiled network via RSNA_model() and print its layer summary.
model = RSNA_model()

model.summary()

In [None]:
# Early stopping is currently left disabled:
#callback = keras.callbacks.EarlyStopping(monitor = "loss", patience=3, restore_best_weights=True)

# Augmented training batches are produced on the fly by the generator.
train_batches = datagen.flow(train_x, train_y, batch_size=600)

# saving the training details in history variable
history = model.fit(train_batches, epochs=10)

In [None]:
# Score the held-out split; column 1 of the output is used as the score for label 1.
preds = model.predict(test_x)
prediction = preds[:, 1]

# ROC AUC of those scores against the held-out labels (shown as the cell output).
roc_auc_score(y_true=test_y, y_score=prediction)

In [None]:
# Distribution of the predicted scores on the held-out split.
# displot is a figure-level function that always creates its own figure, so a
# preceding plt.figure(figsize=...) only leaves an empty figure behind. The
# 12 x 8 inch size is requested through height/aspect instead.
sns.displot(prediction, height=8, aspect=1.5)

In [None]:
# A quick check with test dataset 

def load_test_images(path_test):
    """Load one normalised slice per test case.

    For each case directory under ``path_test`` the first modality folder
    (in sorted order) is scanned in sorted path order. The first slice whose
    raw pixel sum exceeds 100000 and whose resized, 3-channel, max-scaled
    version sums to more than 2000 is kept; the remaining slices of that case
    are skipped. A case without any qualifying slice contributes no image.

    Parameters
    ----------
    path_test : str
        Directory containing one sub-directory per test case.

    Returns
    -------
    numpy.ndarray
        Kept images stacked along the first axis, each of shape
        (150, 150, 3), divided by the global maximum.

    Raises
    ------
    ValueError
        If no slice passes the filters.
    """
    IMG_PX_SIZE = 150
    array_1 = []
    path_cases = sorted(f.path for f in os.scandir(path_test))
    for case_path in path_cases:
        mri_type = sorted(f.path for f in os.scandir(case_path))
        # Index 0 is the same modality folder the training loader reads (flair).
        img_path = sorted(f.path for f in os.scandir(mri_type[0]))
        for slice_path in img_path:
            img = dicom.dcmread(slice_path)
            # Skip slices whose raw intensities sum to (almost) nothing.
            if img.pixel_array.sum() <= 100000:
                continue

            resized_img = np.array(resize(img.pixel_array, (IMG_PX_SIZE, IMG_PX_SIZE)))
            # Replicate the single channel three times to get an RGB-shaped input.
            stacked_img = np.stack((resized_img,) * 3, axis=-1)
            stacked_img_normalize = stacked_img / np.max(stacked_img)

            # NOTE(review): the training loader uses 3000 as this threshold --
            # confirm the difference is intentional.
            if stacked_img_normalize.sum() > 2000:
                array_1.append(stacked_img_normalize)
                break  # one slice per case is enough for this quick check

    if not array_1:
        raise ValueError(f"No usable images found under {path_test!r}")

    # Explicit conversion: dividing a Python list only worked through NumPy's
    # reflected operator.
    array_1 = np.asarray(array_1)
    array_1 = array_1 / np.max(array_1)

    # The images come from folder index 0 (flair), not from the t1wce folder.
    print("Number of flair images loaded are ", len(array_1))

    return array_1

In [None]:
# Load the test images (at most one qualifying slice per case) from the test folder.
pixels_1 = load_test_images(test)

In [None]:
# Score the test images; column 1 of the output is used as the score for label 1.
preds_1 = model.predict(pixels_1)
prediction_1 = preds_1[:,1]

# displot is figure-level and creates its own figure, so the 12 x 8 inch size
# is passed through height/aspect rather than a separate plt.figure call
# (which would only leave an empty figure behind).
sns.displot(prediction_1, height=8, aspect=1.5)

In [None]:
# saving the model

#model.save("rsna_miccai_10_b600_flair_7k_0.70auc_imgs.h5")