In [16]:
#  Import libraries and define variables
import os
import shutil
import glob
from tqdm import tqdm
from PIL import Image
import nibabel as nib
import numpy as np
import pandas as pd
import pickle as pkl
import cv2
import random as rn
from multiprocessing import Pool,Process
import config
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import MaxPooling2D, Dense, Flatten, Dropout, LeakyReLU, Activation, AveragePooling2D, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.initializers import HeNormal
from keras.layers.convolutional import Conv2D
import tensorflow as tf
from tensorflow.keras.metrics import TruePositives,TrueNegatives,FalsePositives,FalseNegatives,AUC,Recall,Precision
from tensorflow.keras.callbacks import ModelCheckpoint,ReduceLROnPlateau,EarlyStopping,TensorBoard
from tensorflow.keras.regularizers import l2
import time
import shutil
import sys
sys.path.append('..')


# Imaging modalities to process and the two class folder names
modalities = ['T1']
classifications = ['MGMT_positive', 'MGMT_negative']

# Patch size is taken from the project config and unpacked into block_h / block_w;
# the stride is fixed here
block_h, block_w = config.PATCH_SIZE
stride = 2

# Interpolated image dimensions
inter_dim = (110, 90)

# Training hyper-parameters: number of epochs and batch size
epoch = 100
batch_size = 16

# Folder layout of the BraTS dataset; every path is rooted at config.MAIN_DIR
path = config.MAIN_DIR

PATH = f'{config.MAIN_DIR}Data/'
Org_Dir = f'{PATH}Original_Data_Backup/'
Work_Dir = f'{PATH}Working_Data/'
Preprocess_Dir = f'{path}Preprocessed/layers/'

In [2]:
# Build the patient index: one row per (class folder, modality, patient entry)
# found under Work_Dir, with columns ID, MGMT (0 = MGMT_negative, 1 = anything
# else) and Mod.
#
# The outer loop variable is deliberately NOT called `type`: at cell (global)
# scope that name would overwrite the built-in `type()` for every later cell.
rows = []
for class_dir in tqdm(os.listdir(Work_Dir)):
    mgmt_label = 0 if class_dir == 'MGMT_negative' else 1
    for mod in modalities:
        for patient in os.listdir(Work_Dir + class_dir + '/' + mod + '/'):
            rows.append({'ID': patient, 'MGMT': mgmt_label, 'Mod': mod})

# Passing the column names explicitly keeps the expected schema even when no
# patient entries were found (an empty list would otherwise give a frame with
# no columns at all).
df = pd.DataFrame(rows, columns=['ID', 'MGMT', 'Mod'])

  0%|          | 0/2 [00:00<?, ?it/s]

100%|██████████| 2/2 [00:00<00:00, 39.98it/s]


In [3]:
def _load_split(frame):
    """Load every pickled array listed in ``frame`` and pair it with its label.

    ``frame`` must provide the columns 'ID' (file name of the pickle) and
    'MGMT' (0 selects the 'MGMT_negative' folder, anything else
    'MGMT_positive'). Returns two parallel lists: the arrays and their labels.
    """
    samples, labels = [], []
    for i in range(len(frame)):
        mgmt = frame['MGMT'].iloc[i]
        class_dir = 'MGMT_negative' if mgmt == 0 else 'MGMT_positive'
        # NOTE(review): this uses 'preprocessed/layers/' (lower-case p) while the
        # notebook-level Preprocess_Dir uses 'Preprocessed/layers/' - confirm
        # which spelling is correct on case-sensitive file systems.
        file_path = config.MAIN_DIR + 'preprocessed/layers/' + class_dir + '/T1/' + frame['ID'].iloc[i]
        # Context manager so the file handle is closed deterministically.
        # pickle can execute arbitrary code on load: only read files produced by
        # this project's own preprocessing step.
        with open(file_path, 'rb') as handle:
            arrays = pkl.load(handle)
        for arr in arrays:
            samples.append(arr)
            labels.append(mgmt)
    return samples, labels


def data_arr(train, val):
    """Load the pickled T1 arrays for a train/validation split.

    Parameters
    ----------
    train, val : pandas.DataFrame
        Frames with an 'ID' column (pickle file name) and an 'MGMT' column
        (binary label). Each pickle is iterated and every element becomes one
        sample carrying the label of its row.

    Returns
    -------
    tuple of lists
        ``(train_x, train_y, val_x, val_y)``; the values are returned
        unscaled, normalisation is left to the caller.
    """
    train_x, train_y = _load_split(train)
    val_x, val_y = _load_split(val)

    print('data_arr done')
    return train_x, train_y, val_x, val_y

In [40]:
def define_model(input_shape=(90, 110, 1)):
    """Build and compile the small 2-D CNN used for binary MGMT classification.

    Layer stack:
    Conv2D(4, 3x3) -> LeakyReLU(0.1) -> Dropout(0.1) ->
    Conv2D(2, 3x3) -> LeakyReLU(0.1) -> Flatten ->
    Dense(10) -> BatchNormalization -> LeakyReLU(0.1) -> Dense(1, sigmoid).

    Parameters
    ----------
    input_shape : tuple of int, optional
        Shape of a single input sample passed to the first convolution.
        Defaults to ``(90, 110, 1)``, the value that used to be hard-coded.

    Returns
    -------
    tensorflow.keras.models.Sequential
        The model, already compiled with binary cross-entropy loss, the
        'adam' optimizer and accuracy / TP / TN / FP / FN / AUC / recall /
        precision metrics.
    """
    model = Sequential()

    # Two 3x3 'same'-padded convolutions, He-normal initialised and
    # L2-regularised (0.01); only the first one is followed by dropout.
    model.add(Conv2D(4, (3, 3), padding='same', input_shape=input_shape,
                     kernel_initializer=HeNormal(), kernel_regularizer=l2(0.01)))
    model.add(LeakyReLU(alpha=0.1))
    model.add(Dropout(0.1))

    model.add(Conv2D(2, (3, 3), padding='same',
                     kernel_initializer=HeNormal(), kernel_regularizer=l2(0.01)))
    model.add(LeakyReLU(alpha=0.1))

    model.add(Flatten())  # Convert 3D feature map to 1D feature vector.

    # Classification head: batch normalisation is applied before the activation.
    model.add(Dense(10, kernel_initializer=HeNormal(), kernel_regularizer=l2(0.01)))
    model.add(BatchNormalization())
    model.add(LeakyReLU(alpha=0.1))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy', TruePositives(),
                           TrueNegatives(), FalsePositives(),
                           FalseNegatives(), AUC(), Recall(), Precision()])
    return model

In [41]:
#  Function definition --> trains the model for one CV fold and returns the history
def model_training(cv, train, val, use_tensorboard=False):
    """Train a fresh model on one cross-validation fold.

    Parameters
    ----------
    cv : int or str
        Fold identifier; used in the checkpoint file name, the optional
        TensorBoard log directory and the progress messages.
    train, val : pandas.DataFrame
        Rows of the patient index for this fold; they are handed to
        ``data_arr`` which reads their 'ID' and 'MGMT' columns.
    use_tensorboard : bool, optional
        When True a TensorBoard callback (histogram_freq=1) is added to the
        training callbacks. Defaults to False, which matches the previous
        behaviour where the callback was created but never passed to ``fit``.

    Returns
    -------
    tensorflow.keras.callbacks.History
        The object returned by ``model.fit`` so callers can inspect or
        persist the training curves (previously it was discarded).
    """
    # Model initialisation on the first GPU. define_model() already returns a
    # compiled model (binary cross-entropy, adam, same metric list), so it is
    # not compiled a second time here.
    with tf.device('GPU:0'):
        model = define_model()

    # Load the samples of this fold and scale them by 1/255
    # (assumes 8-bit intensity values - TODO confirm against preprocessing).
    X_train, y_train, X_val, y_val = data_arr(train, val)
    print("all data fetched")
    X_train = np.array(X_train) / 255.0
    y_train = np.array(y_train)
    X_val = np.array(X_val) / 255.0
    y_val = np.array(y_val)
    print('Train shape: ', X_train.shape)
    print('Val shape: ', X_val.shape)

    # Model checkpoints: keep only the epochs that improve val_loss. The
    # '{epoch}' / '{val_accuracy}' placeholders are filled in by Keras, so that
    # part of the name must stay a plain (non-f) string.
    checkpoint_filepath = config.MAIN_DIR + f'results/model checkpoints/model(k={cv})'
    model_checkpoint_callback = ModelCheckpoint(
        filepath=checkpoint_filepath + '_epoch-{epoch:02d}_acc-{val_accuracy:.4f}.ckpt',
        monitor='val_loss',
        mode='min',
        save_best_only=True)

    # Reduce the learning rate (x0.2) after 3 epochs without val_loss improvement
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.0001)

    # Stop training after 5 epochs without val_loss improvement
    early_stoping = EarlyStopping(monitor="val_loss", patience=5, mode="min")

    callbacks = [reduce_lr, early_stoping, model_checkpoint_callback]

    # Optional TensorBoard logging; the log directory is derived from
    # config.MAIN_DIR instead of a hard-coded absolute drive path.
    if use_tensorboard:
        callbacks.append(TensorBoard(config.MAIN_DIR + f'tensorboard/logdir_{cv}', histogram_freq=1))

    # Model training
    print(f"Model Training for {cv} was started...")

    # NOTE(review): the batch size is fixed at 8 here while the notebook-level
    # `batch_size` is 16 and unused - confirm which value is intended.
    history = model.fit(X_train, y_train, batch_size=8, epochs=epoch,
                        validation_data=(X_val, y_val,), shuffle=True,
                        callbacks=callbacks)

    print(f"Model Training for cv-{cv} was completed....")
    return history



In [42]:
# Stratified 5-fold split of the patient index on the MGMT label (fixed seed).
skf = StratifiedKFold(n_splits=5, random_state=100, shuffle=True)

# Folds are numbered from 1. Only the first fold is trained: the loop stops
# right after it.
for i, (train_idx, val_idx) in enumerate(skf.split(df['ID'], df['MGMT']), start=1):
    train, val = df.iloc[train_idx], df.iloc[val_idx]
    # Writing the splits to disk is currently disabled; the evaluation cell
    # further down reads files with exactly these names.
    # train.to_csv('splits/train_' + str(i) + '.csv', index=False)
    # val.to_csv('splits/val_' + str(i) + '.csv', index=False)

    model_training(i, train, val)
    if i == 1:
        break



data_arr done
all data fetched
Train shape:  (6808, 90, 110)
Val shape:  (1641, 90, 110)
Model Training for 1 was started...
Epoch 1/100



INFO:tensorflow:Assets written to: D:/MGMT research project/results/model checkpoints\model(k=1)_epoch-01_acc-0.4851.ckpt\assets


INFO:tensorflow:Assets written to: D:/MGMT research project/results/model checkpoints\model(k=1)_epoch-01_acc-0.4851.ckpt\assets


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Model Training for cv-1 was completed....


In [20]:
# Checking the best model for other 4 splits

from sklearn.metrics import accuracy_score

best_model_k = 5

# define_model() returns a model that is already compiled with binary
# cross-entropy, which matches its single sigmoid output, so it is not
# re-compiled here (the former 'sparse_categorical_crossentropy' re-compile did
# not fit this output layer; prediction does not use the loss at all).
model = define_model()
# Raw string: the Windows path contains backslashes that must not be read as
# escape sequences ('\M', '\C' and '\m' are invalid escapes in a normal string).
model.load_weights(r"D:\MGMT research project\Codes\model(k=5)_epoch-01_acc-0.5274.ckpt")

# NOTE(review): this cell expects splits/val_<k>.csv on disk, but the to_csv
# calls in the cross-validation cell are commented out - make sure the files
# exist before running on a fresh kernel.
for fold in range(1, 6):
    if fold == best_model_k:
        continue

    val = pd.read_csv('splits/val_' + str(fold) + '.csv')

    # Only the validation samples are needed for scoring, so an empty frame is
    # passed as the training split instead of loading the whole training set.
    _, _, X_val, y_val = data_arr(val.iloc[:0], val)

    X_val = np.array(X_val) / 255.0
    y_val = np.array(y_val)

    # reshape(-1) keeps a 1-D vector even for a single sample, where squeeze()
    # would collapse the prediction to a 0-d array.
    output = model.predict(X_val)
    output = np.where(output.reshape(-1) > 0.5, 1, 0)
    print("Accuracy on " + str(fold) + " : ", accuracy_score(y_val, output))
    



data_arr done
Accuracy on 1 :  0.52224253503961
data_arr done
Accuracy on 2 :  0.5223188405797101
data_arr done
Accuracy on 3 :  0.6509711595055915
data_arr done
Accuracy on 4 :  0.5833333333333334
