# CNN model with Bayesian-searched hyperparameters

We will use the `bayesian-optimization` library to search for hyperparameters with the Bayesian optimization method. Install it with either of the following commands:

pip install bayesian-optimization

        or

conda install -c conda-forge bayesian-optimization


In [None]:
%load_ext tensorboard
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Conv3D,MaxPool3D,Flatten,Dropout,BatchNormalization,LeakyReLU
from tensorflow.keras.callbacks import ModelCheckpoint, Callback
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import TensorBoard
from tensorboard.plugins.hparams import api as hp
from tensorflow.keras.models import load_model, save_model
from tensorflow.keras.optimizers import Adam
!rm -rf ./logs/

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import os
from os import path
import gc
import matplotlib
import nvgpu
import multiprocessing
import neptune
from functools import partial
from bayes_opt import BayesianOptimization
import csv

In [None]:
# Query GPU information through nvgpu; as the last expression of the cell the
# result is displayed in the notebook.
nvgpu.gpu_info()

In [None]:
# Drop any Keras state left over from earlier runs and reclaim Python garbage
# before touching the GPU configuration.
K.clear_session()
gc.collect()
# Let TensorFlow grow GPU memory on demand instead of reserving it all up front.
# Iterating over the device list (rather than indexing `gpus[0]`) keeps the cell
# working on CPU-only machines, where the list is empty, and applies the same
# setting to every visible GPU.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

In [None]:
# Input directories, one per outcome class. They are listed with os.listdir and
# joined with os.path.join further down, so the inconsistent trailing slash is
# harmless.
# NOTE(review): absolute, machine-specific paths -- presumably these folders hold
# one .npy volume per patient; confirm before running elsewhere.
train_alive = "/home/airadiomicslab/OneDrive/RTOGData/ready64_18M_HD/alive"
train_dead  = "/home/airadiomicslab/OneDrive/RTOGData/ready64_18M_HD/dead/"

In [None]:
def load_patient_images(input_path):
    """Load one saved NumPy file and return its content as an array.

    Parameters
    ----------
    input_path : str or path-like
        Location of the file to read with `np.load`.

    Returns
    -------
    numpy.ndarray
        A fresh array built from the loaded data.
    """
    # The file is read exactly once; the previous version loaded it twice and
    # threw the first result away, doubling the disk I/O for every patient.
    return np.array(np.load(input_path))

In [None]:
# File names found in each class directory. os.listdir returns them in
# arbitrary order.
alivelist = os.listdir(train_alive)
deadlist = os.listdir(train_dead)

In [None]:
# Full path of every file, one list per outcome class.
path_list_alive = [os.path.join(train_alive, fname) for fname in alivelist]
path_list_dead = [os.path.join(train_dead, fname) for fname in deadlist]

# Class labels as plain Python lists: 0.0 for each "alive" file and 1.0 for
# each "dead" file, in the same order as the path lists above.
labels_alive = list(np.zeros(len(alivelist)))
labels_dead = list(np.ones(len(deadlist)))

In [None]:
# Load every volume: "alive" files first, then "dead" files, so that the row
# order of `images` matches the order of `labels` built below.
slices_alive = [load_patient_images(p) for p in path_list_alive]
slices_dead = [load_patient_images(p) for p in path_list_dead]

# Stack straight into one (n_patients, ...) array. The previous version wrapped
# the list in another list, converted that to a 5-D array and sliced the extra
# axis away again. np.stack also raises immediately if volumes differ in shape.
images = np.stack(slices_alive + slices_dead)
# Release the per-class lists; the data now lives in `images`.
slices_alive.clear()
slices_dead.clear()

# Same ordering for the labels: all "alive" entries followed by all "dead" ones.
labels = np.array(labels_alive + labels_dead)
labels_alive.clear()
labels_dead.clear()

print(f"Labels Shape for input into CNN: {labels.shape}")
print(f"Data Shape for input into CNN: {images.shape}")
gc.collect()

In [None]:
# Convert the image array to float32 and display the resulting dtype.
images = images.astype(dtype='float32')
images.dtype

In [None]:
# Force a garbage-collection pass after the dtype conversion of `images`.
gc.collect()

In [None]:
# Convert the labels to uint8 and display the resulting dtype.
labels = labels.astype(dtype='uint8')
labels.dtype

In [None]:
# Force a garbage-collection pass after the dtype conversion of `labels`.
gc.collect()

In [None]:
# Hold out 20% of the patients for validation. The split is seeded so that the
# same validation set is used after every kernel restart; otherwise accuracies
# logged by earlier optimisation sessions are not comparable with new ones.
xtrain, xtest, ytrain, ytest = train_test_split(images, labels, test_size = 0.2, random_state = 42)
gc.collect()

In [None]:
# The full image array is no longer needed once the train/test arrays exist.
del images

In [None]:
# The full label array is no longer needed once the train/test arrays exist.
del labels

In [None]:
# Report the shapes of both splits and of their label vectors.
print(f"Shape for Training Set is: {xtrain.shape} and corrsponding labels shape is: {ytrain.shape} ")
print(f"Shape for Test Set is:     {xtest.shape} and corrsponding labels shape is:  {ytest.shape} ")

In [None]:
# Reshape to (n_samples, 64, 64, 64, 1): the trailing axis of size 1 matches
# the `image_shape` given to the first Conv3D layer.
# NOTE(review): the 64x64x64 volume size is hard-coded here -- confirm it
# matches the stored data.
xtrain = xtrain.reshape(len(xtrain),64,64,64,1)
xtest = xtest.reshape(len(xtest),64,64,64,1)

print(f"New Shape for Training data required for MODEL is: {xtrain.shape} ")
print(f"New Shape for Test data is: required for MODEL is: {xtest.shape} ")

In [None]:
#xtrain = xtrain.astype(dtype='float32')
#xtest = xtest.astype(dtype='float32')

In [None]:
#ytrain = ytrain.astype(dtype='uint8')
#ytest = ytest.astype(dtype='uint8')

In [None]:
# Visual sanity check: show the slice at index 32 along the third spatial axis
# of training sample 15, in grayscale.
plt.imshow(xtrain[15][:,:,32,0],cmap='gray')

In [None]:
# Reset Keras state and collect garbage before the model-building cells.
K.clear_session()
gc.collect()

In [None]:
# Checkpoint files: one for the best validation accuracy, one for the best
# validation loss.
filepathAcc = "/home/airadiomicslab/OneDrive/BayesianModels/RTOGBayesianBestAccModel.hdf5"
filepathLoss = "/home/airadiomicslab/OneDrive/BayesianModels/RTOGBayesianBestLossModel.hdf5"
# Each of these stays a one-element list so it can be passed directly as
# `callbacks=...` to `model.fit` (`early_stop` is used that way).
checkpointAcc = [ModelCheckpoint(filepathAcc,monitor='val_accuracy',verbose=1,save_best_only=True,mode='max')]
checkpointLoss = [ModelCheckpoint(filepathLoss,monitor='val_loss',verbose=1,save_best_only=True,mode='min')]
# Stop training once val_loss has not improved for 10 consecutive epochs.
early_stop = [EarlyStopping(monitor='val_loss',patience=10)]
# Concatenate into one flat list of callback objects. The previous
# `[early_stop, checkpointAcc, checkpointLoss]` produced a list of lists, which
# Keras cannot use as `callbacks`.
callback_list = early_stop + checkpointAcc + checkpointLoss

In [None]:
# Input shape given to the first Conv3D layer (no batch axis).
image_shape=(64,64,64,1)

In [None]:
# Output directories: saved models and the CSV dumps written when a trial
# runs out of resources. Both are used by plain string concatenation, so the
# trailing slash is required.
path2SaveModel = "/home/airadiomicslab/OneDrive/RTOGModels/"
path2SaveError = "/home/airadiomicslab/OneDrive/RTOGRunErrors/"

In [None]:
def QARCEsoModel(batch_size,learning_rate,L2_2,L2_3,BNConv1,BNConv2,BNDense1,
                 BNDense2,BNDense3):
    """Build, train and evaluate one 3-D CNN configuration.

    Reads the module-level names `image_shape`, `xtrain`, `ytrain`, `xtest`,
    `ytest` and `early_stop`.

    Parameters
    ----------
    batch_size : int
        Mini-batch size for `model.fit`; also used to derive `steps_per_epoch`.
    learning_rate : int
        Index into the Adam step-size table: 0 or 1 -> 0.01, 2 -> 0.001,
        3 -> 0.0001, any other value -> 0.00001.
    L2_2, L2_3 : truthy/falsy
        Select the L2 kernel penalty of 'Conv3D_FirstLayer' and
        'Conv3D_SecondLayer' respectively: 0.01 when truthy, else 0.001.
    BNConv1, BNConv2, BNDense1, BNDense2, BNDense3 : truthy/falsy
        When truthy, a BatchNormalization layer is inserted after the
        corresponding convolutional or dense block.

    Returns
    -------
    tuple
        `(accuracy, model)`: the accuracy reported by `model.evaluate` on
        `xtest`/`ytest`, and the trained model.
    """
    net = Sequential()

    # Input block: the only layer that declares the input shape.
    net.add(Conv3D(filters=16,kernel_size=(5,7,5),strides=(1,1,1),padding='same',
                   input_shape=image_shape,activation='relu',name='Conv3D_InputLayer'))
    net.add(MaxPool3D(pool_size=(2,3,2),name='MaxPool4InputLayer'))

    # Regularised conv blocks: conv -> max-pool -> optional batch norm -> dropout.
    conv_blocks = (
        (32,'Conv3D_FirstLayer',0.01 if L2_2 else 0.001,(2,3,2),'MaxPool4FirstLayer',BNConv1,0.2),
        (128,'Conv3D_SecondLayer',0.01 if L2_3 else 0.001,(3,3,3),'MaxPool4SecondLayer',BNConv2,0.5),
    )
    for n_filters, conv_name, weight_decay, pool, pool_name, use_bn, drop_rate in conv_blocks:
        net.add(Conv3D(filters=n_filters,kernel_size=(3,3,3),strides=(1,1,1),padding='same',
                       kernel_regularizer=tf.keras.regularizers.l2(weight_decay),
                       activation='relu',name=conv_name))
        net.add(MaxPool3D(pool_size=pool,name=pool_name))
        if use_bn:
            net.add(BatchNormalization())
        net.add(Dropout(drop_rate))

    net.add(Flatten())

    # Dense head: dense -> optional batch norm -> dropout, three times.
    dense_blocks = ((512,BNDense1,0.5),(256,BNDense2,0.2),(128,BNDense3,0.2))
    for n_units, use_bn, drop_rate in dense_blocks:
        net.add(Dense(n_units,activation='relu'))
        if use_bn:
            net.add(BatchNormalization())
        net.add(Dropout(drop_rate))

    # Single sigmoid unit for the binary outcome.
    net.add(Dense(1,activation='sigmoid'))

    # Translate the integer index into an actual step size.
    # NOTE(review): indices 0 and 1 both map to 0.01 -- confirm whether one of
    # them was meant to be a different value.
    step_sizes = {0: 0.01, 1: 0.01, 2: 0.001, 3: 0.0001}
    step_size = step_sizes.get(learning_rate, 0.00001)

    net.compile(loss='binary_crossentropy',
                optimizer=Adam(learning_rate = step_size, name='Adam'),
                metrics=['accuracy'])
    # INSERT CHECKPOINT FOR SAVING MODEL
    net.fit(xtrain,ytrain,
            batch_size=batch_size,
            epochs=500,
            validation_data=(xtest,ytest),
            steps_per_epoch=xtrain.shape[0]//batch_size,
            callbacks=early_stop,
            verbose=1,
            shuffle=True)
    # evaluate returns [loss, accuracy]; only the accuracy is reported.
    scores = net.evaluate(xtest,ytest,batch_size=8)
    _, accuracy = scores
    return accuracy,net

In [None]:
def generate_cnn(batch_size,learning_rate,L2_2,L2_3,BNConv1,BNConv2,BNDense1,BNDense2,BNDense3):
    """Objective function for the Bayesian optimizer: run one trial, return its accuracy.

    Every argument arrives as a float proposed by the optimizer and is rounded
    to the nearest integer before being passed to `QARCEsoModel`. Each trial is
    tracked as its own Neptune experiment, and the model is saved to
    `path2SaveModel` when its accuracy exceeds 0.80 / 0.85 / 0.90 / 0.95.

    Uses the module-level `session_num` (trial counter, incremented once per
    call) and `last_acc` (accuracy of the most recent successful trial).

    Returns
    -------
    float
        Accuracy returned by `QARCEsoModel`. If TensorFlow raises
        `ResourceExhaustedError`, the parameters are appended to a CSV file in
        `path2SaveError` and the last successful accuracy is returned instead
        (0.0 when no trial has succeeded yet).
    """
    global session_num
    global last_acc
    # define parameters
    params = {
        'batch_size': int(np.around(batch_size)),
        'learning_rate' :  int(np.around(learning_rate)),
        'L2_2' : int(np.around(L2_2)),
        'L2_3' : int(np.around(L2_3)),
        'BNConv1' : int(np.around(BNConv1)),
        'BNConv2' : int(np.around(BNConv2)),
        'BNDense1' : int(np.around(BNDense1)),
        'BNDense2' : int(np.around(BNDense2)),
        'BNDense3' : int(np.around(BNDense3))
    }
    run_name = "run-%d" % session_num
    # Counted exactly once per trial, whether it succeeds or fails; the error
    # path used to increment a second time and made run numbers skip.
    session_num +=1
    print('--- Starting trial: %s' % run_name)
    for value in params:
        print(f'Parameter values for this run are {value}: {params[value]}')
    try:
        with neptune.create_experiment(name='RTOG0617ModelRun3',params=params) as BayesianOptimizer:
            valid_acc,model = QARCEsoModel(**params)
            last_acc = valid_acc
            # Save and tag the model under the highest accuracy tier it reaches.
            for threshold, tier in ((0.95,'95'),(0.90,'90'),(0.85,'85'),(0.80,'80')):
                if valid_acc > threshold:
                    save_model(model,filepath=path2SaveModel+'bestmodel' + tier + run_name +'.hdf5')
                    BayesianOptimizer.append_tag('BestAcc' + tier)
                    break
            history = model.history.history
            # Per-epoch curves are logged point by point; each summary value is
            # logged once instead of being repeated for every epoch.
            for i in history['accuracy']:
                BayesianOptimizer.log_metric('Training Accuracy', i)
            BayesianOptimizer.log_metric('Max_Training_Accuracy', max(history['accuracy']))
            for i in history['val_accuracy']:
                BayesianOptimizer.log_metric('Validation_Accuracy', i)
            BayesianOptimizer.log_metric('Max Val_Accuracy', max(history['val_accuracy']))
            for i in history['loss']:
                BayesianOptimizer.log_metric('Training Loss', i)
            BayesianOptimizer.log_metric('Min Training_Loss', min(history['loss']))
            for i in history['val_loss']:
                BayesianOptimizer.log_metric('Validation Loss', i)
            BayesianOptimizer.log_metric('Min Val_Loss', min(history['val_loss']))
    except tf.errors.ResourceExhaustedError:
        print('RESOURCE GOT EXHAUSTED')
        # Record the failing configuration. The `with` block closes the file;
        # newline="" is what the csv module expects for files it writes.
        with open(path2SaveError + "ErrParams"+ run_name + ".csv","a",newline="") as csv_file:
            csv_writer = csv.writer(csv_file)
            for key,value in params.items():
                csv_writer.writerow([key,value])
        # `last_acc` does not exist until one trial has succeeded; reading it
        # through globals() avoids a NameError when the very first trial fails.
        valid_acc = globals().get('last_acc', 0.0)
    finally:
        # Always release the graph/GPU state before the next trial.
        tf.keras.backend.clear_session()
    return valid_acc

In [None]:
from bayes_opt.logger import JSONLogger
from bayes_opt.event import Events

In [None]:
# Search space for the optimizer. Values are (lower, upper) bounds;
# `generate_cnn` rounds each sampled float to the nearest integer, so the
# (0, 1) entries behave as on/off switches and `learning_rate` as an index.
pbounds = dict(
    batch_size=(4, 64),
    learning_rate=(0, 4),
    **{
        switch: (0, 1)
        for switch in ('L2_2', 'L2_3', 'BNConv1', 'BNConv2',
                       'BNDense1', 'BNDense2', 'BNDense3')
    }
)
CNN_BAYESIAN = BayesianOptimization(
    generate_cnn,
    pbounds=pbounds,
    verbose=2,
)

In [None]:
# Subscribe a JSON logger to the optimizer's OPTIMIZATION_STEP events so each
# step is written to the given file.
logger = JSONLogger(path="/home/airadiomicslab/OneDrive/logs/RTOGBayesianlogs.json")
CNN_BAYESIAN.subscribe(Events.OPTIMIZATION_STEP, logger)

In [None]:
# SECURITY: never store the Neptune API token in the notebook. Read it from
# the NEPTUNE_API_TOKEN environment variable instead. The token that used to
# be hard-coded here has been exposed and should be revoked and regenerated.
api_token = os.environ.get("NEPTUNE_API_TOKEN")

In [None]:
# Trial counter read and incremented by `generate_cnn`.
session_num = 1
# Select the Neptune project that receives the experiments.
neptune.init('kundan25/RTOG0617',api_token=api_token)
neptune.set_project('kundan25/RTOG0617')
# 25 initial exploration points followed by 2000 optimisation steps.
init_points = 25
n_iter = 2000
# Parent experiment for the whole search; `generate_cnn` additionally creates
# one experiment per trial. The tag is only added once `maximize` returns.
with neptune.create_experiment(name='BayesianRun3',params=pbounds) as BayesianRunOptimizer:
    CNN_BAYESIAN.maximize(init_points = init_points, n_iter = n_iter, acq = 'ei', xi = 0.0)
    BayesianRunOptimizer.append_tag('BayesianOptimizerRun')

In [None]:
# Display the current trial counter.
session_num

In [None]:
# Bare dict expression: it is only displayed, never assigned or used.
# NOTE(review): looks like a search space from an earlier version of the model
# (its keys do not match the parameters of `generate_cnn`) -- consider deleting.
{'batch_size': (8,18),'epochs':(25,1000),'filter2': (28,36),'filter3': (122,134),'f1_k1': (3, 7),
           'f1_k2': (3, 7),'f1_k3': (3, 7),'learning_rate': (0.0001, 0.01),'dropout_rate1': (0.2, 0.4),
           'L2_2': (0.0001, 1),'L2_3': (0.0001, 1),'D_droput2': (0.2, 0.4),'D_droput3': (0.2, 0.4),
           'BNConv1': (0, 1),'BNConv2': (0, 1),'BNDense1': (0, 1),'BNDense2': (0, 1),'BNDense3': (0, 1)
         }

In [None]:
# Log every hyperparameter of the best trial found so far.
# `CNN_BAYESIAN.max` holds the keys 'target' and 'params'; the previous lookup
# of 'Best_params' raised KeyError.
# NOTE(review): `BayesianRunOptimizer` comes from a `with` block that has
# already exited -- confirm the experiment still accepts metrics at this point.
for name, best_value in CNN_BAYESIAN.max['params'].items():
    BayesianRunOptimizer.log_metric(name, best_value)

In [None]:
# Summarise the best trial: its target value and the parameters that produced it.
print(f"Best Model accuracy is: {CNN_BAYESIAN.max['target']:f}")
print(f"Best Model parameters are: {CNN_BAYESIAN.max['params']}")

In [None]:
from bayes_opt.util import load_logs

In [None]:
# Degenerate search space: every bound has lower == upper, i.e. a single fixed
# configuration.
# NOTE(review): this dict is not used by any later cell shown here, and its
# keys ('epochs', 'filter2', ...) do not match the parameters of `generate_cnn`
# -- confirm whether it is still needed.
newpbounds = {'batch_size': (8,8),'epochs':(694,694),'filter2': (28,28),'filter3': (122,122),'f1_k1': (7, 7),
           'f1_k2': (3, 3),'f1_k3': (3, 3),'learning_rate': (0.0001, 0.0001),'dropout_rate1': (0.2, 0.2),
           'L2_2': (0.0001, 0.0001),'L2_3': (0.0001, 0.0001),'D_droput2': (0.2, 0.2),'D_droput3': (0.4, 0.4),
           'BNConv1': (0, 0),'BNConv2': (0, 0),'BNDense1': (0, 0),'BNDense2': (1, 1),'BNDense3': (0, 0)
         }

In [None]:
# Second optimizer over the same objective. It is built with the original
# `pbounds`, not `newpbounds`.
New_CNN_BAYESIAN = BayesianOptimization(generate_cnn,pbounds=pbounds,verbose=2)

In [None]:
# Resume the run numbering at 106 and switch Neptune to a different project.
session_num = 106
neptune.init('kundan25/EsoCRPredictionBaysian',api_token=api_token)
neptune.set_project('kundan25/EsoCRPredictionBaysian')
# No new random points; 400 further optimisation steps.
init_points = 0
n_iter = 400
# NOTE(review): this continues `CNN_BAYESIAN`, not the `New_CNN_BAYESIAN`
# object created just above -- confirm that is intended.
with neptune.create_experiment(name='BayesianRun',params=pbounds) as BayesianRunOptimizer:
    CNN_BAYESIAN.maximize(init_points = init_points, n_iter = n_iter, acq = 'ei', xi = 0.0)
    BayesianRunOptimizer.append_tag('BayesianOptimizerRun')

In [None]:
# Number of points currently registered in the optimizer's search space.
print(len(CNN_BAYESIAN.space))

In [None]:
from bayes_opt.util import load_logs

In [None]:
# Feed previously logged points into `CNN_BAYESIAN`; the trailing semicolon
# suppresses the cell output.
# NOTE(review): the log file name (Bayesianlogs2.json) differs from the one the
# JSONLogger writes to (RTOGBayesianlogs.json) -- confirm the correct file.
load_logs(CNN_BAYESIAN, logs=["/home/airadiomicslab/OneDrive/logs/Bayesianlogs2.json"]);

In [None]:
# Report how many points `CNN_BAYESIAN` holds after loading the logs.
# NOTE(review): the message says "New optimizer" but the count comes from
# `CNN_BAYESIAN`, not `New_CNN_BAYESIAN` -- confirm which one is meant.
print("New optimizer is now aware of {} points.".format(len(CNN_BAYESIAN.space)))

In [None]:
# Run 100 optimisation steps (no random initial points) on the second
# optimizer. That object is named `New_CNN_BAYESIAN`; the all-caps spelling
# used before was never defined and raised NameError.
New_CNN_BAYESIAN.maximize(init_points = 0, n_iter = 100, acq = 'ei', xi = 0.0)

In [None]:
# Show the best target value and parameters recorded by `CNN_BAYESIAN`.
print(CNN_BAYESIAN.max)