In [30]:
# https://www.analyticsvidhya.com/blog/2021/05/tuning-the-hyperparameters-and-layers-of-neural-network-deep-learning/
# Bayesian Optimization(BO)
# : Grid Search 처럼 모든 경우를 다 계산하는 것이 아니라, 몇개만 계산해서 objective function 의 최대 or 최소가 될 수 있는 hyperparameter 를 찾는 최적화기법

In [31]:
# Import packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from keras.models import Sequential
from keras.layers import Dense, BatchNormalization, Dropout
from keras.optimizers import Adam, SGD, RMSprop, Adadelta, Adagrad, Adamax, Nadam, Ftrl
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.wrappers.scikit_learn import KerasClassifier
from math import floor
from sklearn.metrics import make_scorer, accuracy_score
from bayes_opt import BayesianOptimization
from sklearn.model_selection import StratifiedKFold
from keras.layers import LeakyReLU
LeakyReLU = LeakyReLU(alpha=0.1)
import warnings
warnings.filterwarnings('ignore')
pd.set_option("display.max_columns", None)
from sklearn.preprocessing import StandardScaler

from tensorflow import keras

from numpy.random import seed # 예측 결과 일정하게 하기 위함
seed(1) # 예측 결과 일정하게 하기 위함
import tensorflow as tf  # 예측 결과 일정하게 하기 위함
tf.random.set_seed(2) # 예측 결과 일정하게 하기 위함


In [32]:
# This code makes accuracy the scorer metric.

score_acc = make_scorer(accuracy_score)

In [33]:
# Loaddata set

data = pd.read_csv("bladder_cancer.csv")
data.head(3)

data = data.dropna()

X = data.drop(columns=['Label'], axis=1)
y = data['Label']

In [34]:
# train_set with cross-validation : test_set = 80 : 20 

train_feature, test_feature, train_label, test_label = train_test_split(X, y, test_size = 0.2,random_state=0)

In [35]:
# feature normalization, label 은 normalization 진행하지 않았음
scaler = StandardScaler() # scaler 객체 생성
scaler.fit(train_feature) # train_feature 의 mean 과 standard deviation 값을 추출
train_feature_scaled = scaler.transform(train_feature) # train_feature 의 정규화 진행
test_feature_scaled = scaler.transform(test_feature) # test_feature 의 정규화 진행.
# test_feature 는 mean 과 standard deviation 값을 추출하는 과정 하면 안됨. 
# 학습할 때와 동일한 기반 설정으로 동일하게 테스트 데이터를 변환되야 함. 

In [36]:
# pandas numpy 로 변환
train_label = np.array(train_label)
test_label = np.array(test_label)

In [37]:
haha = ['A','B','C']
haha[1]

'B'

In [38]:
# Create function
def nn_cl_bo2(neurons_1st_hidden, activation,optimizer,neurons_other_hidden_1,neurons_other_hidden_2, learning_rate, batch_size, epochs,
              layers1, layers2):
    optimizerL = ['SGD', 'Adam', 'RMSprop', 'Adadelta', 'Adagrad', 'Adamax', 'Nadam', 'Ftrl','SGD']
    optimizerD= {'Adam':Adam(lr=learning_rate), 'SGD':SGD(lr=learning_rate),
                 'RMSprop':RMSprop(lr=learning_rate), 'Adadelta':Adadelta(lr=learning_rate),
                 'Adagrad':Adagrad(lr=learning_rate), 'Adamax':Adamax(lr=learning_rate),
                 'Nadam':Nadam(lr=learning_rate), 'Ftrl':Ftrl(lr=learning_rate)}
    activationL = ['relu', 'sigmoid', 'softplus', 'softsign', 'tanh', 'selu',
                   'elu', 'exponential', LeakyReLU,'relu']
    neurons_1st_hidden = round(neurons_1st_hidden)
    neurons_other_hidden_1 = round(neurons_other_hidden_1)
    neurons_other_hidden_2 = round(neurons_other_hidden_2)
    activation = activationL[round(activation)]
    optimizer = optimizerD[optimizerL[round(optimizer)]]
    batch_size = round(batch_size)
    epochs = round(epochs)
    layers1 = round(layers1)
    layers2 = round(layers2)
    def nn_cl_fun():
        nn = Sequential()
        nn.add(Dense(neurons_1st_hidden, input_dim=10, activation=activation))
        for i in range(layers1):
            nn.add(Dense(neurons_other_hidden_1, activation=activation))
        for i in range(layers2):
            nn.add(Dense(neurons_other_hidden_2, activation=activation))
        nn.add(Dense(1, activation='sigmoid'))
        nn.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
        return nn
    es = EarlyStopping(monitor='accuracy', mode='max', verbose=0, patience=10)
    nn = KerasClassifier(build_fn=nn_cl_fun, epochs=epochs, batch_size=batch_size, verbose=0)
    kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    score = cross_val_score(nn, train_feature_scaled, train_label, scoring=score_acc, cv=kfold, fit_params={'callbacks':[es]}).mean()
    return score

In [44]:
# The following code searches for the optimum hyperparameters and layers for the Neural Network model
params_nn2 ={
    'neurons_1st_hidden': (10, 500),
    'neurons_other_hidden_1':(10,500),
    'neurons_other_hidden_2':(10,500),
    'activation':(0, 9),
    'optimizer':(0,7),
    'learning_rate':(0.00001, 0.001),
    'batch_size':(10, 200),
    'epochs':(50, 500),
    'layers1':(1,3),
    'layers2':(1,5),
}
# Run Bayesian Optimization
# 'normalization':(0,1), # 이건 사용 안함

nn_bo = BayesianOptimization(nn_cl_bo2, params_nn2, random_state=1)
nn_bo.maximize(init_points=25, n_iter=10)

|   iter    |  target   | activa... | batch_... |  epochs   |  layers1  |  layers2  | learni... | neuron... | neuron... | neuron... | optimizer |
-------------------------------------------------------------------------------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.6388  [0m | [0m 3.753   [0m | [0m 146.9   [0m | [0m 50.05   [0m | [0m 1.605   [0m | [0m 1.587   [0m | [0m 0.000101[0m | [0m 101.3   [0m | [0m 179.3   [0m | [0m 204.4   [0m | [0m 3.772   [0m |
| [95m 2       [0m | [95m 0.7767  [0m | [95m 3.773   [0m | [95m 140.2   [0m | [95m 142.0   [0m | [95m 2.756   [0m | [95m 1.11    [0m | [95m 0.000673[0m | [95m 214.5   [0m | [95m 283.8   [0m | [95m 78.79   [0m | [95m 1.387   [0m |
| [0m 3       [0m | [0m 0.5532  [0m | [0m 7.207   [0m | [0m 194.0   [0m | [0m 191.0   [0m | [0m 2.385   [0m | [0m 4.506   [0m | [0m 0.000895[0m | [0m 51.67   [0m | [0m 29.14   [0m | [0m 93.22

In [46]:
# Fitting Neural Network
params_nn_ = nn_bo.max['params']
learning_rate = params_nn_['learning_rate']
activationL = ['relu', 'sigmoid', 'softplus', 'softsign', 'tanh', 'selu',
               'elu', 'exponential', LeakyReLU,'relu']
params_nn_['activation'] = activationL[round(params_nn_['activation'])]
params_nn_['batch_size'] = round(params_nn_['batch_size'])
params_nn_['epochs'] = round(params_nn_['epochs'])
params_nn_['layers1'] = round(params_nn_['layers1'])
params_nn_['layers2'] = round(params_nn_['layers2'])
optimizerL = ['Adam', 'SGD', 'RMSprop', 'Adadelta', 'Adagrad', 'Adamax', 'Nadam', 'Ftrl','Adam']
optimizerD= {'Adam':Adam(lr=learning_rate), 'SGD':SGD(lr=learning_rate),
             'RMSprop':RMSprop(lr=learning_rate), 'Adadelta':Adadelta(lr=learning_rate),
             'Adagrad':Adagrad(lr=learning_rate), 'Adamax':Adamax(lr=learning_rate),
             'Nadam':Nadam(lr=learning_rate), 'Ftrl':Ftrl(lr=learning_rate)}
params_nn_['optimizer'] = optimizerD[optimizerL[round(params_nn_['optimizer'])]]
params_nn_['neurons_1st_hidden'] = round(params_nn_['neurons_1st_hidden'])
params_nn_['neurons_other_hidden_1'] = round(params_nn_['neurons_other_hidden_1'])
params_nn_['neurons_other_hidden_2'] = round(params_nn_['neurons_other_hidden_2'])
params_nn_

{'activation': 'relu',
 'batch_size': 116,
 'epochs': 462,
 'layers1': 2,
 'layers2': 3,
 'learning_rate': 0.0004911307604259406,
 'neurons_1st_hidden': 306,
 'neurons_other_hidden_1': 279,
 'neurons_other_hidden_2': 464,
 'optimizer': <keras.optimizers.optimizer_v2.nadam.Nadam at 0x1c57edbdf40>}

In [52]:
def nn_cl_fun_2():
    nn = Sequential()
    nn.add(Dense(params_nn_['neurons_1st_hidden'], input_dim=10, activation=params_nn_['activation']))
    for i in range(params_nn_['layers1']):
        nn.add(Dense(params_nn_['neurons_other_hidden_1'], params_nn_['activation']))
    # if params_nn_['dropout'] > 0.5:
    #     nn.add(Dropout(params_nn_['dropout_rate'], seed=123))
    for i in range(params_nn_['layers2']):
        nn.add(Dense(params_nn_['neurons_other_hidden_2'], params_nn_['activation']))
    nn.add(Dense(1, activation='sigmoid'))
    
    optimizer = keras.optimizers.Nadam(learning_rate=learning_rate)
    
    nn.compile(loss='binary_crossentropy',  optimizer=params_nn_['optimizer'], metrics=['accuracy'])
    return nn

es = EarlyStopping(monitor='accuracy', mode='max', verbose=0, patience=10)
nn = KerasClassifier(build_fn=nn_cl_fun_2, epochs=params_nn_['epochs'], batch_size=params_nn_['batch_size'],
                         verbose=0)


skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
for train_index, validation_index in skf.split(train_feature_scaled,train_label):
        nn.fit(train_feature_scaled,train_label, validation_data=(train_feature_scaled[validation_index],train_label[validation_index]), verbose=1)

Epoch 1/462
Epoch 2/462
Epoch 3/462
Epoch 4/462
Epoch 5/462
Epoch 6/462
Epoch 7/462
Epoch 8/462
Epoch 9/462
Epoch 10/462
Epoch 11/462
Epoch 12/462
Epoch 13/462
Epoch 14/462
Epoch 15/462
Epoch 16/462
Epoch 17/462
Epoch 18/462
Epoch 19/462
Epoch 20/462
Epoch 21/462
Epoch 22/462
Epoch 23/462
Epoch 24/462
Epoch 25/462
Epoch 26/462
Epoch 27/462
Epoch 28/462
Epoch 29/462
Epoch 30/462
Epoch 31/462
Epoch 32/462
Epoch 33/462
Epoch 34/462
Epoch 35/462
Epoch 36/462
Epoch 37/462
Epoch 38/462
Epoch 39/462
Epoch 40/462
Epoch 41/462
Epoch 42/462
Epoch 43/462
Epoch 44/462
Epoch 45/462
Epoch 46/462
Epoch 47/462
Epoch 48/462
Epoch 49/462
Epoch 50/462
Epoch 51/462
Epoch 52/462
Epoch 53/462
Epoch 54/462
Epoch 55/462
Epoch 56/462
Epoch 57/462
Epoch 58/462
Epoch 59/462
Epoch 60/462
Epoch 61/462
Epoch 62/462
Epoch 63/462
Epoch 64/462
Epoch 65/462
Epoch 66/462
Epoch 67/462
Epoch 68/462
Epoch 69/462
Epoch 70/462
Epoch 71/462
Epoch 72/462
Epoch 73/462
Epoch 74/462
Epoch 75/462
Epoch 76/462
Epoch 77/462
Epoch 78

In [53]:
nn.score(test_feature_scaled, test_label)

0.8125

In [54]:
nn.score(train_feature_scaled,train_label), nn.score(train_feature_scaled[validation_index],train_label[validation_index]), nn.score(test_feature_scaled, test_label)

(1.0, 1.0, 0.8125)