<a href="https://colab.research.google.com/github/mankicom/DEV_GDPS_TEMP_LSTM/blob/master/Bayesian_search.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Bayesian optimization을 이용한 TCN 초모수 최적화**
단기 풍속 편차보정모델 개발 예제



### **모듈로드**


*   기본모듈: numpy, pandas, os, sys, time, joblib, f90nml
> f90nml: 포트란 네임리스트 읽기 모듈
*   Scikit-learn 및 Scikit-optimize
> 입력자료 정규화, 교차검증, 초모수 최적화 관련 모듈
*   Tensorflow, Keras, TCN 관련
> 모델 개발 관련, Keras는 최근에 Tensorflow에 많은 기능이 포함되어 나옴
*   Local 함수
> 사용자 함수, 훈련자료 로드, 개량한 교차검증 함수

특별히 설치해줘야할 모듈들
> pip install f90nml

> pip install scikit-optimize

> pip install keras-tcn












In [19]:
#-------------------------------------------------------------------------
# .. Module load

#.. module
import numpy as np
import pandas as pd
import os
import sys
from time import time
#import joblib
#import argparse
import f90nml

from sklearn.metrics import make_scorer, r2_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import cross_val_score
from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer

from tensorflow.compat.v1.keras.backend import set_session
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras import optimizers
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from tensorflow.keras.models import model_from_json
from tensorflow.keras.layers import Dense, TimeDistributed
from tensorflow.keras import Input, Model, callbacks
from tensorflow.keras.utils import plot_model as plm
from tcn import TCN, tcn_full_summary
from tensorflow.keras.activations import swish


#.. local
sys.path.insert(0, '/content/drive/MyDrive/BICR_DEEP_TEST/BAYES_OPT/SPD/inc')
#from tran_data_split import tran_data_split
from tran_data_load import tran_data_load
from kmk_make_scorer import r2_3dim, mse_3dim, mae_3dim

### **GPU 관련 디바이스 설정**


*   Tensorflow 기반 딥러닝 코드는 메모리 부족 현상을 피하기 위해 기본적으로 처음부터 GPU의 사용 가능한 메모리를 최대로 확보함. 이로 인해 GPU 서버에서 다른 작업을 함께 수행할 수 없게 되므로, 다음 설정을 통해 작업에 필요한 만큼만 메모리를 확보하도록 함



In [2]:
#-------------------------------------------------------------------------
# .. Device configuration
#
# Build a session whose GPU options have allow_growth enabled and register
# it through set_session (imported from tensorflow.compat.v1.keras.backend),
# so GPU memory is taken as needed instead of all at once.

config = tf.compat.v1.ConfigProto(
    gpu_options=tf.compat.v1.GPUOptions(allow_growth=True)
)
set_session(tf.compat.v1.Session(config=config))


### **변수설정**
*   사용파일 관련 변수 설정
*   TCN 초모수 설정
*   초모수 최적화 탐색 조합 개수 설정





In [3]:
#-------------------------------------------------------------------------
# .. Data set
#
# Experiment-wide constants. The hp_* values are the TCN / optimizer
# settings that are held fixed during the hyperparameter search.

# Passed to tran_data_load as the `element` argument.
element = 'ALLV'
# Fortran namelist file read with f90nml in the "Read namelist" cell.
name_list = "/content/drive/MyDrive/BICR_DEEP_TEST/BAYES_OPT/SPD/SHEL/namelist.input"

hp_lr = 0.009                      # learning rate handed to the Adam optimizer
hp_pd = 'same'                     # TCN `padding`
hp_ns = 1                          # TCN `nb_stacks`
hp_dl = [1,2,4,8,17,34,68,136]     # TCN `dilations`
hp_ldl = hp_dl[-1] # last dilation factor to make name of save model
hp_bn = True                       # TCN `use_batch_norm`
n_iter_search = 2   # number of BayesSearchCV iterations; kept small for a quick test run



### **네임리스트 정보 호출**

In [5]:
#-------------------------------------------------------------------------
# .. Read namelist
#
# Load the experiment settings from the Fortran namelist. The run is
# aborted with an error message when the namelist file does not exist.

print ("1. Read namelist")
exists = os.path.isfile(name_list)
if not exists:
    sys.exit("STOP Error: Could not found : "+ name_list)

nml = f90nml.read(name_list)

# &data_set group: variable names match the namelist keys one-to-one.
(tran_data_per, tran_num_his,
 test_data_per, test_num_his,
 num_fct, dev_stn_id, exp_name,
 data_dir, input_size, output_size) = (
    nml['data_set'][key]
    for key in ('tran_data_per', 'tran_num_his',
                'test_data_per', 'test_num_his',
                'num_fct', 'dev_stn_id', 'exp_name',
                'data_dir', 'input_size', 'output_size')
)

# &hyper_para group: the last three keys are stored under hp_* names.
(num_epoch, patience, hp_nf, hp_ks, hp_dr) = (
    nml['hyper_para'][key]
    for key in ('num_epoch', 'patience', 'n_filter', 's_kernel', 'drp_rate')
)

print(nml)

# The namelist values for data_dir and num_epoch are overridden below.
# Alternative input directories kept for reference:
#data_dir = './DAIN_MIX/'
#data_dir = './DAIN_HR136/'
data_dir = '/content/drive/MyDrive/BICR_DEEP_TEST/BAYES_OPT/SPD/DAIN/'
num_epoch = 2 # small value for a quick test run

# Output directories, one sub-directory per experiment name.
csv_outdir = '/content/drive/MyDrive/BICR_DEEP_TEST/BAYES_OPT/SPD/DAOU/LOSS/' + exp_name + '/'
model_outdir = '/content/drive/MyDrive/BICR_DEEP_TEST/BAYES_OPT/SPD/DAOU/MODL/' + exp_name + '/'
scalr_outdir = '/content/drive/MyDrive/BICR_DEEP_TEST/BAYES_OPT/SPD/DAOU/SCAL/' + exp_name + '/'
gifd_outdir = '/content/drive/MyDrive/BICR_DEEP_TEST/BAYES_OPT/SPD/GIFD/' + exp_name + '/'



1. Read namelist
&data_set
    tran_data_per = '2016050100-2021043000-24-1605-2104'
    tran_num_his = 1826
    test_data_per = '2021050100-2021092100-24-2105-2109'
    test_num_his = 144
    num_fct = 136
    dev_stn_id = 47003
    exp_name = 'OP_12UTC'
    data_dir = '/home/mankicom/STD_POOL/SHRT_GDPS/HOURLY1/MODL_DVLP/SPD/TEST/DAIN/'
    input_size = 10
    output_size = 1
/

&hyper_para
    num_epoch = 1000
    patience = 1000
    n_filter = 87
    s_kernel = 6
    drp_rate = 0.07
/


### **TCN 입력자료 호출**

*   군집분석을 통해 선정된 대표지점 훈련자료 로드
*   for 문을 통해 대표 지점개수만큼 로드한 후 한 변수에 표본개수를 병합



In [20]:
#-------------------------------------------------------------------------
# .. Fcst load :  data dim( input_size, num_stn, num_his, num_fct )
#
#    Training data used for cross-validation
#    Test data used to evaluate best model out of randomized search
#

eval_rate = 0.2

# Stations whose training samples are merged into one training set.
#combine_stn = [47169, 47133, 47102, 47090]
combine_stn = [47108, 47108, 47108, 47108]

# The station list in the log line is built from combine_stn so the message
# always names the stations that are actually loaded.
print ("3. Training/valid data load, combine %dstn(%s) train sample"
       % (len(combine_stn), ", ".join(str(stn_id) for stn_id in combine_stn)))

# Load every station once, then merge with a single concatenate per array
# instead of re-copying the growing arrays on each loop iteration.
station_data = [
    tran_data_load(data_dir, tran_data_per,
                   element, input_size, output_size, tran_num_his,
                   num_fct, stn_id)
    for stn_id in combine_stn
]
tran_x = np.concatenate([x_part for x_part, _ in station_data], axis=1)
tran_y = np.concatenate([y_part for _, y_part in station_data], axis=1)

# Samples were stacked along axis 1; swap so that they become axis 0.
tran_x = np.swapaxes(tran_x,0,1)
tran_y = np.swapaxes(tran_y,0,1)

#-------------------------------------------------------------------------
# .. Check dimension

# Take the feature counts from the loaded arrays rather than the namelist.
input_size = tran_x.shape[2]
output_size = tran_y.shape[2]
print ("5. Select var")
print ('tran_x shape= ', tran_x.shape)    # batch, sequence, feature
print ('tran_y shape= ', tran_y.shape)



3. Training/valid data load, combine 4stn(47169, 47133, 47102, 47090) train sample
Read input:  /content/drive/MyDrive/BICR_DEEP_TEST/BAYES_OPT/SPD/DAIN/OBS/tran_obs_spd.2016050100-2021043000-24-1605-2104_47108
FILE date:  [2016    5    1    0] [2021    4   30    0] [24]
READ  DIMENSION: NV =  1
READ  DIMENSION: NS =  1
READ  DIMENSION: NH =  1826
READ  DIMENSION: NF =  136
USER Request dimension: 
USER  DIMENSION: NV =  1
USER  DIMENSION: NS =  1
USER  DIMENSION: NH =  1826
USER  DIMENSION: NF =  136
Read input:  /content/drive/MyDrive/BICR_DEEP_TEST/BAYES_OPT/SPD/DAIN/NWP/LC/tran_gmix_ALLV_nvar10.2016050100-2021043000-24-1605-2104_47108
FILE date:  [2016    5    1    0] [2021    4   30    0] [24]
READ  DIMENSION: NV =  10
READ  DIMENSION: NS =  1
READ  DIMENSION: NH =  1826
READ  DIMENSION: NF =  136
USER Request dimension: 
USER  DIMENSION: NV =  10
USER  DIMENSION: NS =  1
USER  DIMENSION: NH =  1826
USER  DIMENSION: NF =  136
Read obs raw dimension: 
Read nwp raw dimension: 
-----

  stn_id = f.read_record( np.dtype((np.int32,(NS1))) )


missing count =  59
missing days =  [248 248 248 ... 254 254 254]
Remove nan (array([  0,   0,   0, ..., 135, 135, 135]), array([248, 248, 248, ..., 254, 254, 254]), array([0, 1, 2, ..., 7, 8, 9]))
---------- Shape of after drop nan 
(136, 1767, 10)
(136, 1767, 1)
Read input:  /content/drive/MyDrive/BICR_DEEP_TEST/BAYES_OPT/SPD/DAIN/OBS/tran_obs_spd.2016050100-2021043000-24-1605-2104_47108
FILE date:  [2016    5    1    0] [2021    4   30    0] [24]
READ  DIMENSION: NV =  1
READ  DIMENSION: NS =  1
READ  DIMENSION: NH =  1826
READ  DIMENSION: NF =  136
USER Request dimension: 
USER  DIMENSION: NV =  1
USER  DIMENSION: NS =  1
USER  DIMENSION: NH =  1826
USER  DIMENSION: NF =  136
Read input:  /content/drive/MyDrive/BICR_DEEP_TEST/BAYES_OPT/SPD/DAIN/NWP/LC/tran_gmix_ALLV_nvar10.2016050100-2021043000-24-1605-2104_47108
FILE date:  [2016    5    1    0] [2021    4   30    0] [24]
READ  DIMENSION: NV =  10
READ  DIMENSION: NS =  1
READ  DIMENSION: NH =  1826
READ  DIMENSION: NF =  136
USER

### **정규화**


*   로드된 대표지점 병합 훈련자료를 정규화
*   Scikit-learn의 MinMaxScaler 함수를 이용해 피팅 후 변환
> scaler의 입력형태가 [batch, feature]를 요구하기 때문에 3차원 데이터를 2차원으로 변환후 스케일링 수행. 수행이 끝나면 다시 3차원으로 복원







In [21]:
#-------------------------------------------------------------------------
# .. Normalize
#
# Each 3-D array is flattened to 2-D (tr_b*tr_s rows) for the scaler and
# reshaped back to 3-D after the transform.

# .. initialize
tr_b, tr_s, tr_f = tran_x.shape
_flat_x_shape = (tr_b*tr_s, tr_f)
_flat_y_shape = (tr_b*tr_s, output_size)

# .. fit one scaler on the inputs and one on the targets
nwp_scaler = MinMaxScaler()   # copy default true
obs_scaler = MinMaxScaler()
nwp_scaler.fit(tran_x.reshape(_flat_x_shape))
obs_scaler.fit(tran_y.reshape(_flat_y_shape))

# .. scale in 2-D, then restore the 3-D layout
nor_tran_x = nwp_scaler.transform(
    tran_x.reshape(_flat_x_shape)).reshape(tr_b, tr_s, tr_f)
nor_tran_y = obs_scaler.transform(
    tran_y.reshape(_flat_y_shape)).reshape(tr_b, tr_s, output_size)


print ('---------- Final training data shape')
print(type(nor_tran_x))
print ('tran nwp : ', nor_tran_x.shape)
print ('tran obs : ', nor_tran_y.shape)


---------- Final training data shape
<class 'numpy.ndarray'>
tran nwp :  (7068, 136, 10)
tran obs :  (7068, 136, 1)


### **TCN모델 설정 및 초모수 최적화 설정**


*   scikit-optimize를 이용한 초모수 최적화를 위해 TCN 모델을 반환하는 함수 create_model 정의 (최적화할 초모수는 함수 인자로 미리 정의)
*   keras-tcn과 scikit-optimize의 호환을 위해 KerasRegressor로 래핑



In [24]:

#=========================================================================
# .. Model configuration


#-------------------------------------------------------------------------
# .. Set mini batch for cross-validation

# Number of cross-validation folds; 2 for a quick test run (5 otherwise).
num_cv = 2
# Mini-batch size is 10% of the number of training samples, truncated.
batch_size = int(nor_tran_x.shape[0] * 0.1)

for _label, _value in (('input_size: ', input_size),
                       ('batch_size: ', batch_size),
                       ('time_lenght: ', num_fct)):
    print(_label, _value)


#-------------------------------------------------------------------------
# .. Set Model


# .. Define model
# 최적화할 변수들은 함수 아규먼트 칸에 사전 정의 해주세요
def create_model(dropout_rate=0.15, nb_filters=7, kernel_size=3):
    """Build and compile a TCN regression model for one hyperparameter set.

    The keyword arguments are the hyperparameters a search may override;
    every other setting (sequence length, feature counts, padding, stacks,
    dilations, batch norm, learning rate) is read from module-level
    variables. The Keras session is cleared before the model is built, and
    the settings plus the model summary are printed on every call.

    Args:
        dropout_rate: TCN dropout rate; rounded to two decimals before use.
        nb_filters: Passed to TCN as ``nb_filters``.
        kernel_size: Passed to TCN as ``kernel_size``.

    Returns:
        A compiled Keras ``Model`` (Adam optimizer, MSE loss) made of a TCN
        that returns sequences followed by a time-distributed linear Dense
        layer with ``output_size`` units.
    """
    print ('================== Model called ========================')
    print ('input_size: ', input_size)
    print ('batch_size: ', batch_size)
    print ('time_lenght: ', num_fct)
    print ('nb_filters: ', nb_filters)
    print ('kernel_size: ', kernel_size)
    print ('dropout_rate: ', dropout_rate)
    print ('dilations: ', hp_dl)
    # Printed a second time on purpose: this is the value after rounding.
    dropout_rate = np.round(dropout_rate,2)
    print ('dropout_rate: ', dropout_rate)

    # .. clear keras model
    K.clear_session()

    # .. create model
    # The batch dimension must stay None: the mini-batch size is a
    # percentage of the training-sample count, so the last batch is usually
    # smaller and a fixed batch size would raise an error.
    i = Input( batch_shape=(None, num_fct, input_size) )
    o = TCN(return_sequences=True,
            activation=swish,
            nb_filters=nb_filters,
            padding=hp_pd,
            use_batch_norm = hp_bn,
            nb_stacks=hp_ns,
            dropout_rate=dropout_rate,
            kernel_size=kernel_size,
            use_skip_connections=True,
            dilations=hp_dl
            )(i)
    o = TimeDistributed(Dense(output_size, activation='linear'))(o)

    # .. compile
    # `learning_rate` replaces the deprecated `lr` alias, which triggers a
    # deprecation warning on every model build.
    adam = optimizers.Adam(learning_rate=hp_lr)

    m = Model(inputs=[i], outputs=[o])
    m.compile(optimizer=adam, loss='mse')

    m.summary()

    return m


# .. Wrap create_model in KerasRegressor so the search can use it as an
#    estimator; the remaining settings are handed over as keyword arguments.
_wrapper_kwargs = {
    'verbose': 1,
    'epochs': num_epoch,
    'batch_size': batch_size,
    'shuffle': True,
}
model = KerasRegressor(build_fn=create_model, **_wrapper_kwargs)

input_size:  10
batch_size:  706
time_lenght:  136




### **BayesSearchCV를 이용한 TCN 초모수 최적화**


*   탐색할 초모수 및 범위 설정
*   교차검증 스코어 설정
*   optimizer 설정(=BayesSearchCV)
*   최적화 결과출력(Best 조합 및 rank 별 조합)





In [13]:
#-------------------------------------------------------------------------
# .. Use bayes_opt

# .. Search space
# Alternative search spaces kept for reference:
#   padding: Categorical(['causal','same']), nb_stacks: Integer(1,5),
#   nb_filters: Categorical([7,20,30]), kernel_size: Integer(2,24)
#   dropout_rate: Real(0.01, 0.2)
#   dropout_rate: Categorical([0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.11,
#                              0.12, 0.13, 0.15])
param_dist = dict(nb_filters=Integer(50, 100),
                  kernel_size=Integer(3, 12))

# .. Scorers built from the local 3-D metric functions
set_eval_score = {
    label: make_scorer(metric)
    for label, metric in (('MAE', mae_3dim),
                          ('MSE', mse_3dim),
                          ('R2', r2_3dim))
}

print(param_dist)

# .. Search object: candidates are scored with the R2 scorer and no final
#    refit is requested.
optimizer = BayesSearchCV(
    estimator=model,
    search_spaces=param_dist,
    n_iter=n_iter_search,
    cv=num_cv,
    scoring=set_eval_score['R2'],
    refit=False,
    return_train_score=True,
    n_jobs=1,
    random_state=1,
    verbose=1,
)

# .. Run the search; `start` is read later to report the elapsed time.
start = time()
print(nor_tran_x.shape, nor_tran_y.shape)
optimizer.fit(nor_tran_x, nor_tran_y)

_cv_results = optimizer.cv_results_
print(type(_cv_results))
print(_cv_results)

# .. Report
def report(result, n_top=None):
    """Print the top candidates of a hyperparameter search, best first.

    Candidates are ordered by ``result['rank_test_score']`` (1 = best); ties
    keep their original candidate order. The printed "Rank" label is the
    0-based position in that ordering.

    Args:
        result: Mapping shaped like ``cv_results_``, holding the sequences
            'rank_test_score', 'mean_test_score' and 'params', all indexed
            by candidate.
        n_top: Number of candidates to print. ``None`` (the default) prints
            every candidate; values larger than the number of candidates
            are clamped and values below one print nothing.
    """
    ranks = list(result['rank_test_score'])
    # rank_test_score[j] is the rank OF candidate j, so the candidate order
    # is the argsort of the ranks; a rank value is not a candidate index.
    order = sorted(range(len(ranks)), key=lambda idx: ranks[idx])
    if n_top is None:
        n_top = len(order)
    for position, idx in enumerate(order[:max(n_top, 0)]):
        print("Rank: %0d, R2: %.3f with %r" %
              ( position, result['mean_test_score'][idx],
                          result['params'][idx] ) )


# .. Elapsed time since `start` and the number of search iterations
_elapsed_sec = time() - start
_timing_fmt = ("BayesSearchCV took %.2f seconds for %d candidates"
               " parameter settings. ")
print(_timing_fmt % (_elapsed_sec, n_iter_search))

# .. Best score and the parameter set that produced it
_best_fmt = "Best: %f using %s"
print(_best_fmt % (optimizer.best_score_, optimizer.best_params_))

# .. Per-candidate listing
report(optimizer.cv_results_)

{'nb_filters': Integer(low=50, high=100, prior='uniform', transform='identity'), 'kernel_size': Integer(low=3, high=12, prior='uniform', transform='identity')}
(7068, 136, 10) (7068, 136, 1)
Fitting 2 folds for each of 1 candidates, totalling 2 fits
input_size:  10
batch_size:  565
time_lenght:  136
nb_filters:  53
kernel_size:  9
dropout_rate:  0.15
dilations:  [1, 2, 4, 8, 17, 34, 68, 136]
dropout_rate:  0.15
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 136, 10)]         0         
                                                                 
 tcn (TCN)                   (None, 136, 53)           388808    
                                                                 
 time_distributed (TimeDistr  (None, 136, 1)           54        
 ibuted)                                                         
                                          

  super(Adam, self).__init__(name, **kwargs)


Epoch 2/2
input_size:  10
batch_size:  565
time_lenght:  136
nb_filters:  53
kernel_size:  9
dropout_rate:  0.15
dilations:  [1, 2, 4, 8, 17, 34, 68, 136]
dropout_rate:  0.15
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 136, 10)]         0         
                                                                 
 tcn (TCN)                   (None, 136, 53)           388808    
                                                                 
 time_distributed (TimeDistr  (None, 136, 1)           54        
 ibuted)                                                         
                                                                 
Total params: 388,862
Trainable params: 387,166
Non-trainable params: 1,696
_________________________________________________________________
Epoch 1/2


  super(Adam, self).__init__(name, **kwargs)


Epoch 2/2
Fitting 2 folds for each of 1 candidates, totalling 2 fits
input_size:  10
batch_size:  565
time_lenght:  136
nb_filters:  67
kernel_size:  9
dropout_rate:  0.15
dilations:  [1, 2, 4, 8, 17, 34, 68, 136]
dropout_rate:  0.15
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 136, 10)]         0         
                                                                 
 tcn (TCN)                   (None, 136, 67)           618142    
                                                                 
 time_distributed (TimeDistr  (None, 136, 1)           68        
 ibuted)                                                         
                                                                 
Total params: 618,210
Trainable params: 616,066
Non-trainable params: 2,144
_________________________________________________________________
Epoch 1/2


  super(Adam, self).__init__(name, **kwargs)


Epoch 2/2
input_size:  10
batch_size:  565
time_lenght:  136
nb_filters:  67
kernel_size:  9
dropout_rate:  0.15
dilations:  [1, 2, 4, 8, 17, 34, 68, 136]
dropout_rate:  0.15
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 136, 10)]         0         
                                                                 
 tcn (TCN)                   (None, 136, 67)           618142    
                                                                 
 time_distributed (TimeDistr  (None, 136, 1)           68        
 ibuted)                                                         
                                                                 
Total params: 618,210
Trainable params: 616,066
Non-trainable params: 2,144
_________________________________________________________________
Epoch 1/2


  super(Adam, self).__init__(name, **kwargs)


Epoch 2/2
<class 'dict'>
{'mean_fit_time': array([17.32583845, 20.65430403]), 'std_fit_time': array([5.14557374, 6.76235747]), 'mean_score_time': array([1.41881144, 1.72049332]), 'std_score_time': array([0.00847042, 0.33814955]), 'param_kernel_size': masked_array(data=[9, 9],
             mask=[False, False],
       fill_value='?',
            dtype=object), 'param_nb_filters': masked_array(data=[53, 67],
             mask=[False, False],
       fill_value='?',
            dtype=object), 'params': [OrderedDict([('kernel_size', 9), ('nb_filters', 53)]), OrderedDict([('kernel_size', 9), ('nb_filters', 67)])], 'split0_test_score': array([-6.84332979e+10, -1.46141808e+08]), 'split1_test_score': array([-1.16807358e+10, -5.55390509e+10]), 'mean_test_score': array([-4.00570168e+10, -2.78425964e+10]), 'std_test_score': array([2.83762811e+10, 2.76964546e+10]), 'rank_test_score': array([2, 1], dtype=int32), 'split0_train_score': array([-6.84332979e+10, -1.46141808e+08]), 'split1_train_score': ar

### **Best 초모수 조합 출력**

In [17]:
#=========================================================================
# .. Second refit to evaluate best model use test period


#-------------------------------------------------------------------------
# .. Set model label

# .. best model configuration for whole train set
# Take the best hyperparameter combination found by the search and show
# both its container type and its contents.
params = optimizer.best_params_
print(type(params))
print(params)

<class 'collections.OrderedDict'>
OrderedDict([('kernel_size', 9), ('nb_filters', 67)])
