### 편차보정모델 개발

데이터 로드, 결측제거, 변수 선택

In [4]:
import numpy as np
import pandas as pd
from data_split import data_split

import sys
sys.path.insert(0, '../')
from config.global_params import variable_info


# 인풋 준비
nwp_file = "../DAIO/nwp_data_47105"
obs_file = "../DAIO/obs_data_47105"
nwp_data = np.load(nwp_file)
obs_data = np.load(obs_file)
print("="*50, "load data shape")
print(nwp_data.shape)
print(obs_data.shape)


# train([21.01,04, 22.01,04]) / test([23.01,04]) 분할  
class_split = data_split(nwp_data, obs_data)
train_nwp, test_nwp, train_obs, test_obs = class_split.get_split_data()
print("="*50, "split data shape")
print(train_nwp.shape)
print(train_obs.shape)
print(test_nwp.shape)
print(test_obs.shape)



# 결측제거
missing_nwp_train = set(np.where(np.isnan(train_nwp))[0])
missing_obs_train = set(np.where(np.isnan(train_obs))[0])
missing_all_train = list(missing_nwp_train | missing_obs_train)
print("결측 합계: ", len(missing_all_train))
dm_nwp_train = np.delete(train_nwp, missing_all_train, 0)
dm_obs_train = np.delete(train_obs, missing_all_train, 0)
print("shape of after drop")
print(dm_nwp_train.shape)
print(dm_obs_train.shape)

missing_nwp_test = set(np.where(np.isnan(test_nwp))[0])
missing_obs_test = set(np.where(np.isnan(test_obs))[0])
missing_all_test = list(missing_nwp_test | missing_obs_test)
print("결측 합계: ", len(missing_all_test))
dm_nwp_test = np.delete(test_nwp, missing_all_test, 0)
dm_obs_test = np.delete(test_obs, missing_all_test, 0)
print("shape of after drop")
print(dm_nwp_test.shape)
print(dm_obs_test.shape)


# 변수선택
sel_var = ['NDNSW_surface', 'UGRD_10m', 'VGRD_10m', 'RH_1_5ma', 'MAXGUST_0m', 'PRMSL_meansealevel']
var_list_dict = list(variable_info.keys())
var_index = [ var_list_dict.index(i) for i in sel_var ]
#print(var_list_dict)
#print(var_index)
sel_dm_nwp_train = dm_nwp_train[:,:,var_index]
sel_dm_nwp_test = dm_nwp_test[:,:,var_index]
print("="*50, "drop data shape")
print(sel_dm_nwp_train.shape)
print(dm_obs_train.shape)
print(sel_dm_nwp_test.shape)
print(dm_obs_test.shape)

(868, 49, 20)
(868, 49, 2)
(109, 49, 20)
(109, 49, 2)
(61, 49, 20)
(61, 49, 2)
결측 합계:  1
shape of after drop
(108, 49, 20)
(108, 49, 2)
결측 합계:  5
shape of after drop
(56, 49, 20)
(56, 49, 2)
(108, 49, 6)
(108, 49, 2)
(56, 49, 6)
(56, 49, 2)


스케일링

In [18]:
# .. 스케일링 및 데이터 분할
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

#-------------------------------------------------------------------------
# .. Normalize

output_size = 2

# .. initialaize
tr_b, tr_s, tr_f = dm_nwp_train.shape[0], dm_nwp_train.shape[1], dm_nwp_train.shape[2]      
ts_b, ts_s, ts_f = dm_nwp_test.shape[0], dm_nwp_test.shape[1], dm_nwp_test.shape[2]      

# .. get restorator with obs range
nwp_scaler = MinMaxScaler()   # copy default true
obs_scaler = MinMaxScaler()
nwp_scaler.fit(dm_nwp_train.view().reshape(tr_b*tr_s, tr_f))
obs_scaler.fit(dm_obs_train.view().reshape(tr_b*tr_s, output_size))

# .. feature normalize   ( train seq, feature = test seq, feature )
nor_dm_nwp_train = nwp_scaler.transform(dm_nwp_train.reshape(tr_b*tr_s, tr_f))
nor_dm_nwp_train = nor_dm_nwp_train.reshape(tr_b,tr_s,tr_f)
nor_dm_obs_train = obs_scaler.transform(dm_obs_train.reshape(tr_b*tr_s, output_size))
nor_dm_obs_train = nor_dm_obs_train.reshape(tr_b,tr_s, output_size)

nor_dm_nwp_test = nwp_scaler.transform(dm_nwp_test.reshape(ts_b*ts_s, ts_f))
nor_dm_nwp_test = nor_dm_nwp_test.reshape(ts_b,ts_s,ts_f)
nor_dm_obs_test = obs_scaler.transform(dm_obs_test.reshape(ts_b*ts_s, output_size))
nor_dm_obs_test = nor_dm_obs_test.reshape(ts_b,ts_s, output_size)

nor_dm_nwp_train = nor_dm_nwp_train[:,1::,:]
nor_dm_obs_train = nor_dm_obs_train[:,1::,:]

nor_dm_nwp_test = nor_dm_nwp_test[:,1::,:]
nor_dm_obs_test = nor_dm_obs_test[:,1::,:]

print ('---------- Final training data shape')
print(type(nor_dm_nwp_train))
print ('tran nwp : ', nor_dm_nwp_train.shape)
print ('tran obs : ', nor_dm_obs_train.shape)
print ('test nwp : ', nor_dm_nwp_test.shape)
print ('test obs : ', nor_dm_obs_test.shape)

---------- Final training data shape
<class 'numpy.ndarray'>
tran nwp :  (108, 48, 20)
tran obs :  (108, 48, 2)
test nwp :  (56, 48, 20)
test obs :  (56, 48, 2)


#### 모델 설정

In [1]:
from tensorflow.compat.v1.keras.backend import set_session
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras import optimizers
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from tensorflow.keras.models import model_from_json
from tensorflow.keras.layers import Dense, TimeDistributed
from tensorflow.keras import Input, Model, callbacks
from tensorflow.keras.utils import plot_model as plm
from tcn import TCN, tcn_full_summary
from tensorflow.keras.activations import swish


#-------------------------------------------------------------------------
# .. Set configure

config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
set_session(tf.compat.v1.Session(config=config))


#-------------------------------------------------------------------------
# .. Data set

element = 'ALLV'
name_list = "./SHEL/namelist.input"

hp_lr = 0.009
hp_pd = 'same'
hp_ns = 1
hp_dl = [1,2,4,8,16,32,48]
hp_ldl = hp_dl[-1] # last dilation factor to make name of save model
hp_bn = True

input_size = 6
output_size = 2
num_fct = 48
batch_size = 8
n_iter_search = 20



def create_model(dropout_rate=0.15, nb_filters=7, kernel_size=3): 

          print ('================== Model called ========================')
          print ('input_size: ', input_size)
          print ('batch_size: ', batch_size)
          print ('time_lenght: ', num_fct)
          print ('nb_filters: ', nb_filters)
          print ('kernel_size: ', kernel_size)
          print ('dropout_rate: ', dropout_rate)
          print ('dilations: ', hp_dl)
          dropout_rate = np.round(dropout_rate,2)
          print ('dropout_rate: ', dropout_rate)
          
          ## .. clear keras model
          K.clear_session()

          # .. create model
          #i = Input( batch_shape=(batch_size, num_fct, input_size) )
          i = Input( batch_shape=(None, num_fct, input_size) )
          o = TCN(return_sequences=True,
                  activation=swish,
                  nb_filters=nb_filters,
                  padding=hp_pd,
                  use_batch_norm = hp_bn,
                  nb_stacks=hp_ns,
                  dropout_rate=dropout_rate,
                  kernel_size=kernel_size,
                  use_skip_connections=True,
                  dilations=hp_dl
                  )(i)
          o = TimeDistributed(Dense(output_size, activation='linear'))(o)

          # .. compile
          adam = optimizers.Adam(lr=hp_lr)

          m= Model(inputs=[i], outputs=[o])
          m.compile(optimizer=adam, loss='mse')

          m.summary()

          return m

TypeError: Descriptors cannot not be created directly.
If this call came from a _pb2.py file, your generated code is out of date and must be regenerated with protoc >= 3.19.0.
If you cannot immediately regenerate your protos, some other possible workarounds are:
 1. Downgrade the protobuf package to 3.20.x or lower.
 2. Set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python (but this will use pure-Python parsing and will be much slower).

More information: https://developers.google.com/protocol-buffers/docs/news/2022-05-06#python-updates

In [17]:
model = create_model(dropout_rate=0.09, nb_filters=80, kernel_size=6)
model.fit(train_ds, epochs=10, verbose=1)

1
2
4
8
16
32
64
128
