In [11]:
from ipynb.fs.full.CommonFunctions import col_lagger,split_sequences
import os
import pandas as pd
import numpy as np
import hydroeval as he
from sklearn.preprocessing import StandardScaler
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from keras.models import *
from keras.layers import *
import warnings
from keras.optimizers import *
from keras.callbacks import *
from tqdm import tqdm_notebook

In [12]:
# --- Logging / warnings -----------------------------------------------------
# NOTE(review): tensorflow is imported in the previous cell, so this variable
# may be set too late to silence messages emitted at import time - confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
warnings.filterwarnings('ignore')

# --- Plot appearance --------------------------------------------------------
plt.style.use('ggplot')
# Presumably chosen so the Korean column names render in figures - confirm the
# font is installed on the target machine.
plt.rcParams["font.family"] = 'NanumGothic'
plt.rcParams.update({
    'font.size': 12,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    'axes.labelsize': 12,
})

CB91_Blue = '#2CBDFE'
CB91_Green = '#47DBCD'
CB91_Pink = '#F3A0F2'
CB91_Purple = '#9D2EC5'
CB91_Violet = '#661D98'
CB91_Amber = '#F5B14C'

# Order matters: it is the order in which matplotlib cycles through colours.
color_list = [CB91_Blue,
              CB91_Pink,
              CB91_Green,
              CB91_Amber,
              CB91_Purple,
              CB91_Violet]

plt.rcParams['axes.prop_cycle'] = plt.cycler(color=color_list)
mpl.rcParams['axes.unicode_minus'] = False

# --- Reproducibility --------------------------------------------------------
seed = 42
np.random.seed(seed)
tf.random.set_seed(seed)

# --- GPU memory -------------------------------------------------------------
# Enable on-demand memory growth for every visible GPU. Iterating (instead of
# indexing gpu[0]) makes this a no-op on CPU-only machines, where the list is
# empty and gpu[0] would raise IndexError.
gpu = tf.config.experimental.list_physical_devices('GPU')
for device in gpu:
    tf.config.experimental.set_memory_growth(device, True)

In [13]:
# Read the pre-processed weather/dam table (cp949-encoded) using its first
# column as the index, then convert that index to datetimes.
csv_path = '..//Processed_data//weather_dam_wrn.csv'
df = pd.read_csv(csv_path, encoding='cp949', index_col=[0])
df.index = pd.to_datetime(df.index)

In [14]:
# Rainfall shifted one row up, i.e. the *next* row's rainfall becomes a
# feature of the current row.
df['강우량(mm)+1'] = df['강우량(mm)'].shift(-1)

# Add lag columns for every variable via the project helper `col_lagger`
# (lag count 7). The order below fixes the resulting column order.
LAG_COUNT = 7
for lag_source in ['강우량(mm)',
                   '유입량(㎥/s)',
                   '최저기온(°C)',
                   '최고기온(°C)',
                   '평균 풍속(m/s)',
                   '호우경보',
                   '평균 상대습도(%)',
                   '합계 일사량(MJ/m2)']:
    df = col_lagger(df, lag_source, LAG_COUNT)

In [15]:
# Remove the un-lagged weather columns, then discard every row that still
# contains a missing value.
unlagged_weather_cols = ['최저기온(°C)', '최고기온(°C)',
                         '평균 풍속(m/s)', '평균 상대습도(%)',
                         '합계 일사량(MJ/m2)', '호우경보']
df = df.drop(columns=unlagged_weather_cols).dropna()

In [16]:
# Number of columns in the prepared frame (target + features).
df.shape[1]

59

In [17]:
# Show the remaining column labels so the feature set can be checked by eye.
df.columns

Index(['강우량(mm)', '유입량(㎥/s)', '강우량(mm)+1', '강우량(mm)_1', '강우량(mm)_2',
       '강우량(mm)_3', '강우량(mm)_4', '강우량(mm)_5', '강우량(mm)_6', '강우량(mm)_7',
       '유입량(㎥/s)_1', '유입량(㎥/s)_2', '유입량(㎥/s)_3', '유입량(㎥/s)_4', '유입량(㎥/s)_5',
       '유입량(㎥/s)_6', '유입량(㎥/s)_7', '최저기온(°C)_1', '최저기온(°C)_2', '최저기온(°C)_3',
       '최저기온(°C)_4', '최저기온(°C)_5', '최저기온(°C)_6', '최저기온(°C)_7', '최고기온(°C)_1',
       '최고기온(°C)_2', '최고기온(°C)_3', '최고기온(°C)_4', '최고기온(°C)_5', '최고기온(°C)_6',
       '최고기온(°C)_7', '평균 풍속(m/s)_1', '평균 풍속(m/s)_2', '평균 풍속(m/s)_3',
       '평균 풍속(m/s)_4', '평균 풍속(m/s)_5', '평균 풍속(m/s)_6', '평균 풍속(m/s)_7',
       '호우경보_1', '호우경보_2', '호우경보_3', '호우경보_4', '호우경보_5', '호우경보_6', '호우경보_7',
       '평균 상대습도(%)_1', '평균 상대습도(%)_2', '평균 상대습도(%)_3', '평균 상대습도(%)_4',
       '평균 상대습도(%)_5', '평균 상대습도(%)_6', '평균 상대습도(%)_7', '합계 일사량(MJ/m2)_1',
       '합계 일사량(MJ/m2)_2', '합계 일사량(MJ/m2)_3', '합계 일사량(MJ/m2)_4',
       '합계 일사량(MJ/m2)_5', '합계 일사량(MJ/m2)_6', '합계 일사량(MJ/m2)_7'],
      dtype='object')

In [18]:
# Stack features and target side by side, with the target as the LAST column,
# before handing the matrix to the project helper `split_sequences`.
target_col = '유입량(㎥/s)'
feature_cols = [c for c in df.columns if c != target_col]
data_stacked = np.hstack((df[feature_cols].values, df[[target_col]].values))

# split_sequences is called with (1, 2); the shapes printed further down show
# X windows of 1 step and y windows of 2 values.
X, y = split_sequences(data_stacked, 1, 2)

# Chronological split: the first 80% (counted on df rows) is used for training.
train_length = int(df.shape[0]*0.8)

train_X, test_X = X[:train_length, :], X[train_length:, :]
train_y, test_y = y[:train_length, :], y[train_length:, :]

# Scaling X and y

In [19]:
# One scaler per side so predictions can later be mapped back with scaler1y.
scaler1x, scaler1y = StandardScaler(), StandardScaler()

# ---- Features: flatten to 2-D, fit on train only, restore a 3-D layout ----
train_X_prescaled = train_X.reshape(len(train_X), -1)
n_train_rows, n_train_cols = train_X_prescaled.shape
train_X1 = scaler1x.fit_transform(train_X_prescaled).reshape(n_train_rows, 1, n_train_cols)

test_X_prescaled = test_X.reshape(len(test_X), -1)
test_X1 = scaler1x.transform(test_X_prescaled).reshape(len(test_X), 1, test_X.shape[-1])

# ---- Targets: same procedure, statistics again come from train only ----
train_y_prescaled = train_y.reshape(len(train_y), -1)
train_y1 = scaler1y.fit_transform(train_y_prescaled).reshape(len(train_y), 1, train_y.shape[-1])

test_y_prescaled = test_y.reshape(len(test_y), -1)
test_y1 = scaler1y.transform(test_y_prescaled).reshape(len(test_y), 1, test_y.shape[-1])

In [20]:
# Sanity check: one shape per line, in the order train X, test X, train y, test y.
print(train_X1.shape, test_X1.shape, train_y1.shape, test_y1.shape, sep='\n')

(4520, 1, 58)
(1130, 1, 58)
(4520, 1, 2)
(1130, 1, 2)


# Testing with Bi_LSTM seq2seq(118,118,118)

In [None]:
# Only show TensorFlow (v1-compat logger) messages at ERROR level.
tf_v1_logging = tf.compat.v1.logging
tf_v1_logging.set_verbosity(tf_v1_logging.ERROR)

In [None]:
# Grid search over batch size x learning rate for a 3-layer bidirectional-LSTM
# encoder/decoder. For every grid point a fresh model is built, trained,
# evaluated on the test split and written to disk.
lr = [0.1, 0.01, 0.001]
batches = [64, 128, 256]


def _bi_lstm(return_sequences, return_state=False):
    """Build one Bidirectional LSTM layer with the settings shared by all six layers."""
    return Bidirectional(LSTM(118,
                              return_sequences=return_sequences,
                              activation='selu',
                              kernel_initializer='lecun_normal',
                              return_state=return_state,
                              recurrent_dropout=0.5))


for batch in tqdm_notebook(batches, desc='Batch Size'):
    for rate in tqdm_notebook(lr, leave=False, desc='Learning Rate'):
        # Nine models are created in this loop; release the Keras state left
        # behind by the previous one so memory use does not keep growing.
        tf.keras.backend.clear_session()

        print('*'*80)
        print(f'\nBatch Size: {batch}...Learning Rate: {rate}\n')

        # `verbose` was misspelled as `vebose`, so the option never reached
        # the callback as intended.
        # NOTE(review): with epochs=1 below, patience=5 can never be reached;
        # the callback only matters once the epoch count is raised.
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                                      min_delta=0.1,
                                      patience=5, min_lr=1e-5, verbose=1)

        opt = tf.keras.optimizers.Adam(learning_rate=rate)

        # ---- Encoder: three stacked Bi-LSTM layers -------------------------
        # Each layer is called with return_state=True: element 0 of its result
        # is fed to the next layer, the remaining elements are kept as states.
        encoder_inputs = Input(shape=(train_X1.shape[1], train_X1.shape[2]))

        encoder_l1 = _bi_lstm(return_sequences=True, return_state=True)
        encoder_outputs1 = encoder_l1(encoder_inputs)
        encoder_states1 = encoder_outputs1[1:]

        encoder_l2 = _bi_lstm(return_sequences=True, return_state=True)
        encoder_outputs2 = encoder_l2(encoder_outputs1[0])
        encoder_states2 = encoder_outputs2[1:]

        encoder_l3 = _bi_lstm(return_sequences=False, return_state=True)
        encoder_outputs3 = encoder_l3(encoder_outputs2[0])
        encoder_states3 = encoder_outputs3[1:]

        # ---- Decoder: layer k starts from the states of encoder layer k ----
        decoder_inputs = RepeatVector(1)(encoder_outputs3[0])

        decoder_l1 = _bi_lstm(return_sequences=True)(decoder_inputs, initial_state=encoder_states1)
        decoder_l2 = _bi_lstm(return_sequences=True)(decoder_l1, initial_state=encoder_states2)
        decoder_l3 = _bi_lstm(return_sequences=True)(decoder_l2, initial_state=encoder_states3)

        # Two outputs per sample (reported below as first / second day).
        decoder_outputs2 = TimeDistributed(Dense(2))(decoder_l3)

        model = tf.keras.models.Model(encoder_inputs, decoder_outputs2)
        model.compile(loss='mse', optimizer=opt, metrics=['mse'])

        # shuffle=False keeps the samples in their original (time) order.
        history = model.fit(train_X1, train_y1,
                            epochs=1,
                            verbose=0,
                            batch_size=batch,
                            shuffle=False,
                            validation_split=0.1,
                            callbacks=[reduce_lr])

        y_hat = model.predict(test_X1)

        # Flatten (samples, 1, 2) -> (samples, 2) BEFORE un-scaling: the scaler
        # was fitted on 2-D data and recent scikit-learn versions reject 3-D
        # input. The resulting values are the same as un-scaling then flattening.
        y_hat_inv = scaler1y.inverse_transform(y_hat.reshape(y_hat.shape[0], -1))
        test_y1_inv = scaler1y.inverse_transform(test_y1.reshape(test_y1.shape[0], -1))

        # RMSE is computed as sqrt(MSE) rather than via the `squared=False`
        # flag, which newer scikit-learn releases deprecate.
        for day_idx, day_name in enumerate(('first', 'second')):
            observed = test_y1_inv[:, day_idx]
            predicted = y_hat_inv[:, day_idx]
            rmse = np.sqrt(mean_squared_error(observed, predicted))
            mae = mean_absolute_error(observed, predicted)
            print(f'RMSE for the {day_name} day: {rmse:.2f}')
            print(f'MAE for the {day_name} day: {mae:.2f}\n')

        # hydroeval expects (simulations, evaluation); the arguments used to
        # be swapped, which normalised by the variance of the predictions
        # instead of the observations.
        print(f'NSE: {he.nse(y_hat_inv, test_y1_inv)}\n')

        # Persist the architecture (YAML) and the weights (HDF5) per grid point.
        # NOTE(review): Model.to_yaml() is reported as removed in TF >= 2.6 in
        # favour of to_json(); kept here so the output artifacts do not change
        # - confirm against the TensorFlow version in use.
        model_yaml = model.to_yaml()
        with open(f"bi_lstm118_s2s_lr{rate}_batch_{batch}.yaml", "w") as yaml_file:
            yaml_file.write(model_yaml)

        model.save_weights(f"bi_lstm118_s2s_lr{rate}_batch_{batch}.h5")
        print("Saved model to disk")