In [4]:
import pandas as pd
import numpy as np
import hydroeval as he
from sklearn.preprocessing import *
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error,r2_score,max_error
from keras.models import *
from keras.layers import *
import warnings
from keras.optimizers import *
import copy
import os
from keras.callbacks import *
from tqdm import tqdm_notebook

In [5]:
warnings.filterwarnings('ignore')
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

mpl.rcParams['axes.unicode_minus'] = False
plt.style.use('ggplot')
plt.rcParams["font.family"] = 'Nanum Brush Script OTF'
mpl.rcParams['axes.unicode_minus'] = False
plt.rcParams["font.family"] = 'NanumGothic'
plt.rcParams['font.size'] = 12
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
plt.rcParams['axes.labelsize'] = 12

CB91_Blue = '#2CBDFE'
CB91_Green = '#47DBCD'
CB91_Pink = '#F3A0F2'
CB91_Purple = '#9D2EC5'
CB91_Violet = '#661D98'
CB91_Amber = '#F5B14C'

color_list = [CB91_Blue, 
              CB91_Pink, 
              CB91_Green, 
              CB91_Amber,
              CB91_Purple, 
              CB91_Violet]

plt.rcParams['axes.prop_cycle'] = plt.cycler(color=color_list)

seed = 42
np.random.seed(seed)
tf.random.set_seed(seed)

gpu = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(gpu[0], True)


In [6]:
df = pd.read_csv('weather_dam_wrn.csv',encoding='cp949',index_col=[0])
df.index = pd.to_datetime(df.index)

In [7]:
def col_lagger(df,x,num_lags):
    df2 = copy.deepcopy(df)
    for lag in range(1,num_lags+1):
        df2[x+'_'+str(lag)] = df2[x].shift(lag)
        
    return df2

In [8]:
# split a multivariate sequence into samples
def split_sequences(sequences, n_steps_in, n_steps_out):
    X, y = list(), list()
    for i in range(len(sequences)):
        # find the end of this pattern
        end_ix = i + n_steps_in
        out_end_ix = end_ix + n_steps_out-1
        # check if we are beyond the dataset
        if out_end_ix > len(sequences):
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequences[i:end_ix, :-1], sequences[end_ix-1:out_end_ix, -1]
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)

In [9]:
df.drop(columns=['대설경보', '태풍경보', '폭염경보', '한파경보'],inplace=True)

In [11]:
df[['강우량(mm)+1']] = df[['강우량(mm)']].shift(-1)
#df[['강우량(mm)+2']] = df[['강우량(mm)']].shift(-2)
df = col_lagger(df,'강우량(mm)',7)
df = col_lagger(df,'유입량(㎥/s)',7)
df = col_lagger(df,'최저기온(°C)',7)
df = col_lagger(df,'최고기온(°C)',7)
df = col_lagger(df,'평균 풍속(m/s)',7)
df = col_lagger(df,'호우경보',7)
df = col_lagger(df,'평균 상대습도(%)',7)
df = col_lagger(df,'합계 일사량(MJ/m2)',7)

In [12]:
df = df.drop(columns=['최저기온(°C)', '최고기온(°C)', 
                      '평균 풍속(m/s)', '평균 상대습도(%)',
                      '합계 일사량(MJ/m2)','호우경보'])
df = df.dropna()

In [15]:
X = df[[c for c in df.columns if '유입량(㎥/s)'  != c]].values
y = df[['유입량(㎥/s)']].values

data_stacked = np.hstack((X,y))

del X
del y
X,y = split_sequences(data_stacked,1,2)
train_length = int(df.shape[0]*0.8)

train_X , train_y = X[:train_length, :] , y[:train_length, :]
test_X , test_y = X[train_length:, :] , y[train_length:, :]

# Scaling X and y

In [17]:
scaler1x = StandardScaler()
scaler1y = StandardScaler()

#Scale X
train_X_prescaled = train_X.reshape(train_X.shape[0],-1)
train_X1 = scaler1x.fit_transform(train_X_prescaled).reshape(train_X_prescaled.shape[0],1,train_X_prescaled.shape[-1])

test_X_prescaled = test_X.reshape(test_X.shape[0],-1)
test_X1 = scaler1x.transform(test_X_prescaled).reshape(test_X.shape[0],1,test_X.shape[-1])

#Scale y
train_y_prescaled = train_y.reshape(train_y.shape[0],-1)
train_y1 = scaler1y.fit_transform(train_y_prescaled).reshape(train_y.shape[0],1,train_y.shape[-1])

test_y_prescaled = test_y.reshape(test_y.shape[0],-1)
test_y1 = scaler1y.transform(test_y_prescaled).reshape(test_y.shape[0],1,test_y.shape[-1])

# Testing with Bi_LSTM seq2seq

In [19]:
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

In [20]:
#opt = tf.keras.optimizers.Adam(learning_rate=0.01)



lr=[0.1,0.01,0.001]


for batch in tqdm_notebook([64,128,256], desc='Batch Size'):
    for rate in tqdm_notebook(lr,leave=False, desc='Learning Rate'):
        #print(f'batch: {batch}')
        #print(f'rate: {rate}')
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                                  min_delta=0.1,
                                  patience=5, min_lr=1e-5, vebose=1)

        opt = tf.keras.optimizers.Adam(learning_rate = rate)
        encoder_inputs = Input(shape=(train_X1.shape[1], train_X1.shape[2]))


        encoder_l1 = Bidirectional(LSTM(177,
                                        return_sequences=True,
                                        activation='selu',
                                        kernel_initializer='lecun_normal',                                
                                        return_state=True,
                                        recurrent_dropout=0.5))



        encoder_outputs1 = encoder_l1(encoder_inputs)
        encoder_states1  = encoder_outputs1[1:]

        encoder_l2 = Bidirectional(LSTM(177,
                                        return_sequences=True,
                                        activation='selu',
                                        kernel_initializer='lecun_normal',
                                        return_state=True,
                                        recurrent_dropout=0.5))



        encoder_outputs2 = encoder_l2(encoder_outputs1[0])
        encoder_states2 = encoder_outputs2[1:]

        encoder_l3 = Bidirectional(LSTM(177,
                                        return_sequences=False,
                                        activation='selu',
                                        kernel_initializer='lecun_normal',                                
                                        return_state=True,
                                        recurrent_dropout=0.5))


        encoder_outputs3 = encoder_l3(encoder_outputs2[0])
        encoder_states3 = encoder_outputs3[1:]

        #Decoder
        decoder_inputs = RepeatVector(1)(encoder_outputs3[0])

        decoder_l1 = Bidirectional(LSTM(177,
                                        return_sequences=True,
                                        activation='selu',
                                        kernel_initializer='lecun_normal',                                
                                        recurrent_dropout=0.5))(decoder_inputs,initial_state=encoder_states1)


        decoder_l2 = Bidirectional(LSTM(177,
                                        return_sequences=True,
                                        activation='selu',
                                        kernel_initializer='lecun_normal',                                
                                        recurrent_dropout=0.5))(decoder_l1,initial_state=encoder_states2)


        decoder_l3 = Bidirectional(LSTM(177,
                                        return_sequences=True,
                                        activation='selu',
                                        kernel_initializer='lecun_normal',                                
                                        recurrent_dropout=0.5))(decoder_l2,initial_state=encoder_states3)


        decoder_outputs2 = TimeDistributed(Dense(2))(decoder_l3)

        model = tf.keras.models.Model(encoder_inputs,decoder_outputs2)
        model.compile(loss='mse',optimizer = opt,metrics = ['mse'])
        
        history = model.fit(train_X1, train_y1, 
                            epochs=3000, 
                            verbose=0,
                            batch_size=batch,
                            shuffle=False,
                            validation_split=0.1,
                            callbacks=[reduce_lr])

        print()
        print(f'Size 38, Batch Size: {batch}, Learning Rate:{rate}\n\n') 
        
        y_hat = model.predict(test_X1)

        y_hat_inv = scaler1y.inverse_transform(y_hat).reshape(y_hat.shape[0],-1)
        test_y1_inv = scaler1y.inverse_transform(test_y1).reshape(test_y1.shape[0],-1)

        print(f'RMSE 1일차: {mean_squared_error(test_y1_inv[:,0],y_hat_inv[:,0],squared=False):.2f}')
        print(f'MAE 1일차: {mean_absolute_error(test_y1_inv[:,0],y_hat_inv[:,0]):.2f}')
        print(f'R2 1일: {r2_score(test_y1_inv[:,0],y_hat_inv[:,0])}')
        print(f'Max Error 1일: {max_error(test_y1_inv[:,0],y_hat_inv[:,0])}')
        print()
        print()
        print(f'RMSE 2일차: {mean_squared_error(test_y1_inv[:,1],y_hat_inv[:,1],squared=False):.2f}')
        print(f'MAE 2일차: {mean_absolute_error(test_y1_inv[:,1],y_hat_inv[:,1]):.2f}')
        print(f'R2 2일: {r2_score(test_y1_inv[:,1],y_hat_inv[:,1])}')
        print(f'Max Error 2일: {max_error(test_y1_inv[:,1],y_hat_inv[:,1])}')
        print()
        print()
        print(f'NSE: {he.nse(test_y1_inv,y_hat_inv)}')
        model_yaml = model.to_yaml()
        with open(f"bi_lstm177_lr{rate}_batch_{batch}.yaml", "w") as yaml_file:
            yaml_file.write(model_yaml)

        model.save_weights(f"bi_lstm177_lr{rate}_batch_{batch}.h5")
        print("Saved model to disk")

Batch Size:   0%|          | 0/3 [00:00<?, ?it/s]

Learning Rate:   0%|          | 0/3 [00:00<?, ?it/s]


Size 38, Batch Size: 64, Learning Rate:0.1


RMSE 1일차: 1065271000.78
MAE 1일차: 52053484.31
R2 1일: -47241469713544.14
Max Error 1일: 26986082502.7


RMSE 2일차: 1085431420.43
MAE 2일차: 56593813.54
R2 2일: -49045079890418.52
Max Error 2일: 24240076878.11


NSE: [0.50826524 0.48947681]
Saved model to disk

Size 38, Batch Size: 64, Learning Rate:0.01


RMSE 1일차: 262.50
MAE 1일차: 95.89
R2 1일: -1.8685027187502952
Max Error 1일: 2777.514599609375


RMSE 2일차: 311.85
MAE 2일차: 100.02
R2 2일: -3.04843857527578
Max Error 2일: 4985.08107421875


NSE: [0.64968691 0.50557413]
Saved model to disk

Size 38, Batch Size: 64, Learning Rate:0.001


RMSE 1일차: 49.01
MAE 1일차: 15.56
R2 1일: 0.9000163400955414
Max Error 1일: 699.0261279296874


RMSE 2일차: 70.08
MAE 2일차: 18.34
R2 2일: 0.7955455424697939
Max Error 2일: 1375.18703125


NSE: [0.95690011 0.9118634 ]
Saved model to disk


Learning Rate:   0%|          | 0/3 [00:00<?, ?it/s]


Size 38, Batch Size: 128, Learning Rate:0.1


RMSE 1일차: 4196195.12
MAE 1일차: 803332.40
R2 1일: -733017832.5092229
Max Error 1일: 91735920.47


RMSE 2일차: 4133908.77
MAE 2일차: 660674.85
R2 2일: -711397677.744174
Max Error 2일: 133463103.8


NSE: [0.49243088 0.50738728]
Saved model to disk

Size 38, Batch Size: 128, Learning Rate:0.01


RMSE 1일차: 57.52
MAE 1일차: 17.14
R2 1일: 0.8622693624906312
Max Error 1일: 864.6687890625


RMSE 2일차: 71.46
MAE 2일차: 19.56
R2 2일: 0.7874509942772348
Max Error 2일: 1250.59572265625


NSE: [0.93862112 0.905276  ]
Saved model to disk

Size 38, Batch Size: 128, Learning Rate:0.001


RMSE 1일차: 50.25
MAE 1일차: 15.84
R2 1일: 0.8949016830860324
Max Error 1일: 806.8538867187499


RMSE 2일차: 63.90
MAE 2일차: 18.13
R2 2일: 0.8300150327043022
Max Error 2일: 1047.408466796875


NSE: [0.95245891 0.9231053 ]
Saved model to disk


Learning Rate:   0%|          | 0/3 [00:00<?, ?it/s]


Size 38, Batch Size: 256, Learning Rate:0.1


RMSE 1일차: 4653.72
MAE 1일차: 585.13
R2 1일: -900.5796645743117
Max Error 1일: 94256.59125


RMSE 2일차: 7462.92
MAE 2일차: 803.79
R2 2일: -2317.50549909761
Max Error 2일: 189205.93875


NSE: [0.71982445 0.27947877]
Saved model to disk

Size 38, Batch Size: 256, Learning Rate:0.01


RMSE 1일차: 66.46
MAE 1일차: 17.55
R2 1일: 0.8161492718243909
Max Error 1일: 1048.4978320312498


RMSE 2일차: 75.35
MAE 2일차: 19.20
R2 2일: 0.7636318214063293
Max Error 2일: 1314.359638671875


NSE: [0.92199367 0.89970806]
Saved model to disk

Size 38, Batch Size: 256, Learning Rate:0.001


RMSE 1일차: 44.17
MAE 1일차: 14.94
R2 1일: 0.9187830097333952
Max Error 1일: 574.7243701171874


RMSE 2일차: 58.59
MAE 2일차: 17.11
R2 2일: 0.8570843232582444
Max Error 2일: 1090.51076171875


NSE: [0.96340189 0.93559729]
Saved model to disk
