In [None]:
from ipynb.fs.full.CommonFunctions import col_lagger,split_sequences
import pandas as pd
import numpy as np
import hydroeval as he
from sklearn.preprocessing import StandardScaler
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error,r2_score
from keras.models import *
from keras.layers import *
import os
import warnings
from keras.optimizers import *
from tqdm import tqdm_notebook
from keras.callbacks import *
from keras.models import model_from_yaml
import gc

In [None]:
# Notebook-wide configuration: silence warnings / TensorFlow logging, set the
# plotting style and seed the NumPy and TensorFlow random generators.
warnings.filterwarnings('ignore')
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
plt.style.use('ggplot')

# Custom colour palette used as the default line-colour cycle.
CB91_Blue = '#2CBDFE'
CB91_Green = '#47DBCD'
CB91_Pink = '#F3A0F2'
CB91_Purple = '#9D2EC5'
CB91_Violet = '#661D98'
CB91_Amber = '#F5B14C'

color_list = [
    CB91_Blue,
    CB91_Pink,
    CB91_Green,
    CB91_Amber,
    CB91_Purple,
    CB91_Violet,
]

# 'font.family' used to be assigned twice ('NanumGothic' first, then
# 'Nanum Brush Script OTF'). Only the last assignment ever took effect, so the
# overridden value is dropped and all rcParams are set in a single place.
# (mpl.rcParams and plt.rcParams are the same object.)
plt.rcParams.update({
    'axes.unicode_minus': False,  # draw minus signs with the ASCII hyphen
    'font.family': 'Nanum Brush Script OTF',
    'font.size': 12,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    'axes.labelsize': 12,
    'axes.prop_cycle': plt.cycler(color=color_list),
})

# Fixed seed for the NumPy and TensorFlow global random generators.
seed = 42
np.random.seed(seed)
tf.random.set_seed(seed)

In [None]:
# NOTE(review): TF_CPP_MIN_LOG_LEVEL is normally picked up when TensorFlow's
# native library is loaded; tensorflow is already imported in the first cell,
# so this assignment may come too late to have an effect - confirm, and move
# it above the tensorflow import if the C++ log output is still noisy.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Let TensorFlow allocate GPU memory on demand instead of reserving it all.
# The setting is applied to every visible GPU (TensorFlow requires memory
# growth to be identical across GPUs), and an empty device list is handled
# instead of failing with IndexError on gpu[0].
gpu = tf.config.experimental.list_physical_devices('GPU')
if not gpu:
    print('No GPU detected; TensorFlow will run on CPU.')
try:
    for device in gpu:
        tf.config.experimental.set_memory_growth(device, True)
except RuntimeError as err:
    # Raised when the GPUs were already initialised (e.g. the cell is re-run).
    print(f'Could not enable GPU memory growth: {err}')

In [None]:
# Read the pre-processed weather/dam table (CP949-encoded CSV, first column as
# the index) and convert that index to datetimes.
df = (
    pd.read_csv(
        '..//Processed_data//weather_dam_wrn.csv',
        encoding='cp949',
        index_col=[0],
    )
    .pipe(lambda frame: frame.set_index(pd.to_datetime(frame.index)))
)

In [None]:
# Rainfall of the following row as an extra column: shift(-1) moves every
# value up by one row, which leaves NaN in the last row.
df['강우량(mm)+1'] = df['강우량(mm)'].shift(-1)
#df[['강우량(mm)+2']] = df[['강우량(mm)']].shift(-2)

# Apply col_lagger with a window of 7 to each column below, in this order.
# presumably col_lagger appends lagged copies of the column - TODO confirm
# against CommonFunctions.
lag_window = 7
columns_to_lag = (
    '강우량(mm)',
    '유입량(㎥/s)',
    '최저기온(°C)',
    '최고기온(°C)',
    '평균 풍속(m/s)',
    '호우경보',
    '평균 상대습도(%)',
    '합계 일사량(MJ/m2)',
)
for column_name in columns_to_lag:
    df = col_lagger(df, column_name, lag_window)

In [None]:
# Remove the un-lagged versions of these six variables (rainfall and inflow
# keep their original columns), then discard every row that still has a NaN.
raw_columns_to_drop = [
    '최저기온(°C)',
    '최고기온(°C)',
    '평균 풍속(m/s)',
    '평균 상대습도(%)',
    '합계 일사량(MJ/m2)',
    '호우경보',
]
df = df.drop(columns=raw_columns_to_drop).dropna()

In [None]:
# Stack all feature columns first and the target (inflow) as the last column.
target_col = '유입량(㎥/s)'
feature_cols = [name for name in df.columns if name != target_col]
data_stacked = np.concatenate(
    (df[feature_cols].values, df[[target_col]].values),
    axis=1,
)

# Window the stacked array with split_sequences(data, 1, 2).
# presumably 1 input step and 2 output steps - TODO confirm against
# CommonFunctions.
X, y = split_sequences(data_stacked, 1, 2)

# Chronological split: the first 80 % (by row count of df) is used for
# training, the remainder for testing.
# NOTE(review): the cut-off is derived from df, not from X; if split_sequences
# returns fewer windows than df has rows the train share is slightly above
# 80 % of the windows - confirm this is intended.
train_length = int(len(df) * 0.8)
train_rows = slice(None, train_length)
test_rows = slice(train_length, None)

train_X, test_X = X[train_rows, :], X[test_rows, :]
train_y, test_y = y[train_rows, :], y[test_rows, :]

# Scaling X and y

In [None]:
# Standardise features and targets. Both scalers are fitted on the training
# split only and then applied unchanged to the test split.
scaler1x, scaler1y = StandardScaler(), StandardScaler()

# Features: flatten each sample to one row, scale, then restore a length-1
# middle axis.
train_X_prescaled = np.reshape(train_X, (train_X.shape[0], -1))
test_X_prescaled = np.reshape(test_X, (test_X.shape[0], -1))
scaler1x.fit(train_X_prescaled)
train_X1 = np.reshape(
    scaler1x.transform(train_X_prescaled),
    (train_X_prescaled.shape[0], 1, train_X_prescaled.shape[-1]),
)
test_X1 = np.reshape(
    scaler1x.transform(test_X_prescaled),
    (test_X.shape[0], 1, test_X.shape[-1]),
)

# Targets: same procedure with the second scaler.
train_y_prescaled = np.reshape(train_y, (train_y.shape[0], -1))
test_y_prescaled = np.reshape(test_y, (test_y.shape[0], -1))
scaler1y.fit(train_y_prescaled)
train_y1 = np.reshape(
    scaler1y.transform(train_y_prescaled),
    (train_y.shape[0], 1, train_y.shape[-1]),
)
test_y1 = np.reshape(
    scaler1y.transform(test_y_prescaled),
    (test_y.shape[0], 1, test_y.shape[-1]),
)

In [None]:
# Sanity check: shapes of the scaled train/test features and targets.
for scaled_array in (train_X1, test_X1, train_y1, test_y1):
    print(scaled_array.shape)

# Grid Search for SimpleRNN 

In [None]:
# Grid search over SGD learning rate and batch size for a three-layer
# SimpleRNN baseline. For every combination the model is trained, evaluated on
# the test split in original (un-scaled) units, and its architecture and
# weights are written to disk.
lr = [0.1,0.01,0.001]
batches = [64,128,256]

for batch in tqdm_notebook(batches, desc='Batch Size'):
    for rate in tqdm_notebook(lr, leave=False, desc='Learning Rate'):
        # Nine models are built in this loop; resetting Keras' global state
        # first keeps it from growing with every iteration.
        tf.keras.backend.clear_session()

        # --- model -----------------------------------------------------
        # Everything comes from tf.keras so that layers, callbacks, model
        # and optimizer all belong to the same Keras implementation.
        model_input = tf.keras.layers.Input(
            shape=(train_X1.shape[1], train_X1.shape[2]))
        RNN1 = tf.keras.layers.SimpleRNN(59, return_sequences=True)(model_input)
        RNN2 = tf.keras.layers.SimpleRNN(59, return_sequences=True)(RNN1)
        RNN3 = tf.keras.layers.SimpleRNN(59, return_sequences=True)(RNN2)
        # Two linear outputs per time step (first / second forecast day).
        output = tf.keras.layers.TimeDistributed(
            tf.keras.layers.Dense(2, activation='linear'))(RNN3)

        model = tf.keras.models.Model(model_input, output)

        opt = tf.keras.optimizers.SGD(learning_rate=rate)
        model.compile(loss='mse', optimizer=opt, metrics=['mse'])

        # --- training --------------------------------------------------
        # `verbose` was previously misspelled as `vebose`, so learning-rate
        # reductions were never reported.
        reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss', factor=0.2,
            min_delta=0.1,
            patience=5, min_lr=1e-5, verbose=1)
        history = model.fit(train_X1, train_y1,
                            epochs=3000,
                            verbose=0,
                            batch_size=batch,
                            shuffle=False,  # keep chronological order
                            validation_split=0.1,
                            callbacks=[reduce_lr])

        print(f'\n\nBatch Size: {batch}, Learning Rate:{rate}\n\n') 

        # --- evaluation ------------------------------------------------
        y_hat = model.predict(test_X1)

        # Flatten (samples, 1, 2) -> (samples, 2) BEFORE undoing the scaling:
        # scaler1y was fitted on 2-D data and recent scikit-learn versions
        # reject 3-D input to inverse_transform.
        y_hat_inv = scaler1y.inverse_transform(
            y_hat.reshape(y_hat.shape[0], -1))
        test_y1_inv = scaler1y.inverse_transform(
            test_y1.reshape(test_y1.shape[0], -1))

        # RMSE via np.sqrt(MSE): same value as `squared=False`, but also works
        # on scikit-learn versions where that keyword was removed.
        for day_idx, day_name in enumerate(('first', 'second')):
            observed = test_y1_inv[:, day_idx]
            predicted = y_hat_inv[:, day_idx]
            print(f'RMSE for {day_name} day: {np.sqrt(mean_squared_error(observed, predicted)):.2f}')
            print(f'MAE for {day_name} day: {mean_absolute_error(observed, predicted):.2f}\n\n')

        # hydroeval's signature is nse(simulations, evaluation): predictions
        # first, observations second. The arguments used to be swapped, which
        # normalised by the variance of the predictions instead of the
        # observations.
        print(f'NSE: {he.nse(y_hat_inv, test_y1_inv)}\n')

        # --- persistence -----------------------------------------------
        # NOTE(review): Model.to_yaml() was removed in TensorFlow 2.6 (it now
        # raises RuntimeError). Kept so the saved artefacts stay compatible
        # with any existing model_from_yaml loader; switch both sides to
        # to_json()/model_from_json when upgrading TensorFlow.
        model_yaml = model.to_yaml()
        with open(f"baselineRNN_lr{rate}_batch_{batch}.yaml", "w") as yaml_file:
            yaml_file.write(model_yaml)

        model.save_weights(f"baselineRNN_lr{rate}_batch_{batch}.h5")
        print("Saved model to disk")
        print('*'*60)

        # Release references before the next configuration is built.
        del model
        del y_hat

        gc.collect()