In [None]:
#!pip install ipynb

In [None]:
from ipynb.fs.full.CommonFunctions import col_lagger,split_sequences
import pandas as pd
import numpy as np
import hydroeval as he
import seaborn as sns
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import StandardScaler
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error,r2_score,mean_absolute_error,max_error
import os
import warnings
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import GradientBoostingRegressor

In [None]:
# Notebook-wide configuration: warning filter, plot styling, colour cycle, RNG seed.

# NOTE(review): this silences every warning for the rest of the notebook,
# including deprecation and convergence warnings raised by the libraries.
warnings.filterwarnings('ignore')

# Plot styling. Each rcParam is set exactly once; the earlier
# 'Nanum Brush Script OTF' assignment was overwritten immediately and has
# been removed, leaving 'NanumGothic' as the effective font family.
plt.style.use('ggplot')
plt.rcParams.update({
    'axes.unicode_minus': False,
    'font.family': 'NanumGothic',
    'font.size': 12,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    'axes.labelsize': 12,
})

# Named palette colours used for the default colour cycle.
CB91_Blue = '#2CBDFE'
CB91_Green = '#47DBCD'
CB91_Pink = '#F3A0F2'
CB91_Purple = '#9D2EC5'
CB91_Violet = '#661D98'
CB91_Amber = '#F5B14C'

color_list = [
    CB91_Blue,
    CB91_Pink,
    CB91_Green,
    CB91_Amber,
    CB91_Purple,
    CB91_Violet,
]

plt.rcParams['axes.prop_cycle'] = plt.cycler(color=color_list)

# Seed NumPy's global RNG once for the notebook.
seed = 42
np.random.seed(seed)

In [None]:
# Load the pre-processed table (CP949-encoded CSV). The first column becomes
# the index and is then converted to datetimes.
df = pd.read_csv(
    '..//Processed_data//weather_dam_wrn.csv',
    encoding='cp949',
    index_col=[0],
)
df.index = pd.to_datetime(df.index)

In [None]:
# Lead feature: shift(-1) places the following row's rainfall on the current
# row, so the last row becomes NaN.
# (A two-row lead, '강우량(mm)+2' via shift(-2), is currently disabled.)
df['강우량(mm)+1'] = df['강우량(mm)'].shift(-1)

# Apply the imported col_lagger helper to each listed column with argument 7.
# presumably this adds lagged copies of the column for the previous 7 rows —
# TODO confirm against CommonFunctions.col_lagger.
for _col in (
    '강우량(mm)',
    '유입량(㎥/s)',
    '최저기온(°C)',
    '최고기온(°C)',
    '평균 풍속(m/s)',
    '호우경보',
    '평균 상대습도(%)',
    '합계 일사량(MJ/m2)',
):
    df = col_lagger(df, _col, 7)

In [None]:
# Remove the listed raw columns, then discard every row that still contains a
# NaN (e.g. the NaN introduced by the shift(-1) lead feature).
df = df.drop(
    columns=[
        '최저기온(°C)',
        '최고기온(°C)',
        '평균 풍속(m/s)',
        '평균 상대습도(%)',
        '합계 일사량(MJ/m2)',
        '호우경보',
    ]
).dropna()

In [None]:
# Build one array with every non-target column first and the inflow target
# ('유입량(㎥/s)') as the last column, then window it with split_sequences.
data_stacked = np.hstack((
    df.loc[:, df.columns != '유입량(㎥/s)'].values,
    df[['유입량(㎥/s)']].values,
))

# split_sequences is an imported helper, called here with (data, 1, 2).
# presumably 1 input time step and 2 output steps — TODO confirm.
X, y = split_sequences(data_stacked, 1, 2)

# Chronological 80/20 split (no shuffling).
# NOTE(review): the cut point is derived from the row count of df, not from
# the number of windows returned by split_sequences — confirm this is intended.
train_length = int(len(df) * 0.8)

train_X, test_X = X[:train_length], X[train_length:]
train_y, test_y = y[:train_length], y[train_length:]

# Threshold used later to split samples into two regimes: the mean rainfall
# over the rows whose inflow exceeds 100.
df_100 = df[df['유입량(㎥/s)'] > 100]
df_100_mean = df_100['강우량(mm)'].mean()

# Scaling X and y

In [None]:
# Positions of the training samples whose first value (first row, first
# column of the sample) is strictly greater than df_100_mean.
# presumably that value is rainfall, if '강우량(mm)' is df's first column — TODO confirm.
train_idx = [
    pos for pos, sample in enumerate(train_X)
    if sample[0][0] > df_100_mean
]

In [None]:
# Positions of the test samples whose first value (first row, first column of
# the sample) is strictly greater than df_100_mean.
test_idx = [
    pos for pos, sample in enumerate(test_X)
    if sample[0][0] > df_100_mean
]

In [None]:
# Standardise features and targets with separate scalers.
scaler1x = StandardScaler()
scaler1y = StandardScaler()

# Flatten every sample to one row so the scalers receive 2-D input.
train_X_prescaled = train_X.reshape(len(train_X), -1)
test_X_prescaled = test_X.reshape(len(test_X), -1)
train_y_prescaled = train_y.reshape(len(train_y), -1)
test_y_prescaled = test_y.reshape(len(test_y), -1)

# Statistics are fitted on the training split only; the test split is
# transformed with those same statistics.
train_X1 = scaler1x.fit_transform(train_X_prescaled)
test_X1 = scaler1x.transform(test_X_prescaled)

train_y1 = scaler1y.fit_transform(train_y_prescaled)
test_y1 = scaler1y.transform(test_y_prescaled)

In [None]:
# Sanity check: shapes of the scaled train/test features and targets.
for _arr in (train_X1, test_X1, train_y1, test_y1):
    print(_arr.shape)

# Selecting samples above the threshold (first feature > df_100_mean)

In [None]:
# Rows of the scaled arrays at the positions collected in train_idx / test_idx
# (samples whose first value exceeded df_100_mean).
train_X_ge_100, train_y1_ge_100 = train_X1[train_idx], train_y1[train_idx]
test_X_ge_100, test_y1_ge_100 = test_X1[test_idx], test_y1[test_idx]

In [None]:
# Sanity check: sizes of the above-threshold subsets.
for _arr in (train_X_ge_100, train_y1_ge_100, test_X_ge_100, test_y1_ge_100):
    print(_arr.shape)

# Selecting the remaining samples (first feature <= df_100_mean)

In [None]:
# All row positions of the scaled train / test arrays. Sizes are read from the
# arrays themselves rather than hard-coded (the test size used to be the
# literal 1130), so the cell stays correct when the data changes.
tr_idx = list(range(train_X1.shape[0]))
te_idx = list(range(test_X1.shape[0]))

In [None]:
# Complement of the above-threshold positions: every position in tr_idx /
# te_idx that is not in train_idx / test_idx, in ascending order.
# Sets make each membership test O(1) instead of scanning a list.
_train_selected = set(train_idx)
_test_selected = set(test_idx)

tr_le_100 = [i for i in tr_idx if i not in _train_selected]
te_le_100 = [i for i in te_idx if i not in _test_selected]

In [None]:
# Rows of the scaled arrays at the complement positions (samples whose first
# value did not exceed df_100_mean).
train_X_le_100, train_y1_le_100 = train_X1[tr_le_100], train_y1[tr_le_100]
test_X_le_100, test_y1_le_100 = test_X1[te_le_100], test_y1[te_le_100]

In [None]:
# Sanity check: sizes of the at-or-below-threshold subsets.
for _arr in (train_X_le_100, train_y1_le_100, test_X_le_100, test_y1_le_100):
    print(_arr.shape)

# Running MLP with inflow greater than 100

In [None]:
# MLP for the above-threshold subset, one independent regressor per target
# column via MultiOutputRegressor.
# NOTE(review): per the scikit-learn MLPRegressor documentation, batch_size,
# learning_rate and shuffle apply to the 'sgd'/'adam' solvers; with
# solver='lbfgs' they presumably have no effect — confirm.
mlp = MLPRegressor(
    hidden_layer_sizes=(100, 100, 100),
    activation='logistic',
    solver='lbfgs',
    batch_size=32,
    learning_rate='constant',
    shuffle=False,
)
wrapper_mlp = MultiOutputRegressor(mlp)

# Fit on the scaled subset and predict the matching test subset.
# fit() returns the estimator itself, so mlp2 is the fitted wrapper.
mlp2 = wrapper_mlp.fit(train_X_ge_100, train_y1_ge_100)
mlp_pred = mlp2.predict(test_X_ge_100)

# Map both ground truth and predictions back to the original target scale.
test_y1_ge_100_inv = scaler1y.inverse_transform(test_y1_ge_100)
mlp_pred_inv = scaler1y.inverse_transform(mlp_pred)

# Running GB with inflow less than 100

In [None]:
# Gradient boosting for the at-or-below-threshold subset, one independent
# regressor per target column via MultiOutputRegressor.
# NOTE(review): newer scikit-learn releases spell 'mse' and 'ls' as
# 'squared_error' — confirm against the installed version before upgrading.
gb = GradientBoostingRegressor(
    loss='ls',
    criterion='mse',
    learning_rate=0.1,
    n_estimators=300,
)
wrapper_gb = MultiOutputRegressor(gb)

# Fit on the scaled subset and predict the matching test subset.
# fit() returns the estimator itself, so gb2 is the fitted wrapper.
gb2 = wrapper_gb.fit(train_X_le_100, train_y1_le_100)
gb_pred = gb2.predict(test_X_le_100)

# Map both ground truth and predictions back to the original target scale.
test_y1_le_100_inv = scaler1y.inverse_transform(test_y1_le_100)
gb_pred_inv = scaler1y.inverse_transform(gb_pred)

# Running rf with inflow less than 100

In [None]:
# Random forest for the at-or-below-threshold subset, one independent
# regressor per target column via MultiOutputRegressor.
# NOTE(review): newer scikit-learn releases no longer accept 'mse' or
# max_features='auto' — confirm against the installed version before upgrading.
rf = RandomForestRegressor(
    criterion='mse',
    max_features='auto',
    n_estimators=100,
)

# The wrapper gets its own name; it used to be bound to `wrapper_gb`, a
# copy-paste leftover that overwrote the gradient-boosting wrapper's name.
wrapper_rf = MultiOutputRegressor(rf)

# Ground truth for this subset on the original target scale.
test_y1_le_100_inv = scaler1y.inverse_transform(test_y1_le_100)

# Fit the random forest, predict the test subset, and undo the target scaling.
rf2 = wrapper_rf.fit(train_X_le_100, train_y1_le_100)
rf_pred = rf2.predict(test_X_le_100)
rf_pred_inv = scaler1y.inverse_transform(rf_pred)

# GB_MLP

In [None]:
# Combined GB+MLP result: MLP rows (above threshold) first, then GB rows.
# Predictions and ground truth are stacked in the same order so rows line up.
gb_mlp_pred = np.vstack([mlp_pred_inv, gb_pred_inv])
gb_mlp_real = np.vstack([test_y1_ge_100_inv, test_y1_le_100_inv])

for _stacked in (gb_mlp_pred, gb_mlp_real):
    print(_stacked.shape)

# RF_MLP

In [None]:
# Combined RF+MLP predictions: MLP rows (above threshold) first, then RF rows.
rf_mlp_pred = np.vstack([mlp_pred_inv, rf_pred_inv])
print(rf_mlp_pred.shape)

In [None]:
# Ground truth in the same row order as rf_mlp_pred (above-threshold rows
# first, then the rest).
rf_mlp_real = np.vstack((test_y1_ge_100_inv, test_y1_le_100_inv))
# Report the array just built (this used to print gb_mlp_real's shape).
print(rf_mlp_real.shape)

In [None]:
# Error metrics for the combined RF+MLP forecast, one block per target column
# (column 0 = first day, column 1 = second day), followed by the NSE.
print('..... RF MLP ..... ')
print('*'*40)

for _col, _day in enumerate(('first', 'second')):
    _observed = rf_mlp_real[:, _col]
    _predicted = rf_mlp_pred[:, _col]

    # RMSE as the square root of the MSE: this avoids the `squared=False`
    # keyword, which is not accepted by every scikit-learn release.
    _rmse = np.sqrt(mean_squared_error(_observed, _predicted))

    print(f'RMSE for the {_day} day: {_rmse}')
    print(f'MAE for the {_day} day: {mean_absolute_error(_observed, _predicted)}\n')
    print()

# hydroeval's nse takes (simulations, evaluation) — the opposite order to the
# scikit-learn metrics above. NSE is not symmetric, so predictions go first
# and observations second.
print(f'NSE : {he.nse(rf_mlp_pred, rf_mlp_real)}')
print()