In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd
from keras.layers import LSTM, Dropout, Dense
from keras.layers import SimpleRNN
from keras.layers import GRU
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
# import Sequential
from keras.models import Sequential

**Read data**


In [None]:
# Load the merged IMD table and discard the 'Unnamed: 0' column before listing
# the remaining column labels.
df = pd.read_csv("merged_imd.csv").drop(columns=['Unnamed: 0'])
print(df.columns)

In [None]:
# Display the (rows, columns) shape of the frame as loaded.
df.shape

In [None]:
# Keep only the first 157 columns (selected by position, not by label) and
# display the resulting shape.
# NOTE(review): 157 is a hard-coded column count — confirm it matches the layout of the CSV.
df = df.iloc[:, :157]
df.shape

In [None]:
# Display the shape again; when cells run in order, `df` has already been cut
# down to its first 157 columns by the previous cell.
df.shape

In [None]:
# Parse the 'DateTime' column and promote it to the index, then aggregate every
# column to calendar-month means ('M' resampling rule).
df = df.assign(DateTime=pd.to_datetime(df['DateTime'])).set_index('DateTime')
monthly_mean = df.resample('M').mean()
monthly_mean.shape

**Actual value**


In [None]:
# Load the workbook that holds the target series; `func` later scales
# `observed_runnoff` as the model target.
df_actual = pd.read_excel("Calibrated and Validated.xlsx")
# Select the 'observed' column by name (presumably the last column of the sheet — TODO confirm).
observed_runnoff = df_actual['observed']
# observed_runnoff.head()
# Display the length of the target series.
observed_runnoff.shape

**Normalization**


In [None]:
def func(name):
    """Scale the notebook-level features and target with the requested scaler.

    Reads two globals: ``monthly_mean`` (feature table) and
    ``observed_runnoff`` (target series). Features and target are each fitted
    with their own scaler instance, so the returned scaler holds only the
    target statistics.

    Parameters
    ----------
    name : str
        Either ``'StandardScaler'`` or ``'MinMaxScaler'``.

    Returns
    -------
    x : ndarray
        ``fit_transform`` of ``monthly_mean``.
    y : ndarray
        ``fit_transform`` of the target reshaped to a single column.
    inv : fitted scaler
        The scaler fitted on the target; use ``inv.inverse_transform`` to map
        predictions back to original units.

    Raises
    ------
    ValueError
        If ``name`` is not one of the supported scaler names (previously the
        function silently returned ``(0, 0, 0)``).
    """
    if name == 'StandardScaler':
        x_scaler = StandardScaler()
        y_scaler = StandardScaler()
    elif name == 'MinMaxScaler':
        x_scaler = MinMaxScaler(feature_range=(0, 1))
        y_scaler = MinMaxScaler(feature_range=(0, 1))
    else:
        raise ValueError(
            "Unknown scaler name %r; expected 'StandardScaler' or 'MinMaxScaler'" % (name,)
        )

    # NOTE(review): both scalers are fitted on the full data set, i.e. before
    # any train/test split done by the caller — confirm this is intended.
    x = x_scaler.fit_transform(monthly_mean)
    y = y_scaler.fit_transform(observed_runnoff.values.reshape(-1, 1))

    return x, y, y_scaler

**Metric**

In [None]:
from sklearn.model_selection import train_test_split
from keras.optimizers import Adam, RMSprop, SGD

In [None]:
# Search space iterated by the grid-search cell: recurrent-layer activations,
# optimizer names passed to `model.compile`, and scaler names passed to `func`.
activation = [ 'tanh','relu','sigmoid']
optimizer = ['adam','RMSprop','sgd']
preprocess = ['StandardScaler','MinMaxScaler']

In [None]:
from sklearn.metrics import mean_squared_error

def rmse1(yt, yp):  # lower is better
    """Return the root-mean-square error between targets ``yt`` and predictions ``yp``."""
    mse = mean_squared_error(yt, yp)
    return np.sqrt(mse)

# Kling-Gupta efficiency
def kge1(yt, yp):  # higher is better
    """Return the Kling-Gupta efficiency of predictions ``yp`` against ``yt``.

    Combines three components, each ideally equal to 1:
    ``r`` (entry [0, 1] of the correlation matrix of yt and yp),
    ``alpha`` (ratio of standard deviations, yp over yt) and
    ``beta`` (ratio of means, yp over yt). The score is 1 minus the Euclidean
    distance of (r, alpha, beta) from (1, 1, 1).
    """
    correlation_matrix = np.corrcoef(yt, yp, rowvar=False)
    r = correlation_matrix[0, 1]
    alpha = np.std(yp) / np.std(yt)
    beta = np.mean(yp) / np.mean(yt)
    squared_distance = sum((component - 1)**2 for component in (r, alpha, beta))
    return 1 - np.sqrt(squared_distance)

# Nash-Sutcliffe efficiency (NSE)
def nse1(yt, yp):  # higher is better, 1 is a perfect fit
    """Return 1 - SS_res / SS_tot for targets ``yt`` and predictions ``yp``.

    Both inputs are converted to flat float arrays first. Without that, mixing
    a 1-D array with an (n, 1) column makes ``yt - yp`` broadcast to an (n, n)
    matrix and silently yields a wrong score. Inputs that already share a shape
    give the same result as before.
    """
    observed = np.asarray(yt, dtype=float).ravel()
    simulated = np.asarray(yp, dtype=float).ravel()
    residual_ss = np.sum((observed - simulated)**2)
    total_ss = np.sum((observed - np.mean(observed))**2)
    return 1 - residual_ss / total_ss

# R squared (coefficient of determination)
def r21(yt, yp):  # higher is better
    """Return the coefficient of determination, 1 - SS_res / SS_tot.

    Inputs are flattened to 1-D float arrays before the subtraction so that a
    1-D array paired with an (n, 1) column is compared element by element
    instead of being broadcast to an (n, n) matrix. Same-shaped inputs give
    the same result as before.
    """
    y_true = np.asarray(yt, dtype=float).ravel()
    y_hat = np.asarray(yp, dtype=float).ravel()
    ss_res = np.sum((y_true - y_hat)**2)
    ss_tot = np.sum((y_true - np.mean(y_true))**2)
    return 1 - ss_res / ss_tot

In [None]:
# Grid search over activation x optimizer x scaler. Each configuration trains a
# fresh stacked GRU/LSTM network; the search stops at the first configuration
# whose KGE is above 0.7 on both the training and the test split, and appends
# that configuration's metrics to 'imd_gru_lstm2.csv'.
i = 0  # number of accepted configurations; stays 0 when none passes the threshold
search_space = ((a, o, p) for a in activation for o in optimizer for p in preprocess)
for act, opt, pre in search_space:
    x, y, inv_scaler = func(pre)
    # shuffle=False keeps the row order: the last 20 % of rows form the test split.
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, shuffle=False)

    model = keras.Sequential()
    model.add(GRU(128, return_sequences=True, activation=act, input_shape=(X_train.shape[1], 1)))
    model.add(Dense(128))
    model.add(LSTM(128, activation=act, return_sequences=True))
    model.add(Dropout(0.5))
    model.add(LSTM(128, activation=act, return_sequences=True))
    model.add(Dense(128))
    model.add(LSTM(128, activation=act))
    model.add(Dropout(0.5))
    model.add(layers.Dense(1))
    model.compile(optimizer=opt, loss='mse')
    # `use_multiprocessing` was dropped: it has no effect for in-memory arrays
    # and is not an accepted argument of Model.fit in Keras 3.
    model.fit(X_train, y_train, batch_size=32, epochs=5, shuffle=False, verbose=0, validation_split=0.1)

    y_pred = model.predict(X_test)
    y_pred_train = model.predict(X_train)
    try:
        # Compute each KGE once and reuse it for the threshold test and the report.
        kge_train = kge1(y_train, y_pred_train)
        kge_test = kge1(y_test, y_pred)
        if kge_train > 0.7 and kge_test > 0.7:
            result = pd.DataFrame(
                {
                    'pre': pre,
                    'act': act,
                    'opt': opt,
                    'rmse_train': rmse1(y_train, y_pred_train),
                    'rmse_test': rmse1(y_test, y_pred),
                    'kge_train': kge_train,
                    'kge_test': kge_test,
                    'r2_train': r21(y_train, y_pred_train),
                    'r2_test': r21(y_test, y_pred),
                },
                index=[0],
            )
            # NOTE(review): mode='a' with header=True writes a header row on
            # every append, so repeated runs interleave headers with data.
            result.to_csv('imd_gru_lstm2.csv', mode='a', header=True)
            i += 1
            break  # keep this model/split/scaler for the cells that follow
    except Exception as e:
        # Best effort: a configuration whose metrics cannot be computed is
        # reported and skipped so the search can continue.
        print(e)

In [None]:
# Export the current model's predictions for both splits, mapped back to the
# original target units with the target scaler.

# Training split: predict -> undo scaling -> CSV.
y_pred_train = model.predict(X_train)
y_inv_train = inv_scaler.inverse_transform(y_pred_train)
pd.DataFrame(y_inv_train).to_csv('gru_lstm_train1.csv')

# Test split: predict -> undo scaling -> CSV.
y_pred_test = model.predict(X_test)
y_inv_test = inv_scaler.inverse_transform(y_pred_test)
pd.DataFrame(y_inv_test).to_csv('gru_lstm_test1.csv')

**SSP**

In [None]:
# Run the trained model on the SSP 2-4.5 scenario features and append the
# predictions (in original target units) to a CSV file.
df_ssp = pd.read_csv('SSP_monthly_245.csv')
df_ssp['DateTime'] = pd.to_datetime(df_ssp['DateTime'])
df_ssp.set_index('DateTime', inplace=True)
print(df_ssp.shape)

# `func` reads the notebook-level `monthly_mean`, so point that name at the
# scenario table only for the duration of the call and then restore it.
# Leaving it rebound would make every later `func(...)` call pair scenario
# features with the observed target.
_monthly_mean_before_ssp = monthly_mean
monthly_mean = df_ssp
try:
    # NOTE(review): the scaler name is hard-coded and the feature scaler is
    # re-fitted on the scenario data — confirm this matches how `model` was trained.
    x, y, inv_scaler = func('StandardScaler')
finally:
    monthly_mean = _monthly_mean_before_ssp

print(x.shape)
y_pred = model.predict(x)
print(y_pred.shape)
y_inv = inv_scaler.inverse_transform(y_pred)
pd.DataFrame(y_inv).to_csv('SSP_monthly_245_gru_lstm_2.csv', mode='a', header=True)


**Optimized parameters**

In [None]:
# Narrow the search space to a single configuration; the retraining loop in the
# next cell iterates over these three lists.
activation = [ 'sigmoid']
optimizer = ['RMSprop']
preprocess = ['StandardScaler']

In [None]:
from sklearn.metrics import r2_score
# Retrain the GRU -> Dense -> LSTM network from scratch until one run reaches a
# KGE above 0.7 on both the training and the test split.
# NOTE(review): there is no cap on the number of attempts, so this cell runs
# indefinitely if the threshold is never reached.
a = 1  # loop flag: set to 0 once a run is accepted
while a:
    configurations = (
        (act_name, opt_name, scaler_name)
        for act_name in activation
        for opt_name in optimizer
        for scaler_name in preprocess
    )
    for act, opt, pre in configurations:
        x, y, inv_scaler = func(pre)
        X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, shuffle=False)

        model = keras.Sequential()
        model.add(GRU(128, return_sequences=True, activation=act, input_shape=(X_train.shape[1], 1)))
        model.add(layers.Dense(128))
        model.add(LSTM(128, activation='relu'))
        model.add(layers.Dense(1))
        model.compile(optimizer=opt, loss='mse')
        # `use_multiprocessing` was dropped: it has no effect for in-memory
        # arrays and is not an accepted argument of Model.fit in Keras 3.
        model.fit(X_train, y_train, batch_size=64, epochs=10, shuffle=True, verbose=0, validation_split=0.1)

        y_pred = model.predict(X_test)
        y_pred_train = model.predict(X_train)
        # Compute each KGE once and reuse it for the log line and the check.
        kge_train = kge1(y_train, y_pred_train)
        kge_test = kge1(y_test, y_pred)
        print(kge_train, kge_test)
        if kge_train > 0.7 and kge_test > 0.70:
            a = 0
            print(a)
            # Stop right away so that a later configuration cannot overwrite
            # the accepted `model`, split and scaler.
            break
                # try:
                #     y= np.concatenate((y_pred_train,y_pred),axis=0)
                #     y_inv = inv_scaler.inverse_transform(y)
                #     pd.DataFrame(y_inv).to_csv('/Users/rahuljauhari/Desktop/research-runoff/results1/imd_gru_lstm_0.2.csv',mode='a',header=True)
                # except Exception as e:
                #     print(e)


In [None]:
# Predict on both splits with the most recently trained `model`.
y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)
# to csv
# Map the scaled predictions back to original target units with the target scaler.
y_inv_train=inv_scaler.inverse_transform(y_pred_train)
y_inv_test=inv_scaler.inverse_transform(y_pred_test)
# NOTE(review): these are the same file names the earlier export cell writes,
# so running this cell overwrites those files — confirm this is intended.
pd.DataFrame(y_inv_train).to_csv('gru_lstm_train1.csv')
pd.DataFrame(y_inv_test).to_csv('gru_lstm_test1.csv')

**Hyperparameters**

In [None]:
# # keras tuner for gru lstm
# from keras import backend as K
# from sklearn.metrics import r2_score
# import keras_tuner as kt
# from keras_tuner.tuners import RandomSearch
# from keras import regularizers
# from keras_tuner.engine.hyperparameters import HyperParameters


# def build_model(hp):
#     model = keras.Sequential()
#     # for i in range(hp.Int('n_layers', 1, 2)):
#     model.add(layers.GRU(units=hp.Int('units', min_value=32, max_value=512, step=32), activation=hp.Choice(
#         'act_' , values=['relu', 'sigmoid', 'linear', 'tanh']),  return_sequences=True, input_shape=(x_train.shape[1], 1)))
#     model.add(layers.Dense(1, activation=hp.Choice(
#         'act_' , values=['relu', 'sigmoid', 'linear', 'tanh'])))
#     model.add(Dropout(hp.Float('dropout_1', 0, 0.5, step=0.1, default=0.2)))
#     # for i in range(hp.Int('n_layers', 1, 2)):
#     model.add(layers.LSTM(units=hp.Int('units', min_value=32, max_value=512, step=32), activation=hp.Choice(
#         'act_' , values=['relu', 'sigmoid', 'linear', 'tanh']), return_sequences=False))
#     model.add(Dropout(hp.Float('dropout_1', 0, 0.5, step=0.1, default=0.2)))
#     model.add(layers.Dense(1, activation=hp.Choice('act_' , values=[
#         'relu', 'sigmoid', 'linear', 'tanh']), kernel_regularizer=regularizers.l2(hp.Float('l2', 0, 0.5, step=0.1, default=0.2))))
#     hp_lr = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
#     hp_optimizer = hp.Choice('optimizer', values=['sgd', 'rmsprop', 'adam'])
#     if hp_optimizer == 'sgd':
#         optimizer = SGD(learning_rate=hp_lr)
#     elif hp_optimizer == 'rmsprop':
#         optimizer = RMSprop(learning_rate=hp_lr)
#     else:
#         optimizer = Adam(learning_rate=hp_lr)
#     model.compile(optimizer=optimizer, loss='mse', metrics=['mse'])
#     return model


# early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
# tuner = RandomSearch(build_model, objective='mse', max_trials=10,
#                      executions_per_trial=1, directory='project', project_name='GRU_LSTM')
# tuner.search(x_train, y_train, epochs=100, verbose=1,
#              validation_data=(x_val, y_val), callbacks=[early_stopping])
# best_model = tuner.get_best_models(num_models=1)[0]

In [None]:
# from sklearn.metrics import r2_score
# print("train ",r2_score(y_train, best_model.predict(x_train)))
# print("val ",r2_score(y_val, best_model.predict(x_val)))
# print("test ",r2_score(y_test, best_model.predict(x_test)))
# best_model.summary()
# # optimizer used in best model
# print(best_model.optimizer.get_config())
# # activation used in best model
# best_model.layers[0].get_config()['activation']

In [None]:
# # dropout
# best_model.layers[2].get_config()['rate']

In [None]:
# # regularizer
# best_model.layers[5].get_config()['kernel_regularizer']