In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd 
from keras.layers import LSTM,Dropout,Dense
from keras.layers import Conv1D,MaxPooling1D,Flatten
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential


**Read data**

In [None]:
# Load merged_imd.csv and discard its 'Unnamed: 0' column
# (presumably an index column written out by an earlier to_csv call -- TODO confirm).
df = pd.read_csv(
    "/Users/rahuljauhari/Desktop/research runoff/final destination/merged_imd.csv"
).drop(columns=['Unnamed: 0'])

In [None]:
# Parse the 'DateTime' column into timestamps and promote it to the index,
# which the time-based resampling further down relies on.
df = df.assign(DateTime=pd.to_datetime(df['DateTime'])).set_index('DateTime')

In [None]:
# Collapse the records to one row per calendar month (mean of every column).
# To keep the original resolution instead, use: monthly_mean = df
monthly_mean = df.resample(rule='M').mean()

**Actual value**

In [None]:
# Workbook holding the observed runoff (monthly series).
df_actual = pd.read_excel(
    io="/Users/rahuljauhari/Desktop/research runoff/Calibrated and Validated.xlsx"
)
# Daily alternative:
# df_actual=pd.read_excel("/Users/rahuljauhari/Desktop/research runoff/Historical_Scenario_1982_2020_and_Future_Runoff_Predictions_From_2021-2099_using_XGBoost_and_EFUSE_Models (2).xlsx")

In [None]:
# Target series: the 'observed' column of the monthly workbook.
observed_runnoff = df_actual.loc[:, 'observed']

# For the daily workbook the target is the last column instead:
# observed_runnoff = df_actual.iloc[:, -1]
# observed_runnoff.head()

**Normalization**

In [None]:
from scipy.stats import zscore
def func(name):
    """Normalise the predictors and the target with the named method.

    Reads the module-level ``monthly_mean`` (predictors) and
    ``observed_runnoff`` (target); neither is modified.

    Parameters
    ----------
    name : str
        One of ``'zscore'``, ``'StandardScaler'`` or ``'MinMaxScaler'``.

    Returns
    -------
    tuple
        ``(x, y, inv)`` where ``x`` / ``y`` are the normalised predictors and
        target, and ``inv`` is the scaler fitted on the *target* (call
        ``inv.inverse_transform`` to map predictions back to the original
        scale). For ``'zscore'`` no scaler object exists and ``inv`` is ``0``.

    Raises
    ------
    ValueError
        If ``name`` is not one of the supported methods (previously this
        silently returned ``(0, 0, 0)``).

    Notes
    -----
    NOTE(review): the statistics are computed on the full series. A caller
    that splits into train/test afterwards therefore scales with information
    from the test period.
    """
    if name == 'zscore':
        x = zscore(monthly_mean)
        y = zscore(observed_runnoff)
        return x, y, 0

    if name == 'StandardScaler':
        # Separate scalers keep it explicit which one is handed back.
        x_scaler = StandardScaler()
        y_scaler = StandardScaler()
        x = x_scaler.fit_transform(monthly_mean)
        y = y_scaler.fit_transform(observed_runnoff.values.reshape(-1, 1))
        # Values beyond +/-3 standard deviations are replaced by +/-2.8.
        # inverse_transform cannot recover the original values of those entries.
        for scaled in (x, y):
            scaled[scaled > 3] = 2.8
            scaled[scaled < -3] = -2.8
        return x, y, y_scaler

    if name == 'MinMaxScaler':
        x_scaler = MinMaxScaler(feature_range=(0, 1))
        y_scaler = MinMaxScaler(feature_range=(0, 1))
        x = x_scaler.fit_transform(monthly_mean)
        y = y_scaler.fit_transform(observed_runnoff.values.reshape(-1, 1))
        return x, y, y_scaler

    raise ValueError(
        f"Unknown normalisation {name!r}; expected 'zscore', 'StandardScaler' or 'MinMaxScaler'"
    )

In [None]:

from sklearn.metrics import mean_squared_error
def rmse1(yt, yp): #lower the better
    """Root-mean-square error between the true values ``yt`` and predictions ``yp``."""
    mean_sq_err = mean_squared_error(yt, yp)
    return np.sqrt(mean_sq_err)
# Kling-Gupta efficiency
def kge1(yt, yp): #higher the better
    """Kling-Gupta efficiency of predictions ``yp`` against true values ``yt``.

    Combines three components, each ideal at 1: the linear correlation, the
    ratio of standard deviations (``yp`` over ``yt``) and the ratio of means
    (``yp`` over ``yt``). The score is 1 minus the Euclidean distance of
    these components from (1, 1, 1).
    """
    correlation = np.corrcoef(yt, yp, rowvar=False)[0, 1]
    variability_ratio = np.std(yp) / np.std(yt)
    bias_ratio = np.mean(yp) / np.mean(yt)
    squared_distance = (
        (correlation - 1)**2
        + (variability_ratio - 1)**2
        + (bias_ratio - 1)**2
    )
    return 1 - np.sqrt(squared_distance)
    # r squared
def r21(yt, yp): #higher the better
    """Coefficient of determination (R squared) of predictions ``yp`` vs. true values ``yt``.

    Computed as ``1 - SS_res / SS_tot`` with ``SS_tot`` taken around the mean
    of ``yt``.

    Both inputs are flattened and compared element by element. Without this,
    a column vector ``(n, 1)`` paired with a flat ``(n,)`` array (e.g. scaler
    output vs. a 1-D prediction) broadcasts to an ``(n, n)`` matrix and yields
    a wrong score. As a consequence pandas inputs are matched by position,
    not by index label.
    """
    yt = np.asarray(yt).ravel()
    yp = np.asarray(yp).ravel()
    residual_ss = np.sum((yt - yp)**2)
    total_ss = np.sum((yt - np.mean(yt))**2)
    return 1 - residual_ss / total_ss
    # Nash-Sutcliffe efficiency
def nse(predictions, targets):
    """Nash-Sutcliffe efficiency of ``predictions`` against ``targets``.

    Note the argument order: predictions first, observed targets second.
    Computed as ``1 - sum((pred - obs)^2) / sum((obs - mean(obs))^2)``.

    Both inputs are flattened and compared element by element. Without this,
    a column vector ``(n, 1)`` paired with a flat ``(n,)`` array broadcasts
    to an ``(n, n)`` matrix and yields a wrong score. As a consequence pandas
    inputs are matched by position, not by index label.
    """
    predictions = np.asarray(predictions).ravel()
    targets = np.asarray(targets).ravel()
    error_ss = np.sum((predictions - targets)**2)
    spread_ss = np.sum((targets - np.mean(targets))**2)
    return 1 - error_ss / spread_ss


In [None]:
from sklearn.model_selection import train_test_split


In [None]:
from keras.optimizers import Adam, RMSprop, SGD
# Min-max scale predictors and target; `inv` is the scaler returned by func
# and can be used to undo the target scaling.
x, y, inv = func('MinMaxScaler')

# Ordered (unshuffled) hold-out: the last 30% of the rows become the test set.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, shuffle=False
)

# The last 10% of the remaining training rows become the validation set.
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, test_size=0.1, shuffle=False
)

In [None]:
# kerastuner
import keras_tuner as kt
from keras_tuner.tuners import RandomSearch
from keras_tuner.engine.hyperparameters import HyperParameters
from keras import regularizers
def build_model(hp):
    """Build and compile a 1-D CNN regressor from a KerasTuner hyperparameter set.

    Tuned hyperparameters:
      * ``n_layers`` (1-4): number of Conv1D + MaxPooling1D pairs.
      * ``input_units`` / ``kernel_size``: filters and kernel width, shared by
        every Conv1D layer (same hyperparameter name in each iteration).
      * ``act_<i>``: activation of the i-th Conv1D layer.
      * ``act_out``: activation of the output Dense layer.
      * ``dropout_1``: dropout rate applied before flattening.
      * ``l2``: L2 penalty on the output layer's kernel.
      * ``learning_rate`` and ``optimizer``.

    Reads the module-level ``x_train`` to size the input as
    ``(x_train.shape[1], 1)``.

    Parameters
    ----------
    hp : keras_tuner.HyperParameters
        Hyperparameter container supplied by the tuner.

    Returns
    -------
    keras.Sequential
        Model compiled with MSE loss and an ``'mse'`` metric.

    Notes
    -----
    NOTE(review): every conv/pool pair shortens the sequence axis, so deep
    configurations may not be buildable for short inputs -- confirm against
    ``x_train.shape[1]``.
    """
    activation_choices = ['relu', 'sigmoid', 'linear', 'tanh']

    model = keras.Sequential()
    for i in range(hp.Int('n_layers', 1, 4)):
        # Only the first layer defines the model input; later layers infer
        # their input shape from the preceding layer.
        first_layer_kwargs = {'input_shape': (x_train.shape[1], 1)} if i == 0 else {}
        model.add(Conv1D(
            filters=hp.Int('input_units', min_value=32, max_value=512, step=32),
            kernel_size=hp.Int('kernel_size', min_value=1, max_value=5, step=1),
            activation=hp.Choice('act_' + str(i), values=activation_choices),
            **first_layer_kwargs,
        ))
        model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(hp.Float('dropout_1', 0, 0.5, step=0.1, default=0.2)))
    model.add(Flatten())
    # The output activation gets its own hyperparameter. Reusing 'act_<i>' with
    # the leaked loop index silently tied it to the last conv layer's activation.
    model.add(layers.Dense(
        1,
        activation=hp.Choice('act_out', values=activation_choices),
        kernel_regularizer=regularizers.l2(hp.Float('l2', 0, 0.5, step=0.1, default=0.2)),
    ))

    hp_lr = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    hp_optimizer = hp.Choice('optimizer', values=['sgd', 'rmsprop', 'adam'])

    # Adam remains the fallback, as in the original if/elif/else chain.
    optimizer_classes = {'sgd': SGD, 'rmsprop': RMSprop}
    optimizer = optimizer_classes.get(hp_optimizer, Adam)(learning_rate=hp_lr)

    model.compile(optimizer=optimizer, loss='mse', metrics=['mse'])
    return model

# Rank trials by *validation* MSE. The previous objective ('mse') is the
# training metric, so the search favoured configurations that merely fit the
# training data best even though validation data was supplied.
# NOTE(review): results of an earlier search stored under project/CNN were
# ranked with the old objective; remove that directory (or pass
# overwrite=True) before re-running -- confirm how KerasTuner handles the reload.
tuner = RandomSearch(
    build_model,
    objective=kt.Objective('val_mse', direction='min'),
    max_trials=10,
    executions_per_trial=1,
    directory='project',
    project_name='CNN',
)
tuner.search(x_train, y_train, epochs=100, verbose=0, validation_data=(x_val, y_val))
best_model = tuner.get_best_models()[0]

In [None]:
from sklearn.metrics import r2_score
# R^2 of the best tuned model on each data split.
for split_label, split_x, split_y in (
    ("train ", x_train, y_train),
    ("val ", x_val, y_val),
    ("test ", x_test, y_test),
):
    print(split_label, r2_score(split_y, best_model.predict(split_x)))

In [None]:
# Layer-by-layer overview of the best model.
best_model.summary()

# Full configuration of the optimizer it was compiled with.
best_optimizer_config = best_model.optimizer.get_config()
print(best_optimizer_config)

# Activation of the first layer (shown as the cell output).
first_layer = best_model.layers[0]
first_layer.get_config()['activation']

In [None]:
# Dropout configuration used in the best model.
# The number of Conv1D/MaxPooling1D pairs is tuned, so the Dropout layer's
# position varies (index 4 is only correct for two pairs); locate it by type.
next(layer for layer in best_model.layers if isinstance(layer, Dropout)).get_config()

In [None]:
# Learning rate of the best model's optimizer.
tuned_optimizer_config = best_model.optimizer.get_config()
tuned_optimizer_config['learning_rate']

In [None]:
# Kernel regularizer of the output Dense layer.
# The layer count depends on the tuned number of conv blocks, so a fixed
# index (6) is only valid for two blocks; the output layer is always last.
best_model.layers[-1].get_config()['kernel_regularizer']

In [None]:
# Name of the optimizer the best model was compiled with.
best_optimizer_settings = best_model.optimizer.get_config()
best_optimizer_settings['name']

In [None]:
# Candidate settings iterated over by the training loop
# (one entry each: a single configuration is trained).
activation, optimizer, preprocess = ['linear'], ['rmsprop'], ['MinMaxScaler']

In [None]:
from sklearn.metrics import r2_score
# Train the hand-configured CNN for every (activation, optimizer, preprocessing)
# combination and append its predictions, mapped back to the original target
# scale, to a CSV file.
for act in activation:
    for opt in optimizer:
        for pre in preprocess:
            x, y, inv_scaler = func(pre)
            # Ordered (unshuffled) split: the last 30% of the rows are held out.
            x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, shuffle=False)

            model = Sequential()
            model.add(Conv1D(filters=64, kernel_size=5, activation=act, input_shape=(x_train.shape[1], 1), kernel_initializer='he_normal'))
            model.add(MaxPooling1D(pool_size=2))
            model.add(Dropout(0.5))
            model.add(Dense(256, activation=act))
            model.add(Conv1D(filters=256, kernel_size=5, activation=act, kernel_initializer='he_normal'))
            model.add(MaxPooling1D(pool_size=2))
            model.add(Flatten())
            model.add(Dense(256, activation=act))
            model.add(Dense(1, activation=act))
            # This is a regression task: track MSE. Classification 'accuracy'
            # is not a meaningful metric for continuous targets.
            model.compile(optimizer=opt, loss='mean_squared_error', metrics=['mse'])
            model.fit(x_train, y_train, epochs=100, batch_size=30, verbose=0)

            y_pred = model.predict(x_test)
            y_pred_train = model.predict(x_train)
            # Metric logging (R2 / RMSE / KGE per configuration, appended to
            # results1/imd_cnn.csv) was previously commented out at this point
            # and remains disabled.
            try:
                # Train predictions followed by test predictions, kept in a
                # dedicated name so the target array `y` is not overwritten.
                y_pred_all = np.concatenate((y_pred_train, y_pred), axis=0)
                # Back to the original target scale. For 'zscore' no scaler is
                # available, so this fails and the error is printed below.
                y_inv = inv_scaler.inverse_transform(y_pred_all)
                # Append mode: every run adds another block (with its own
                # header row) to the same file.
                pd.DataFrame(y_inv).to_csv('/Users/rahuljauhari/Desktop/research runoff/results1/imd_cnn_0.3.csv', mode='a', header=True)
            except Exception as e:
                print(e)