In [1]:
## Import dependent libraries
import os
import logging
import numpy as np
import pandas as pd
import tensorflow as tf
from pathlib import Path
from keras.models import Model
from keras.layers import Input, Concatenate, Reshape
from keras import optimizers, callbacks
from matplotlib import pyplot as plt
from datetime import datetime, timedelta
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from tensorflow.keras.initializers import RandomNormal
## import required HIRNN
from libs.HIRNN_UP import HIRNNLayer, ConvLayer
from libs import hirnnutils
from keras import initializers, constraints, regularizers
from keras.layers import Layer, Dense, Lambda, Activation
import keras.backend as K
import tensorflow as tf
from keras.layers import Dropout, Activation
from keras.regularizers import l2
from tensorflow.keras.layers import concatenate 
from tensorflow.keras.backend import zeros
import random
## Ignore all the warnings
# NOTE(review): TF_CPP_MIN_LOG_LEVEL is set after `import tensorflow` above, so it
# presumably does not silence the native TensorFlow log lines of this kernel
# (they still show up in the cell outputs) -- set it before the import if that is the goal.
tf.get_logger().setLevel(logging.ERROR)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
os.environ['KMP_WARNINGS'] = '0'

# Enable deterministic operations (each call is made once; the repeated calls
# that used to follow the seeding section were redundant).
tf.config.experimental.enable_op_determinism()
os.environ['TF_DETERMINISTIC_OPS'] = '1'
os.environ['TF_CUDNN_DETERMINISTIC'] = '1'
tf.config.optimizer.set_experimental_options({'cudnn_deterministic': True})

# Fix seeds for reproducibility
# NOTE: PYTHONHASHSEED is read at interpreter start-up; assigning it here only
# affects child processes, not the hash randomization of the running kernel.
os.environ['PYTHONHASHSEED'] = '0'
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

2024-12-20 13:03:18.558419: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-12-20 13:03:18.615317: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-12-20 13:03:18.617188: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Load the per-station table (gauge_id plus warm-up / calibration / validation
# date ranges and catchment area) and show it as the cell output.
# NOTE(review): absolute path -- adjust to where the sample input data lives locally.
stn_data = pd.read_csv('/HIRNN_codes/sample_input_data_HIRNN/unmanaged_intermittent_catchments_time_period_details.csv')
stn_data

Unnamed: 0,gauge_id,warm_up_cal_start,warm_up_cal_end,cal_start,cal_end,warm_up_val_start,warm_up_val_end,val_start,val_end,area_km2
0,404207,1989-01-01,1991-12-31,1992-01-01,2001-12-31,2002-01-01,2004-12-31,2005-01-01,2009-12-31,454.7
1,606185,1978-01-01,1980-12-31,1981-01-01,1990-12-31,1991-01-01,1993-12-31,1994-01-01,1998-12-31,379.4


In [3]:
def generate_train_test(train_set, test_set, wrap_length, stride=365):
    """Build windowed training samples and one full-length test sample.

    The last column of each frame is used as the target; every preceding
    column is used as an input feature.

    Parameters
    ----------
    train_set, test_set : pandas.DataFrame
        Row-ordered records; only ``.values`` and ``.shape`` are used.
    wrap_length : int
        Number of consecutive rows in each training window.
    stride : int, default 365
        Row offset between the starts of consecutive training windows.

    Returns
    -------
    train_x, train_y : numpy.ndarray
        Arrays of shape ``(n_windows, wrap_length, n_features)`` and
        ``(n_windows, wrap_length, 1)``.
    test_x, test_y : numpy.ndarray
        The whole test frame with a leading axis of size 1.

    Raises
    ------
    ValueError
        If ``wrap_length`` or ``stride`` is smaller than 1, or if the training
        frame has fewer rows than ``wrap_length`` (no window would fit).
    """
    if wrap_length < 1:
        raise ValueError(f"wrap_length must be >= 1, got {wrap_length}")
    if stride < 1:
        raise ValueError(f"stride must be >= 1, got {stride}")

    train_values = train_set.values
    test_values = test_set.values

    n_train_rows = train_values.shape[0]
    if n_train_rows < wrap_length:
        # Previously this produced zero windows (or a negative array dimension).
        raise ValueError(
            f"training set has {n_train_rows} rows, fewer than wrap_length={wrap_length}"
        )

    train_x_np, train_y_np = train_values[:, :-1], train_values[:, -1:]
    test_x_np, test_y_np = test_values[:, :-1], test_values[:, -1:]

    # Only complete windows are kept; trailing rows that do not fill a window are dropped.
    wrap_number_train = (n_train_rows - wrap_length) // stride + 1

    train_x = np.empty(shape=(wrap_number_train, wrap_length, train_x_np.shape[1]))
    train_y = np.empty(shape=(wrap_number_train, wrap_length, train_y_np.shape[1]))
    for window in range(wrap_number_train):
        start = window * stride
        stop = start + wrap_length
        train_x[window, :, :] = train_x_np[start:stop, :]
        train_y[window, :, :] = train_y_np[start:stop, :]

    # The test period is evaluated as a single sample covering all of its rows.
    test_x = np.expand_dims(test_x_np, axis=0)
    test_y = np.expand_dims(test_y_np, axis=0)

    return train_x, train_y, test_x, test_y



In [4]:
def create_model(input_shape, seed, num_filters = None, model_type='hybrid', kernel_size = None, dropout_rate = None):
    """Create the Keras model used in this notebook.

    Parameters
    ----------
    input_shape : tuple
        Shape passed to the Keras ``Input`` layer (without the batch axis).
    seed : int
        Seed applied to TensorFlow and NumPy before the layers are built and
        forwarded to the ``ConvLayer`` / ``Dropout`` layers.
    num_filters : int, required for ``model_type='hybrid'``
        ``filters`` argument of the first ``ConvLayer``.
    model_type : {'hybrid', 'physical'}
        ``'hybrid'``: ConvLayer -> Dropout -> ConvLayer(filters=1, kernel_size=1).
        ``'physical'``: a single process-based layer.
    kernel_size : int, required for ``model_type='hybrid'``
        ``kernel_size`` argument of the first ``ConvLayer``.
    dropout_rate : float, required for ``model_type='hybrid'``
        Rate passed to the ``Dropout`` layer.

    Returns
    -------
    keras.models.Model

    Raises
    ------
    ValueError
        If ``model_type`` is unknown, or a hybrid hyper-parameter is missing.
    """

    # Set seeds for reproducibility
    tf.random.set_seed(seed)
    np.random.seed(seed)

    # Define input layer
    x_input = Input(shape=input_shape, name='Input')

    if model_type == 'physical':
        # The notebook imports HIRNNLayer; the previously referenced PRNNLayer is
        # not defined anywhere in it, so this branch raised NameError.
        # NOTE(review): assumes HIRNNLayer accepts mode='normal' -- confirm against libs.HIRNN_UP.
        hydro_output = HIRNNLayer(mode= 'normal', name='Hydro')(x_input)
        model = Model(x_input, hydro_output)

    elif model_type == 'hybrid':
        missing = [
            arg_name
            for arg_name, arg_value in (
                ('num_filters', num_filters),
                ('kernel_size', kernel_size),
                ('dropout_rate', dropout_rate),
            )
            if arg_value is None
        ]
        if missing:
            raise ValueError(f"model_type='hybrid' requires: {', '.join(missing)}")

        # Use the function arguments rather than same-purpose notebook globals,
        # so the model that is built always matches what the caller asked for.
        cnn_output = ConvLayer(filters=num_filters, kernel_size=kernel_size, padding='causal', seed=seed, name='Conv1')(x_input)
        cnn_output = Dropout(dropout_rate, seed=seed)(cnn_output)
        cnn_output = ConvLayer(filters=1, kernel_size=1, padding='causal', seed=seed, name='Conv2')(cnn_output)
        model = Model(x_input, cnn_output)

    else:
        raise ValueError(f"Unknown model_type {model_type!r}; expected 'physical' or 'hybrid'")

    return model

def train_model(model, train_x, train_y, ep_number, lrate, save_path, seed):
    """Compile and fit a Keras model, checkpointing the best weights.

    -- model: the Keras model object
    -- train_x, train_y: the input and target for training the model
    -- ep_number: the maximum epoch number
    -- lrate: the initial learning rate (the plateau scheduler never goes below lrate / 100)
    -- save_path: where the best weights will be saved
    -- seed: seed applied to TensorFlow and NumPy before training

    Returns the object returned by ``model.fit``.
    """
    # Set seeds for reproducibility
    tf.random.set_seed(seed)
    np.random.seed(seed)

    # All monitoring callbacks watch 'nse_metrics' (no validation data is passed
    # to fit) and treat larger values as better.
    watch = dict(monitor='nse_metrics', mode='max', verbose=0)
    improvement_threshold = 0.0005

    training_callbacks = [
        callbacks.ModelCheckpoint(save_path, save_best_only=True, save_weights_only=True, **watch),
        callbacks.EarlyStopping(patience=20, min_delta=improvement_threshold,
                                restore_best_weights=True, **watch),
        callbacks.ReduceLROnPlateau(factor=0.8, patience=5, min_delta=improvement_threshold,
                                    cooldown=0, min_lr=lrate / 100, **watch),
        callbacks.TerminateOnNaN(),
    ]

    model.compile(
        loss=hirnnutils.nse_loss,
        metrics=[hirnnutils.nse_metrics],
        optimizer=optimizers.Adam(learning_rate=lrate),
    )

    # Samples are fed in their given order (shuffle=False).
    return model.fit(
        train_x,
        train_y,
        epochs=ep_number,
        batch_size=10000,
        callbacks=training_callbacks,
        shuffle=False,
        verbose=0,
    )

def test_model(model, test_x, save_path):
    """Test a Keras model.
    -- model: the Keras model object
    -- test_x: the input for testing the model
    -- save_path: where the model was saved
    """
    model.load_weights(save_path)  # Remove by_name=True
    pred_y = model.predict(test_x, batch_size=10000)
    return pred_y    
    

In [5]:
def NS(s,o):
    """Nash Sutcliffe efficiency coefficient.

    input:
        s: simulated
        o: observed
    Returns 1 minus the ratio of the summed squared errors to the summed
    squared deviations of the observations from their mean.
    """
    squared_error = (s - o) ** 2
    squared_deviation = (o - np.mean(o)) ** 2
    return 1 - sum(squared_error) / sum(squared_deviation)

def pc_bias(s,o):
    """Percent Bias.

    input:
        s: simulated
        o: observed
    Returns 100 * sum(o - s) / sum(o); the value is positive when the
    simulated total is smaller than the observed total.
    """
    total_difference = sum(o - s)
    total_observed = sum(o)
    return 100.0 * total_difference / total_observed
def rmse(s,o):
    """Root Mean Squared Error.

    input:
        s: simulated
        o: observed
    """
    squared_error = (s - o) ** 2
    mean_squared_error = np.mean(squared_error)
    return np.sqrt(mean_squared_error)


def kge(s,o):
    """Kling Gupta Efficiency.

    input:
        s: simulated
        o: observed
    Combines the linear correlation, the ratio of standard deviations and the
    ratio of means of simulated vs. observed values into one score; 1 means
    all three components are at their ideal value.
    """
    correlation = np.corrcoef(s, o)[0, 1]
    alpha = np.std(s) / np.std(o)      # variability ratio
    beta = np.mean(s) / np.mean(o)     # mean ratio
    distance_from_ideal = ((1 - correlation) ** 2 + (alpha - 1) ** 2 + (beta - 1) ** 2) ** 0.5
    return 1 - distance_from_ideal


In [6]:
def normalize(data):
    """Standardize ``data`` along its second-to-last axis.

    Parameters
    ----------
    data : numpy.ndarray
        Array with at least two dimensions; the mean and standard deviation
        are computed over axis ``-2`` and kept as size-1 axes for broadcasting.

    Returns
    -------
    data_scaled : numpy.ndarray
        ``(data - data_mean) / data_std``.
    data_mean : numpy.ndarray
        Mean over axis ``-2`` (``keepdims=True``).
    data_std : numpy.ndarray
        Standard deviation over axis ``-2`` (``keepdims=True``), with exact
        zeros replaced by 1 so that a constant slice scales to 0 instead of
        NaN and ``data_scaled * data_std + data_mean`` reproduces ``data``.
    """
    data_mean = np.mean(data, axis=-2, keepdims=True)
    data_std = np.std(data, axis=-2, keepdims=True)
    # A zero standard deviation would turn the whole slice into NaN (0 / 0).
    data_std = np.where(data_std == 0, 1.0, data_std)
    data_scaled = (data - data_mean) / data_std
    return data_scaled, data_mean, data_std

In [7]:
# Grid search over dropout rate, kernel size and number of filters for every
# station listed in stn_data. One single-row DataFrame per trained model is
# appended to perform_list; `perform` is the concatenation of those rows.
perform = pd.DataFrame(columns=['Station', 'Dropout_rate', 'Kernel_size', 'Number_of_filters',
                                'NSE_val', 'PBIAS_val', 'RMSE_val', 'KGE_val'])
perform_list = []

# Hyper-parameter grid (loop-invariant, so defined once).
dropout_rates = [0.1, 0.2, 0.3, 0.4, 0.5]
kernel_sizes = [5, 10, 15, 20]
num_filters = [2, 4, 8, 16, 32]

wrap_length= 2922 # It can be other values, but recommend this value should not be less than 5 years (1826 days).

for stn in range(0, len(stn_data)):
    station = stn_data.loc[stn, 'gauge_id']
    print(station)

    # NOTE(review): the same file is read for every station -- confirm whether the
    # path should depend on `station`.
    hirnn_data = pd.read_csv('HIRNN_output.csv') # use HIRNN output data for both training and testing period
    hirnn_data['Date'] = pd.to_datetime(hirnn_data['Date'])

    # Keep only the specified columns; the last one (streamflow_mm) is the target.
    hirnn_data = hirnn_data[['Date', 'Pptn', 'PET', 'flow_physical', 'streamflow_mm']].set_index('Date')

    PRNN_cal_start = stn_data.loc[stn, 'warm_up_cal_start']
    cal_start = stn_data.loc[stn, 'cal_start']
    cal_end = stn_data.loc[stn, 'cal_end']
    PRNN_val_start = stn_data.loc[stn, 'warm_up_val_start']
    val_end = stn_data.loc[stn, 'val_end']
    val_start = stn_data.loc[stn, 'val_start']

    ####################
    #  Period set up   #
    ####################
    # Both periods include their warm-up years; metrics are later computed on
    # val_start..val_end only.
    training_start = PRNN_cal_start
    training_end= cal_end

    testing_start = PRNN_val_start
    testing_end= val_end

    # Split data set to training_set and testing_set
    train_set = hirnn_data[hirnn_data.index.isin(pd.date_range(training_start, training_end))]
    test_set = hirnn_data[hirnn_data.index.isin(pd.date_range(testing_start, testing_end))]

    train_x, train_y, test_x, test_y = generate_train_test(train_set, test_set, wrap_length=wrap_length)

    # Normalization depends only on the station's data, so it is done once per
    # station instead of once per hyper-parameter combination.
    train_x_nor, train_x_mean, train_x_std = normalize(train_x)
    train_y_nor, train_y_mean, train_y_std = normalize(train_y)

    # The training statistics carry one entry per training window. The test
    # sample is scaled (and the prediction recovered) with the statistics of
    # the first window only; slicing with [:1] makes that explicit and avoids
    # broadcasting the single test sample into one copy per training window.
    test_x_nor = (test_x - train_x_mean[:1]) / train_x_std[:1]

    # NOTE: keep the *_input loop-variable names stable; other cells may refer to them.
    for dropout_rate_input in dropout_rates:
        print('dropout_rate_input:',dropout_rate_input)
        for kernel_size_input in kernel_sizes:
            print('kernel_size_input:',kernel_size_input)
            for num_filters_input in num_filters:
                print('num_filters_input:',num_filters_input)

                # The spelling 'droput' is kept so previously saved weight files keep their names.
                basin_id = f'{station}_HIRNN_droput_{dropout_rate_input}_kernel_size_{kernel_size_input}_filter_size_{num_filters_input}'
                save_path_hybrid = f'/HIRNN_codes/{basin_id}_hybrid.weights.h5'# use desired path

                ####################
                #     Training     #
                ####################
                model = create_model((train_x.shape[1], train_x.shape[2]), seed = 200, num_filters = num_filters_input, model_type='hybrid', kernel_size=kernel_size_input, dropout_rate=dropout_rate_input)
                #model.summary()

                hybrid_history = train_model(model, train_x_nor, train_y_nor,ep_number=200, lrate=0.01, save_path=save_path_hybrid, seed = 200)

                ####################
                #  Hybrid NN model #
                ####################
                # A second model is built with the test input shape; the trained
                # weights are loaded into it from save_path_hybrid by test_model.
                model = create_model((test_x.shape[1], test_x.shape[2]), seed = 200, num_filters = num_filters_input, model_type='hybrid', kernel_size=kernel_size_input, dropout_rate=dropout_rate_input)
                Q_hybrid = test_model(model, test_x_nor, save_path_hybrid)
                # Recover flow units with the training statistics used for scaling.
                Q_hybrid = Q_hybrid * train_y_std[:1] + train_y_mean[:1]

                evaluate_set = test_set.loc[:, ['Pptn', 'PET', 'streamflow_mm']].copy()
                evaluate_set['Q_obs'] = evaluate_set['streamflow_mm']
                # Negative simulated flows are clipped to zero.
                evaluate_set['Q_hybrid'] = np.clip(Q_hybrid[0, :, 0], a_min = 0, a_max = None)
                # Drop the warm-up part of the test period before scoring.
                # NOTE(review): fillna(0) scores missing values as zero flow -- confirm this is intended.
                evaluate_set = evaluate_set.loc[val_start:val_end].fillna(0)

                NSE_val = NS(evaluate_set['Q_hybrid'],evaluate_set['Q_obs'])
                PBIAS_val = pc_bias(evaluate_set['Q_hybrid'],evaluate_set['Q_obs'])
                RMSE_val = rmse(evaluate_set['Q_hybrid'],evaluate_set['Q_obs'])
                KGE_val = kge(evaluate_set['Q_hybrid'],evaluate_set['Q_obs'])
                perform_data = {
                    'Station': station,
                    'Dropout_rate':dropout_rate_input,
                    'Kernel_size': kernel_size_input,
                    'Number_of_filters': num_filters_input,
                    'NSE_val': NSE_val,
                    'PBIAS_val': PBIAS_val,
                    'RMSE_val': RMSE_val,
                    'KGE_val': KGE_val
                }
                #print(perform_data)
                perform_list.append(pd.DataFrame(perform_data, index=[0]))

    # Rebuild the summary table once per station; perform_list always holds
    # the rows collected so far if the run is interrupted.
    perform = pd.concat(perform_list, ignore_index=True)

404207
dropout_rate_input: 0.1
kernel_size_input: 5
num_filters_input: 2


2024-12-20 13:03:21.357033: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1960] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


num_filters_input: 4
num_filters_input: 8
num_filters_input: 16
num_filters_input: 32
kernel_size_input: 10
num_filters_input: 2
num_filters_input: 4
num_filters_input: 8
num_filters_input: 16
num_filters_input: 32
kernel_size_input: 15
num_filters_input: 2
num_filters_input: 4
num_filters_input: 8
num_filters_input: 16
num_filters_input: 32
kernel_size_input: 20
num_filters_input: 2
num_filters_input: 4
num_filters_input: 8
num_filters_input: 16
num_filters_input: 32


In [8]:
# Display the collected results: one row per station / hyper-parameter combination.
perform

Unnamed: 0,Station,Dropout_rate,Kernel_size,Number_of_filters,NSE_val,PBIAS_val,RMSE_val,KGE_val
0,404207,0.1,5,2,0.459197,-108.931453,0.386816,-0.148492
1,404207,0.1,5,4,0.50786,-80.808713,0.369003,0.128513
2,404207,0.1,5,8,0.47311,-85.913767,0.381808,0.07124
3,404207,0.1,5,16,0.581186,-96.182307,0.340405,-0.003884
4,404207,0.1,5,32,0.632805,-101.932313,0.318738,-0.044249
5,404207,0.1,10,2,0.628637,-80.246296,0.320541,0.175519
6,404207,0.1,10,4,0.537678,-76.923446,0.357649,0.176699
7,404207,0.1,10,8,0.55955,-87.550549,0.349087,0.076253
8,404207,0.1,10,16,0.641916,-84.814454,0.314759,0.121495
9,404207,0.1,10,32,0.66118,-93.601135,0.306175,0.040127
