From 27e420dd4de1bd60adbdefb0d48f7ad5dae8d334 Mon Sep 17 00:00:00 2001 From: Unknown Date: Sat, 28 Oct 2017 19:18:17 +0200 Subject: [PATCH] simulations now in parallel for multiple parameter sets, resolves #7 --- docs/source/examples/model_api_example.rst | 73 +++--- examples/model_api_example.ipynb | 70 +++--- rrmpg/models/abcmodel.py | 86 ++++--- rrmpg/models/gr4j.py | 264 ++++++++++++--------- rrmpg/models/hbvedu.py | 202 +++++++++------- rrmpg/tools/monte_carlo.py | 28 +-- test/test_models.py | 2 +- 7 files changed, 402 insertions(+), 323 deletions(-) diff --git a/docs/source/examples/model_api_example.rst b/docs/source/examples/model_api_example.rst index dc8585c..e4e0f8b 100644 --- a/docs/source/examples/model_api_example.rst +++ b/docs/source/examples/model_api_example.rst @@ -145,11 +145,11 @@ optimize this parameter as well. .dataframe thead tr:only-child th { text-align: right; } - - .dataframe thead th { 0.5232 + + .dataframe thead th { text-align: left; } - + .dataframe tbody tr th { vertical-align: top; } @@ -322,14 +322,14 @@ The inputs for this function can be found in the Help on method fit in module rrmpg.models.hbvedu: fit(qobs, temp, prec, month, PE_m, T_m, snow_init=0.0, soil_init=0.0, s1_init=0.0, s2_init=0.0) method of rrmpg.models.hbvedu.HBVEdu instance - Fit the model to a timeseries of discharge using. + Fit the HBVEdu model to a timeseries of discharge. This functions uses scipy's global optimizer (differential evolution) to find a good set of parameters for the model, so that the observed discharge is simulated as good as possible. Args: - qobs: Array of observed streaflow discharge. + qobs: Array of observed streamflow discharge. temp: Array of (mean) temperature for each timestep. prec: Array of (summed) precipitation for each timestep. month: Array of integers indicating for each timestep to which @@ -376,25 +376,24 @@ optimization prozess. Let us have a look on this object .. parsed-literal:: - fun: 9.3905281887057583 + fun: 9.3994864331708197 message: 'Optimization terminated successfully.' - nfev: 10782 - nit: 49 + nfev: 7809 + nit: 44 success: True - x: array([ -2.91830209e-01, 3.74195970e+00, 1.98920300e+02, - 1.52260747e+00, 1.24451078e-02, 9.03672595e+01, - 5.08673767e-02, 8.15783540e-02, 1.00310869e-02, - 4.88820992e-02, 4.94212319e+00]) - + x: array([ -2.97714592e-01, 3.77549145e+00, 1.99253759e+02, + 1.59770799e+00, 1.16985627e-02, 9.16944196e+01, + 5.03153295e-02, 7.93555552e-02, 1.02132677e-02, + 4.93546960e-02, 4.57495030e+00]) -Some of the relevant informations are: -- ``fun`` is the final value of our optimization criterion, the mean-squared-error in this case -- ``message`` describes the cause of the optimization termination -- ``nfev`` is the number of model simulations -- ``sucess`` is a flag wether or not the optimization was successful -- ``x`` are the optimized model parameters +Some of the relevant informations are: - ``fun`` is the final value of +our optimization criterion, the mean-squared-error in this case - +``message`` describes the cause of the optimization termination - +``nfev`` is the number of model simulations - ``sucess`` is a flag +wether or not the optimization was successful - ``x`` are the optimized +model parameters Now let's set the model parameter to the optimized parameter found by the optimizer. Therefore we need to create a dictonary containing on key @@ -423,17 +422,17 @@ dictonary by the following lines of code. .. 
parsed-literal:: - {'Beta': 1.5226074683067585, - 'C': 0.012445107819582725, - 'DD': 3.7419596954402823, - 'FC': 198.92030029792784, - 'K_0': 0.050867376726295155, - 'K_1': 0.081578354027559724, - 'K_2': 0.010031086907160772, - 'K_p': 0.048882099196888087, - 'L': 4.9421231856757295, - 'PWP': 90.367259489128287, - 'T_t': -0.29183020885043631} + {'Beta': 1.5977079902649263, + 'C': 0.011698562668775968, + 'DD': 3.775491450515529, + 'FC': 199.25375851504469, + 'K_0': 0.05031532948317842, + 'K_1': 0.079355555184944859, + 'K_2': 0.010213267703180977, + 'K_p': 0.049354696028968914, + 'L': 4.5749503030552034, + 'PWP': 91.694419598406654, + 'T_t': -0.29771459243043052} @@ -477,8 +476,8 @@ implemented in ``rrmpg.tools.monte_carlo``. key 'params' contains a numpy array with the model parameter of each simulation. 'qsim' is a 2D numpy array with the simulated streamflow for each simulation. If an array of observed streamflow is provided, - one additional key is returned in the dictonary, being 'nse'. This key - contains an array of the Nash-Sutcliff-Efficiency for each simulation. + one additional key is returned in the dictonary, being 'mse'. This key + contains an array of the mean-squared-error for each simulation. Raises: ValueError: If any input contains invalid values. @@ -548,8 +547,8 @@ simulated discharge for the inputs we provide to this function. .. parsed-literal:: - NSE of the .fit() optimization: 0.6173 - NSE of the Monte-Carlo-Simulation: 0.5232 + NSE of the .fit() optimization: 0.6170 + NSE of the Monte-Carlo-Simulation: 0.5283 And let us finally have a look at some window of the simulated @@ -573,14 +572,14 @@ timeseries compared to the observed discharge .. raw:: html - + .. parsed-literal:: - + @@ -601,4 +600,4 @@ runs. .. parsed-literal:: - 5.01 s ± 5.07 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) + 1.68 s ± 29.6 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each) diff --git a/examples/model_api_example.ipynb b/examples/model_api_example.ipynb index 8b1c11f..0b17005 100644 --- a/examples/model_api_example.ipynb +++ b/examples/model_api_example.ipynb @@ -341,7 +341,9 @@ { "cell_type": "code", "execution_count": 6, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "model = HBVEdu(area=area)" @@ -349,9 +351,7 @@ }, { "cell_type": "markdown", - "metadata": { - "collapsed": true - }, + "metadata": {}, "source": [ "## Fit the model to observed discharge\n", "\n", @@ -377,14 +377,14 @@ "Help on method fit in module rrmpg.models.hbvedu:\n", "\n", "fit(qobs, temp, prec, month, PE_m, T_m, snow_init=0.0, soil_init=0.0, s1_init=0.0, s2_init=0.0) method of rrmpg.models.hbvedu.HBVEdu instance\n", - " Fit the model to a timeseries of discharge using.\n", + " Fit the HBVEdu model to a timeseries of discharge.\n", " \n", " This functions uses scipy's global optimizer (differential evolution)\n", " to find a good set of parameters for the model, so that the observed \n", " discharge is simulated as good as possible.\n", " \n", " Args:\n", - " qobs: Array of observed streaflow discharge.\n", + " qobs: Array of observed streamflow discharge.\n", " temp: Array of (mean) temperature for each timestep.\n", " prec: Array of (summed) precipitation for each timestep.\n", " month: Array of integers indicating for each timestep to which\n", @@ -445,15 +445,15 @@ { "data": { "text/plain": [ - " fun: 9.3905281887057583\n", + " fun: 9.3994864331708197\n", " message: 'Optimization terminated successfully.'\n", - " nfev: 10782\n", - " nit: 49\n", + " nfev: 7809\n", + " nit: 44\n", " success: True\n", - " x: array([ -2.91830209e-01, 3.74195970e+00, 1.98920300e+02,\n", - " 1.52260747e+00, 1.24451078e-02, 9.03672595e+01,\n", - " 5.08673767e-02, 8.15783540e-02, 1.00310869e-02,\n", - " 4.88820992e-02, 4.94212319e+00])" + " x: array([ -2.97714592e-01, 3.77549145e+00, 1.99253759e+02,\n", + " 1.59770799e+00, 1.16985627e-02, 9.16944196e+01,\n", + " 5.03153295e-02, 7.93555552e-02, 1.02132677e-02,\n", + " 4.93546960e-02, 4.57495030e+00])" ] }, "execution_count": 9, @@ -487,17 +487,17 @@ { "data": { "text/plain": [ - "{'Beta': 1.5226074683067585,\n", - " 'C': 0.012445107819582725,\n", - " 'DD': 3.7419596954402823,\n", - " 'FC': 198.92030029792784,\n", - " 'K_0': 0.050867376726295155,\n", - " 'K_1': 0.081578354027559724,\n", - " 'K_2': 0.010031086907160772,\n", - " 'K_p': 0.048882099196888087,\n", - " 'L': 4.9421231856757295,\n", - " 'PWP': 90.367259489128287,\n", - " 'T_t': -0.29183020885043631}" + "{'Beta': 1.5977079902649263,\n", + " 'C': 0.011698562668775968,\n", + " 'DD': 3.775491450515529,\n", + " 'FC': 199.25375851504469,\n", + " 'K_0': 0.05031532948317842,\n", + " 'K_1': 0.079355555184944859,\n", + " 'K_2': 0.010213267703180977,\n", + " 'K_p': 0.049354696028968914,\n", + " 'L': 4.5749503030552034,\n", + " 'PWP': 91.694419598406654,\n", + " 'T_t': -0.29771459243043052}" ] }, "execution_count": 10, @@ -566,8 +566,8 @@ " key 'params' contains a numpy array with the model parameter of each \n", " simulation. 'qsim' is a 2D numpy array with the simulated streamflow \n", " for each simulation. If an array of observed streamflow is provided,\n", - " one additional key is returned in the dictonary, being 'nse'. This key\n", - " contains an array of the Nash-Sutcliff-Efficiency for each simulation.\n", + " one additional key is returned in the dictonary, being 'mse'. 
This key\n", + " contains an array of the mean-squared-error for each simulation.\n", " \n", " Raises:\n", " ValueError: If any input contains invalid values.\n", @@ -582,9 +582,7 @@ }, { "cell_type": "markdown", - "metadata": { - "collapsed": true - }, + "metadata": {}, "source": [ "As specified in the help text, all model inputs needed for a simulation must be provided as keyword arguments. The keywords need to match the names specified in the `model.simulate()` function. We'll create a new model instance and see how this works for the HBVEdu model." ] @@ -592,7 +590,9 @@ { "cell_type": "code", "execution_count": 12, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "model2 = HBVEdu(area=area)\n", @@ -637,8 +637,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "NSE of the .fit() optimization: 0.6173\n", - "NSE of the Monte-Carlo-Simulation: 0.5232\n" + "NSE of the .fit() optimization: 0.6170\n", + "NSE of the Monte-Carlo-Simulation: 0.5283\n" ] } ], @@ -1455,7 +1455,7 @@ { "data": { "text/html": [ - "" + "" ], "text/plain": [ "" @@ -1467,7 +1467,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 14, @@ -1499,7 +1499,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "5.01 s ± 5.07 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" + "1.68 s ± 29.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" ] } ], diff --git a/rrmpg/models/abcmodel.py b/rrmpg/models/abcmodel.py index c3ad1fc..7563468 100644 --- a/rrmpg/models/abcmodel.py +++ b/rrmpg/models/abcmodel.py @@ -14,7 +14,7 @@ import numpy as np -from numba import njit +from numba import njit, prange from scipy import optimize from .basemodel import BaseModel @@ -102,7 +102,8 @@ def get_random_params(self, num=1): return params - def simulate(self, prec, initial_state=0, return_storage=False): + def simulate(self, prec, initial_state=0, return_storage=False, + params=None): """Simulate the streamflow for the passed precipitation. This function makes sanity checks on the input and then calls the @@ -114,6 +115,10 @@ def simulate(self, prec, initial_state=0, return_storage=False): initial_state: (optional) Initial value for the storage. return_storage: (optional) Boolean, wether or not to return the simulated storage for each timestep. + params: (optional) Numpy array of parameter sets, that will be + evaluated a once in parallel. Must be of the models own custom + data type. If nothing is passed, the parameters, stored in the + model object, will be used. Returns: An array with the simulated stream flow for each timestep and @@ -144,10 +149,21 @@ def simulate(self, prec, initial_state=0, return_storage=False): if not isinstance(return_storage, bool): raise TypeError("The return_storage arg must be a boolean.") - # Create custom numpy data struct containing the model parameters - params = np.zeros(1, dtype=self._dtype) - for param in self._param_list: - params[param] = getattr(self, param) + # If no parameters were passed, prepare array w. 
params from attributes + if params is None: + params = np.zeros(1, dtype=self._dtype) + for param in self._param_list: + params[param] = getattr(self, param) + + # Else, check the param input for correct datatype + else: + if params.dtype != self._dtype: + msg = ["The model parameters must be a numpy array of the ", + "models own custom data type."] + raise TypeError("".join(msg)) + # if only one parameter set is passed, expand dimensions to 1D + if isinstance(params, np.void): + params = np.expand_dims(params, params.ndim) # Call ABC-model simulation function and return results if return_storage: @@ -228,30 +244,40 @@ def _loss(X, *args): return loss_value -@njit +@njit(parallel=True) def _simulate_abc(params, prec, initial_state): - """Run a simulation of the ABC-model for given input and params.""" - # Unpack model parameters - a = params['a'][0] - b = params['b'][0] - c = params['c'][0] - + """Run a simulation of the ABC-model for given input and param sets.""" # Number of simulation timesteps num_timesteps = len(prec) - - # Initialize array for the simulated stream flow and the storage - qsim = np.zeros(num_timesteps, np.float64) - storage = np.zeros(num_timesteps, np.float64) - - # Set the initial storage value - storage[0] = initial_state - - # Model simulation - for t in range(1, num_timesteps): - # Calculate the streamflow - qsim[t] = (1 - a - b) * prec[t] + c * storage[t-1] - - # Update the storage - storage[t] = (1 - c) * storage[t-1] + a * prec[t] - - return qsim, storage + + # Initialize arrays for all simulations of all parameter sets. + qsim_all = np.zeros((num_timesteps, params.size), dtype=np.float64) + storage_all = np.zeros((num_timesteps, params.size), dtype=np.float64) + + # Process different param sets in parallel through 'prange'-loop + for i in prange(params.size): + # Unpack model parameters + a = params['a'][i] + b = params['b'][i] + c = params['c'][i] + + # Initialize array for the simulated stream flow and the storage + qsim = np.zeros(num_timesteps, np.float64) + storage = np.zeros(num_timesteps, np.float64) + + # Set the initial storage value + storage[0] = initial_state + + # Model simulation + for t in range(1, num_timesteps): + # Calculate the streamflow + qsim[t] = (1 - a - b) * prec[t] + c * storage[t-1] + + # Update the storage + storage[t] = (1 - c) * storage[t-1] + a * prec[t] + + # copy results into arrays of all qsims and storages + qsim_all[:, i] = qsim + storage_all[:, i] = storage + + return qsim_all, storage_all \ No newline at end of file diff --git a/rrmpg/models/gr4j.py b/rrmpg/models/gr4j.py index e891b57..40327a5 100644 --- a/rrmpg/models/gr4j.py +++ b/rrmpg/models/gr4j.py @@ -11,7 +11,7 @@ import numpy as np -from numba import njit +from numba import njit, prange from scipy import optimize from .basemodel import BaseModel @@ -71,7 +71,8 @@ def __init__(self, params=None): """ super().__init__(params=params) - def simulate(self, prec, etp, s_init=0., r_init=0., return_storage=False): + def simulate(self, prec, etp, s_init=0., r_init=0., return_storage=False, + params=None): """Simulate rainfall-runoff process for given input. This function bundles the model parameters and validates the @@ -90,6 +91,10 @@ def simulate(self, prec, etp, s_init=0., r_init=0., return_storage=False): of x3. return_stprage: (optional) Boolean, indicating if the model storages should also be returned. + params: (optional) Numpy array of parameter sets, that will be + evaluated a once in parallel. Must be of the models own custom + data type. 
If nothing is passed, the parameters, stored in the + model object, will be used. Returns: An array with the simulated streamflow and optional one array for @@ -132,19 +137,30 @@ def simulate(self, prec, etp, s_init=0., r_init=0., return_storage=False): " range [0,1]."] raise ValueError("".join(msg)) - # bundle model parameters in custom numpy array - model_params = np.zeros(1, dtype=self._dtype) - for param in self._param_list: - model_params[param] = getattr(self, param) + # If no parameters were passed, prepare array w. params from attributes + if params is None: + params = np.zeros(1, dtype=self._dtype) + for param in self._param_list: + params[param] = getattr(self, param) + + # Else, check the param input for correct datatype + else: + if params.dtype != self._dtype: + msg = ["The model parameters must be a numpy array of the ", + "models own custom data type."] + raise TypeError("".join(msg)) + # if only one parameter set is passed, expand dimensions to 1D + if isinstance(params, np.void): + params = np.expand_dims(params, params.ndim) if return_storage: qsim, s_store, r_store = _simulate_gr4j(prec, etp, s_init, - r_init, model_params) + r_init, params) return qsim, s_store, r_store else: qsim, _, _ = _simulate_gr4j(prec, etp, s_init, r_init, - model_params) + params) return qsim def fit(self, qobs, prec, etp, s_init=0., r_init=0.): @@ -275,123 +291,137 @@ def _s_curve2(t, x4): else: return 1. -@njit -def _simulate_gr4j(prec, etp, s_init, r_init, model_params): + +@njit(parallel=True) +def _simulate_gr4j(prec, etp, s_init, r_init, params): """Actual run function of the gr4j hydrological model.""" - # Unpack the model parameters - x1 = model_params['x1'][0] - x2 = model_params['x2'][0] - x3 = model_params['x3'][0] - x4 = model_params['x4'][0] - - # get the number of simulation timesteps + # Number of simulation timesteps num_timesteps = len(prec) - # initialize empty arrays with one additional timestep, since the 0th will - # be used as intial state and the first simulated is the array entry 1 - s_store = np.zeros(num_timesteps + 1, np.float64) - r_store = np.zeros(num_timesteps + 1, np.float64) - qsim = np.zeros(num_timesteps + 1, np.float64) - - # for clean array indexing, add 0 element at the 0th index of prec and etp - # so we start simulating at the index 1 - prec = np.concatenate((np.zeros(1), prec)) - etp = np.concatenate((np.zeros(1), etp)) - - # set initial values - s_store[0] = s_init * x1 - r_store[0] = r_init * x3 - - # calculate number of unit hydrograph ordinates - num_uh1 = int(np.ceil(x4)) - num_uh2 = int(np.ceil(2*x4 + 1)) - - # calculate the ordinates of both unit-hydrographs (eq. 16 & 17) - uh1_ordinates = np.zeros(num_uh1, np.float64) - uh2_ordinates = np.zeros(num_uh2, np.float64) - - for j in range(1, num_uh1 + 1): - uh1_ordinates[j - 1] = _s_curve1(j, x4) - _s_curve1(j - 1, x4) - - for j in range(1, num_uh2 + 1): - uh2_ordinates[j - 1] = _s_curve2(j, x4) - _s_curve2(j - 1, x4) + # Initialize arrays for all simulations of all parameter sets. 
+ qsim_all = np.zeros((num_timesteps, params.size), dtype=np.float64) + s_store_all = np.zeros((num_timesteps, params.size), dtype=np.float64) + r_store_all = np.zeros((num_timesteps, params.size), dtype=np.float64) - # empty arrays to store the rain distributed through the unit hydrographs - uh1 = np.zeros(num_uh1, np.float64) - uh2 = np.zeros(num_uh2, np.float64) + # Process different param sets in parallel through 'prange'-loop + for i in prange(params.size): - # Start the model simulation loop - for t in range(1, num_timesteps+1): - - # Calculate netto precipitation and evaporation - if prec[t] >= etp[t]: - p_n = prec[t] - etp[t] - pe_n = 0 - - # calculate fraction of netto precipitation that fills production - # store (eq. 3) - p_s = ((x1 * (1 - (s_store[t-1] / x1)**2) * np.tanh(p_n/x1)) / - (1 + s_store[t-1] / x1 * np.tanh(p_n / x1))) + # Unpack the model parameters + x1 = params['x1'][i] + x2 = params['x2'][i] + x3 = params['x3'][i] + x4 = params['x4'][i] + + # get the number of simulation timesteps + num_timesteps = len(prec) + + # initialize empty arrays w. 1 additional timestep, since the 0th will + # be used as intial state and the first simulated is the array entry 1 + s_store = np.zeros(num_timesteps + 1, np.float64) + r_store = np.zeros(num_timesteps + 1, np.float64) + qsim = np.zeros(num_timesteps + 1, np.float64) + + # for clean array indexing, add 0 element at the 0th index of prec and + # etp so we start simulating at the index 1 + prec = np.concatenate((np.zeros(1), prec)) + etp = np.concatenate((np.zeros(1), etp)) + + # set initial values + s_store[0] = s_init * x1 + r_store[0] = r_init * x3 + + # calculate number of unit hydrograph ordinates + num_uh1 = int(np.ceil(x4)) + num_uh2 = int(np.ceil(2*x4 + 1)) + + # calculate the ordinates of both unit-hydrographs (eq. 16 & 17) + uh1_ordinates = np.zeros(num_uh1, np.float64) + uh2_ordinates = np.zeros(num_uh2, np.float64) + + for j in range(1, num_uh1 + 1): + uh1_ordinates[j - 1] = _s_curve1(j, x4) - _s_curve1(j - 1, x4) - # no evaporation from production store - e_s = 0 + for j in range(1, num_uh2 + 1): + uh2_ordinates[j - 1] = _s_curve2(j, x4) - _s_curve2(j - 1, x4) - else: - p_n = 0 - pe_n = etp[t] - prec[t] + # arrys to store the rain distributed through the unit hydrographs + uh1 = np.zeros(num_uh1, np.float64) + uh2 = np.zeros(num_uh2, np.float64) + + # Start the model simulation loop + for t in range(1, num_timesteps+1): - # calculate the fraction of the evaporation that will evaporate - # from the production store (eq. 4) - e_s = ((s_store[t-1] * (2 - s_store[t-1]/x1) * np.tanh(pe_n/x1)) / - (1 + (1 - s_store[t-1] / x1) * np.tanh(pe_n / x1))) + # Calculate netto precipitation and evaporation + if prec[t] >= etp[t]: + p_n = prec[t] - etp[t] + pe_n = 0 - # no rain that is allocation to the production store - p_s = 0 + # calculate fraction of netto precipitation that fills + # production store (eq. 
3) + p_s = ((x1 * (1 - (s_store[t-1] / x1)**2) * np.tanh(p_n/x1)) / + (1 + s_store[t-1] / x1 * np.tanh(p_n / x1))) + + # no evaporation from production store + e_s = 0 - # Calculate the new storage content - s_store[t] = s_store[t-1] - e_s + p_s - - # calculate percolation from actual storage level - perc = s_store[t] * (1 - (1 + (4 / 9 * s_store[t] / x1)**4)**(-0.25)) - - # final update of the production store for this timestep - s_store[t] = s_store[t] - perc - - # total quantity of water that reaches the routing - p_r = perc + (p_n - p_s) - - # split this water quantity by .9/.1 for different routing (UH1 & UH2) - p_r_uh1 = 0.9 * p_r - p_r_uh2 = 0.1 * p_r - - # update the state of the rain distributed through the unit hydrographs - for j in range(0, num_uh1 - 1): - uh1[j] = uh1[j + 1] + uh1_ordinates[j] * p_r_uh1 - uh1[-1] = uh1_ordinates[-1] * p_r_uh1 - - for j in range(0, num_uh2 - 1): - uh2[j] = uh2[j + 1] + uh2_ordinates[j] * p_r_uh2 - uh2[-1] = uh2_ordinates[-1] * p_r_uh2 - - # calculate the groundwater exchange F (eq. 18) - gw_exchange = x2 * (r_store[t - 1] / x3) ** 3.5 - - # update routing store - r_store[t] = max(0, r_store[t - 1] + uh1[0] + gw_exchange) - - # outflow of routing store - q_r = r_store[t] * (1 - (1 + (r_store[t] / x3)**4)**(-0.25)) - - # subtract outflow from routing store level - r_store[t] = r_store[t] - q_r - - # calculate flow component of unit hydrograph 2 - q_d = max(0, uh2[0] + gw_exchange) - - # total discharge of this timestep - qsim[t] = q_r + q_d - - # only return the simulated timesteps, not the intial state 0 - return qsim[1:], s_store[1:], r_store[1:] + else: + p_n = 0 + pe_n = etp[t] - prec[t] + + # calculate the fraction of the evaporation that will evaporate + # from the production store (eq. 4) + e_s = ((s_store[t-1] * (2 - s_store[t-1]/x1) * np.tanh(pe_n/x1)) + / (1 + (1 - s_store[t-1] / x1) * np.tanh(pe_n / x1))) + + # no rain that is allocation to the production store + p_s = 0 + + # Calculate the new storage content + s_store[t] = s_store[t-1] - e_s + p_s + + # calculate percolation from actual storage level + perc = s_store[t] * (1 - (1 + (4/9 * s_store[t] / x1)**4)**(-0.25)) + + # final update of the production store for this timestep + s_store[t] = s_store[t] - perc + + # total quantity of water that reaches the routing + p_r = perc + (p_n - p_s) + + # split this water quantity by .9/.1 for diff. routing (UH1 & UH2) + p_r_uh1 = 0.9 * p_r + p_r_uh2 = 0.1 * p_r + + # update state of rain, distributed through the unit hydrographs + for j in range(0, num_uh1 - 1): + uh1[j] = uh1[j + 1] + uh1_ordinates[j] * p_r_uh1 + uh1[-1] = uh1_ordinates[-1] * p_r_uh1 + + for j in range(0, num_uh2 - 1): + uh2[j] = uh2[j + 1] + uh2_ordinates[j] * p_r_uh2 + uh2[-1] = uh2_ordinates[-1] * p_r_uh2 + + # calculate the groundwater exchange F (eq. 
18) + gw_exchange = x2 * (r_store[t - 1] / x3) ** 3.5 + + # update routing store + r_store[t] = max(0, r_store[t - 1] + uh1[0] + gw_exchange) + + # outflow of routing store + q_r = r_store[t] * (1 - (1 + (r_store[t] / x3)**4)**(-0.25)) + + # subtract outflow from routing store level + r_store[t] = r_store[t] - q_r + + # calculate flow component of unit hydrograph 2 + q_d = max(0, uh2[0] + gw_exchange) - \ No newline at end of file + # total discharge of this timestep + qsim[t] = q_r + q_d + + # copy simulated timesteps, not the intial state 0 + qsim_all[:, i] = qsim[1:] + s_store_all[:, i] = s_store[1:] + r_store_all[:, i] = r_store[1:] + + return qsim_all, s_store_all, r_store_all \ No newline at end of file diff --git a/rrmpg/models/hbvedu.py b/rrmpg/models/hbvedu.py index 1d1209a..8c2bb0f 100644 --- a/rrmpg/models/hbvedu.py +++ b/rrmpg/models/hbvedu.py @@ -14,8 +14,9 @@ import numpy as np -from numba import njit +from numba import njit, prange from scipy import optimize + from .basemodel import BaseModel from ..utils.metrics import mse from ..utils.array_checks import check_for_negatives, validate_array_input @@ -94,7 +95,7 @@ def __init__(self, area, params=None): raise ValueError("Area must be a positiv numercial value.") def simulate(self, temp, prec, month, PE_m, T_m, snow_init=0, soil_init=0, - s1_init=0, s2_init=0, return_storage=False): + s1_init=0, s2_init=0, return_storage=False, params=None): """Simulate rainfall-runoff process for given input. This function bundles the model parameters and validates the @@ -119,6 +120,10 @@ def simulate(self, temp, prec, month, PE_m, T_m, snow_init=0, soil_init=0, s2_init: (optional) Initial state of the base flow reservoir. return_storage: (optional) Boolean, indicating if the model storages should also be returned. + params: (optional) Numpy array of parameter sets, that will be + evaluated a once in parallel. Must be of the models own custom + data type. If nothing is passed, the parameters, stored in the + model object, will be used. Returns: An array with the simulated streamflow and optional one array for @@ -168,17 +173,28 @@ def simulate(self, temp, prec, month, PE_m, T_m, snow_init=0, soil_init=0, s1_init = float(s1_init) s2_init = float(s2_init) - # bundle model parameters in custom numpy array - model_params = np.zeros(1, dtype=self._dtype) - for param in self._param_list: - model_params[param] = getattr(self, param) + # If no parameters were passed, prepare array w. 
params from attributes + if params is None: + params = np.zeros(1, dtype=self._dtype) + for param in self._param_list: + params[param] = getattr(self, param) + + # Else, check the param input for correct datatype + else: + if params.dtype != self._dtype: + msg = ["The model parameters must be a numpy array of the ", + "models own custom data type."] + raise TypeError("".join(msg)) + # if only one parameter set is passed, expand dimensions to 1D + if isinstance(params, np.void): + params = np.expand_dims(params, params.ndim) if return_storage: # call the actual simulation function qsim, snow, soil, s1, s2 = _simulate_hbv_edu(temp, prec, month, PE_m, T_m, snow_init, soil_init, s1_init, - s2_init, model_params) + s2_init, params) # TODO: conversion from qobs in m³/s for different time resolutions # At the moment expects daily input data @@ -190,7 +206,7 @@ def simulate(self, temp, prec, month, PE_m, T_m, snow_init=0, soil_init=0, # call the actual simulation function qsim, _, _, _, _ = _simulate_hbv_edu(temp, prec, month, PE_m, T_m, snow_init, soil_init, s1_init, - s2_init, model_params) + s2_init, params) # TODO: conversion from qobs in m³/s for different time resolutions # At the moment expects daily input data @@ -325,83 +341,99 @@ def _loss(X, *args): return loss_value -@njit +@njit(parallel=True) def _simulate_hbv_edu(temp, prec, month, PE_m, T_m, snow_init, soil_init, - s1_init, s2_init, model_params): + s1_init, s2_init, params): """Run the educational HBV model for given inputs and model parameters.""" - # Unpack the model parameters - T_t = model_params['T_t'][0] - DD = model_params['DD'][0] - FC = model_params['FC'][0] - Beta = model_params['Beta'][0] - C = model_params['C'][0] - PWP = model_params['PWP'][0] - K_0 = model_params['K_0'][0] - K_1 = model_params['K_1'][0] - K_2 = model_params['K_2'][0] - K_p = model_params['K_p'][0] - L = model_params['L'][0] - - # get number of simulation timesteps - num_timesteps = len(temp) - - # initialize empty arrays for all reservoirs and outflow - snow = np.zeros(num_timesteps, np.float64) - soil = np.zeros(num_timesteps, np.float64) - s1 = np.zeros(num_timesteps, np.float64) - s2 = np.zeros(num_timesteps, np.float64) - qsim = np.zeros(num_timesteps, np.float64) - - # set initial values - snow[0] = snow_init - soil[0] = soil_init - s1[0] = s1_init - s2[0] = s2_init - - # Start the model simulation as loop over all timesteps - for t in range(1, num_timesteps): - - # Check if temperature is below threshold - if temp[t] < T_t: - # accumulate snow - snow[t] = snow[t-1] + prec[t] - # no liquid water - liquid_water = 0 - else: - # melt snow - snow[t] = max(0, snow[t-1] - DD * (temp[t] - T_t)) - # add melted snow to available liquid water - liquid_water = prec[t] + min(snow[t-1], DD * (temp[t] - T_t)) - - # calculate the effective precipitation - prec_eff = liquid_water * (soil[t-1] / FC) ** Beta - - # Calculate the potential evapotranspiration - pe = (1 + C * (temp[t] - T_m[month[t]])) * PE_m[month[t]] - - # Calculate the actual evapotranspiration - if soil[t-1] > PWP: - ea = pe - else: - ea = pe * (soil[t-1] / PWP) - - # calculate the actual level of the soil reservoir - soil[t] = soil[t-1] + liquid_water - prec_eff - ea - - # calculate the actual level of the near surface flow reservoir - s1[t] = (s1[t-1] - + prec_eff - - max(0, s1[t-1] - L) * K_0 - - s1[t-1] * K_1 - - s1[t-1] * K_p) - - # calculate the actual level of the base flow reservoir - s2[t] = (s2[t-1] - + s1[t-1] * K_p - - s2[t-1] * K_2) - - qsim[t] = ((max(0, s1[t-1] - L)) * K_0 - + 
s1[t] * K_1 - + s2[t] * K_2) + # Number of simulation timesteps + num_timesteps = len(prec) + + # Initialize arrays for all simulations of all parameter sets. + qsim_all = np.zeros((num_timesteps, params.size), dtype=np.float64) + snow_all = np.zeros((num_timesteps, params.size), dtype=np.float64) + soil_all = np.zeros((num_timesteps, params.size), dtype=np.float64) + s1_all = np.zeros((num_timesteps, params.size), dtype=np.float64) + s2_all = np.zeros((num_timesteps, params.size), dtype=np.float64) + + # Process different param sets in parallel through 'prange'-loop + for i in prange(params.size): + # Unpack the model parameters + T_t = params['T_t'][i] + DD = params['DD'][i] + FC = params['FC'][i] + Beta = params['Beta'][i] + C = params['C'][i] + PWP = params['PWP'][i] + K_0 = params['K_0'][i] + K_1 = params['K_1'][i] + K_2 = params['K_2'][i] + K_p = params['K_p'][i] + L = params['L'][i] + + # initialize empty arrays for all reservoirs and outflow + snow = np.zeros(num_timesteps, np.float64) + soil = np.zeros(num_timesteps, np.float64) + s1 = np.zeros(num_timesteps, np.float64) + s2 = np.zeros(num_timesteps, np.float64) + qsim = np.zeros(num_timesteps, np.float64) + + # set initial values + snow[0] = snow_init + soil[0] = soil_init + s1[0] = s1_init + s2[0] = s2_init + + # Start the model simulation as loop over all timesteps + for t in range(1, num_timesteps): + + # Check if temperature is below threshold + if temp[t] < T_t: + # accumulate snow + snow[t] = snow[t-1] + prec[t] + # no liquid water + liquid_water = 0 + else: + # melt snow + snow[t] = max(0, snow[t-1] - DD * (temp[t] - T_t)) + # add melted snow to available liquid water + liquid_water = prec[t] + min(snow[t-1], DD * (temp[t] - T_t)) + + # calculate the effective precipitation + prec_eff = liquid_water * (soil[t-1] / FC) ** Beta + + # Calculate the potential evapotranspiration + pe = (1 + C * (temp[t] - T_m[month[t]])) * PE_m[month[t]] + + # Calculate the actual evapotranspiration + if soil[t-1] > PWP: + ea = pe + else: + ea = pe * (soil[t-1] / PWP) + + # calculate the actual level of the soil reservoir + soil[t] = soil[t-1] + liquid_water - prec_eff - ea + + # calculate the actual level of the near surface flow reservoir + s1[t] = (s1[t-1] + + prec_eff + - max(0, s1[t-1] - L) * K_0 + - s1[t-1] * K_1 + - s1[t-1] * K_p) + + # calculate the actual level of the base flow reservoir + s2[t] = (s2[t-1] + + s1[t-1] * K_p + - s2[t-1] * K_2) + + qsim[t] = ((max(0, s1[t-1] - L)) * K_0 + + s1[t] * K_1 + + s2[t] * K_2) + + # copy results into arrays of all qsims and storages + qsim_all[:, i] = qsim + snow_all[:, i] = snow + soil_all[:, i] = soil + s1_all[:, i] = s1 + s2_all[:, i] = s2 - return qsim, snow, soil, s1, s2 + return qsim_all, snow_all, soil_all, s1_all, s2_all diff --git a/rrmpg/tools/monte_carlo.py b/rrmpg/tools/monte_carlo.py index 324829f..6a5dbd2 100644 --- a/rrmpg/tools/monte_carlo.py +++ b/rrmpg/tools/monte_carlo.py @@ -35,8 +35,8 @@ def monte_carlo(model, num, qobs=None, **kwargs): key 'params' contains a numpy array with the model parameter of each simulation. 'qsim' is a 2D numpy array with the simulated streamflow for each simulation. If an array of observed streamflow is provided, - one additional key is returned in the dictonary, being 'nse'. This key - contains an array of the Nash-Sutcliff-Efficiency for each simulation. + one additional key is returned in the dictonary, being 'mse'. This key + contains an array of the mean-squared-error for each simulation. 
     Raises:
         ValueError: If any input contains invalid values.
@@ -59,26 +59,18 @@ def monte_carlo(model, num, qobs=None, **kwargs):
     # Generate sets of random parameters
     params = model.get_random_params(num=num)
-
-    # Initialize arrays simulations and model efficiency
-    qsim = np.zeros((len(kwargs['prec']), num), dtype=np.float64)
-    nse_values = np.zeros(num, dtype=np.float64)
-
-    # Perform monte-carlo-simulations
-    for n in range(num):
-        # set random params as model parameter
-        model.set_params(params[n])
+    # perform the Monte-Carlo simulation by evaluating all parameter sets at once
+    qsim = model.simulate(params=params, **kwargs)
 
-        # calculate simulation
-        qsim[:, n] = model.simulate(**kwargs).flatten()
+    if qobs is not None:
+        # calculate the mean squared error of each simulation
+        mse_values = np.zeros(num, dtype=np.float64)
 
-        if qobs is not None:
-            # calculate model efficiency
-            nse_values[n] = mse(qobs, qsim[:, n])
+        for n in range(num):
+            mse_values[n] = mse(qobs, qsim[:, n])
 
-    if qobs is not None:
-        return {'params': params, 'qsim': qsim, 'mse': nse_values}
+        return {'params': params, 'qsim': qsim, 'mse': mse_values}
     else:
         return {'params': params, 'qsim': qsim}
diff --git a/test/test_models.py b/test/test_models.py
index 1ce0515..50c68c6 100644
--- a/test/test_models.py
+++ b/test/test_models.py
@@ -160,5 +160,5 @@ def test_simulate_compare_against_excel(self):
         data = pd.read_csv(data_file, sep=',')
         qsim = self.model.simulate(data.prec, data.etp, s_init=s_init,
                                    r_init=r_init, return_storage=False)
-        self.assertTrue(np.allclose(qsim, data.qsim_excel))
+        self.assertTrue(np.allclose(qsim.flatten(), data.qsim_excel))
\ No newline at end of file
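
For reference, a minimal usage sketch of the new ``params`` argument that this patch adds to the models' ``simulate()`` methods. The import path ``rrmpg.models.ABCModel`` and the synthetic precipitation series are assumptions made purely for illustration; ``get_random_params()``, ``simulate(..., params=...)`` and the 2D result array come from the change itself::

    import numpy as np

    # import path assumed for illustration (the patch touches rrmpg/models/abcmodel.py)
    from rrmpg.models import ABCModel

    model = ABCModel()

    # draw 1000 random parameter sets as a structured array of the model's own dtype
    params = model.get_random_params(num=1000)

    # synthetic precipitation forcing, purely for illustration
    prec = np.random.exponential(scale=2.0, size=365)

    # all parameter sets are evaluated in a single call; the prange-parallelized
    # kernel returns one column per parameter set, i.e. shape (len(prec), 1000)
    qsim = model.simulate(prec, params=params)
    print(qsim.shape)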
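
Building on that, a sketch of the reworked ``monte_carlo`` helper, which now draws the parameter sets itself, evaluates them in one parallel ``simulate()`` call and, when observations are passed, returns the mean squared error of every parameter set under the key ``'mse'``. The observation and forcing series below are synthetic placeholders::

    import numpy as np

    from rrmpg.models import ABCModel          # import path assumed for illustration
    from rrmpg.tools.monte_carlo import monte_carlo

    model = ABCModel()
    prec = np.random.exponential(scale=2.0, size=365)   # synthetic forcing
    qobs = np.random.exponential(scale=1.0, size=365)   # placeholder observations

    # keyword arguments other than 'qobs' are passed straight to model.simulate()
    result = monte_carlo(model, num=1000, qobs=qobs, prec=prec)
    print(result['params'].shape, result['qsim'].shape, result['mse'].shape)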