From 0d7e8164164c8674b20c71612546588f24bb309c Mon Sep 17 00:00:00 2001 From: Raoul Collenteur Date: Thu, 30 Nov 2023 21:14:53 +0100 Subject: [PATCH 01/10] Update dev-branch to 1.4.0b --- pastas/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pastas/version.py b/pastas/version.py index 7fb2b106..ada69ce1 100644 --- a/pastas/version.py +++ b/pastas/version.py @@ -4,7 +4,7 @@ logger = logging.getLogger(__name__) -__version__ = "1.3.0" +__version__ = "1.4.0b" def check_numba_scipy() -> bool: From 9c766898724bd6a54d7cf98abd444880d7d7d71a Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Thu, 4 Jan 2024 12:26:10 +0100 Subject: [PATCH 02/10] fix for Problem adding LinearTrend #675 (#676) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Davíd Brakenhoff --- pastas/model.py | 54 ++++++++++++++++++--------------- pastas/plotting/modelcompare.py | 5 +-- pastas/plotting/modelplots.py | 3 +- 3 files changed, 34 insertions(+), 28 deletions(-) diff --git a/pastas/model.py b/pastas/model.py index 97b3b5a5..e7b493d9 100644 --- a/pastas/model.py +++ b/pastas/model.py @@ -27,6 +27,7 @@ from pastas.modelstats import Statistics from pastas.noisemodels import NoiseModel from pastas.plotting.modelplots import Plotting, _table_formatter_stderr +from pastas.rfunc import HantushWellModel from pastas.solver import LeastSquares from pastas.stressmodels import Constant from pastas.timeseries import TimeSeries @@ -44,6 +45,8 @@ from pastas.utils import validate_name from pastas.version import __version__ +logger = getLogger(__name__) + class Model: """Class that initiates a Pastas time series model. @@ -93,8 +96,6 @@ def __init__( metadata: Optional[dict] = None, freq: str = "D", ) -> None: - self.logger = getLogger(__name__) - # Construct the different model components self.oseries = TimeSeries(oseries, settings="oseries", metadata=metadata) @@ -211,13 +212,13 @@ def add_stressmodel( for sm in stressmodel: self.add_stressmodel(sm) elif (stressmodel.name in self.stressmodels.keys()) and not replace: - self.logger.error( + logger.error( "The name for the stressmodel you are trying to add already exists " "for this model. Select another name." ) else: if stressmodel.name in self.stressmodels.keys(): - self.logger.warning( + logger.warning( "The name for the stressmodel you are trying to add already " "exists for this model. The stressmodel is replaced." ) @@ -229,7 +230,7 @@ def add_stressmodel( if (stressmodel.tmin > self.oseries.series.index.max()) or ( stressmodel.tmax < self.oseries.series.index.min() ): - self.logger.warning( + logger.warning( "The stress of the stressmodel has no overlap with ml.oseries." 
) self._check_stressmodel_compatibility() @@ -318,7 +319,7 @@ def del_stressmodel(self, name: str): def del_constant(self) -> None: """Method to safely delete the Constant from the Model.""" if self.constant is None: - self.logger.warning("No constant is present in this model.") + logger.warning("No constant is present in this model.") else: self.constant = None self.parameters = self.get_init_parameters(initial=False) @@ -326,7 +327,7 @@ def del_constant(self) -> None: def del_transform(self) -> None: """Method to safely delete the transform from the Model.""" if self.transform is None: - self.logger.warning("No transform is present in this model.") + logger.warning("No transform is present in this model.") else: self.transform = None self.parameters = self.get_init_parameters(initial=False) @@ -334,7 +335,7 @@ def del_transform(self) -> None: def del_noisemodel(self) -> None: """Method to safely delete the noise model from the Model.""" if self.noisemodel is None: - self.logger.warning("No noisemodel is present in this model.") + logger.warning("No noisemodel is present in this model.") else: self.noisemodel = None self.parameters = self.get_init_parameters(initial=False) @@ -436,7 +437,7 @@ def simulate( "are provided for each stress model " "(e.g. `ps.StressModel(stress, settings='prec')`!" ) - self.logger.error(msg) + logger.error(msg) raise ValueError(msg) sim.name = "Simulation" @@ -492,7 +493,7 @@ def residuals( if self.interpolate_simulation is None: if oseries_calib.index.difference(sim.index).size != 0: self.interpolate_simulation = True - self.logger.info( + logger.info( "There are observations between the simulation time steps. Linear " "interpolation between simulated values is used." ) @@ -510,7 +511,7 @@ def residuals( if res.hasnans: res = res.dropna() - self.logger.warning("Nan-values were removed from the residuals.") + logger.warning("Nan-values were removed from the residuals.") if self.normalize_residuals: res = res.subtract(res.values.mean()) @@ -563,7 +564,7 @@ def noise( This method returns None if no noise model is present in the model. """ if self.noisemodel is None or self.settings["noise"] is False: - self.logger.error( + logger.error( "Noise cannot be calculated if there is no noisemodel present or is " "not used during parameter estimation." ) @@ -688,7 +689,7 @@ def initialize( if noise is None and self.noisemodel: noise = True elif noise is True and self.noisemodel is None: - self.logger.warning( + logger.warning( "Warning, solving with noise=True while no noisemodel is present. " "noise set to False." ) @@ -839,7 +840,7 @@ def solve( noise=self.settings["noise"], weights=weights, **kwargs ) if not success: - self.logger.warning("Model parameters could not be estimated well.") + logger.warning("Model parameters could not be estimated well.") if self.settings["fit_constant"] is False: # Determine the residuals and set the constant to their mean @@ -865,7 +866,7 @@ def fit(self): "Attribute 'fit' is deprecated and will be removed in a future version. " "Use 'solver' instead." 
) - self.logger.warning(msg) + logger.warning(msg) return self.solver @@ -915,7 +916,7 @@ def set_parameter( """ if name not in self.parameters.index: msg = "parameter %s is not present in the model" - self.logger.error(msg, name) + logger.error(msg, name) raise KeyError(msg, name) # Because either of the following is not necessarily present @@ -989,7 +990,7 @@ def _get_time_offset(self, freq: str) -> Timedelta: time_offsets.add(_get_time_offset(t[mask][0], freq)) if len(time_offsets) > 1: msg = "The time-offset with the frequency is not the same for all stresses." - self.logger.error(msg) + logger.error(msg) raise (Exception(msg)) if len(time_offsets) == 1: return next(iter(time_offsets)) @@ -1238,9 +1239,7 @@ def get_parameters(self, name: Optional[str] = None) -> ArrayLike: p = self.parameters if p.optimal.hasnans: - self.logger.warning( - "Model is not optimized yet, initial parameters are used." - ) + logger.warning("Model is not optimized yet, initial parameters are used.") parameters = p.initial else: parameters = p.optimal @@ -1494,7 +1493,7 @@ def _get_response( Pandas.Series with the response, None if not present. """ if self.stressmodels[name].rfunc is None: - self.logger.warning("Stressmodel %s has no rfunc.", name) + logger.warning("Stressmodel %s has no rfunc.", name) return None else: block_or_step = getattr(self.stressmodels[name].rfunc, block_or_step) @@ -1629,12 +1628,12 @@ def get_response_tmax( to a recharge pulse has taken place. """ if self.stressmodels[name].rfunc is None: - self.logger.warning("Stressmodel %s has no rfunc", name) + logger.warning("Stressmodel %s has no rfunc", name) return None else: if p is None: p = self.get_parameters(name) - if self.stressmodels[name].rfunc._name == "HantushWellModel": + if isinstance(self.stressmodels[name].rfunc, HantushWellModel): kwargs = {"warn": warn} else: kwargs = {} @@ -1907,14 +1906,19 @@ def _check_response_tmax(self, cutoff: Optional[float] = None) -> DataFrame: len_oseries_calib = (self.settings["tmax"] - self.settings["tmin"]).days + # only check stressmodels with a response function + sm_names = [ + key for key, item in self.stressmodels.items() if item.rfunc is not None + ] + check = DataFrame( - index=self.stressmodels.keys(), + index=sm_names, columns=["len_oseries_calib", "response_tmax", "check_ok"], ) check["len_oseries_calib"] = len_oseries_calib for sm_name in self.stressmodels: - if self.stressmodels[sm_name].rfunc._name == "HantushWellModel": + if isinstance(self.stressmodels[sm_name].rfunc, HantushWellModel): kwargs = {"warn": False} else: kwargs = {} diff --git a/pastas/plotting/modelcompare.py b/pastas/plotting/modelcompare.py index 4fabac51..177487ee 100644 --- a/pastas/plotting/modelcompare.py +++ b/pastas/plotting/modelcompare.py @@ -10,6 +10,7 @@ from pandas import DataFrame, concat from pastas.plotting.plotutil import _table_formatter_params, share_xaxes, share_yaxes +from pastas.rfunc import HantushWellModel from pastas.stats.core import acf from pastas.typing import Axes, Model @@ -574,7 +575,7 @@ def plot_response( if response == "step": kwargs = {} if ml.stressmodels[smn].rfunc is not None: - if ml.stressmodels[smn].rfunc._name == "HantushWellModel": + if isinstance(ml.stressmodels[smn].rfunc, HantushWellModel): kwargs = {"warn": False} step = ml.get_step_response(smn, add_0=True, **kwargs) if step is None: @@ -595,7 +596,7 @@ def plot_response( elif response == "block": kwargs = {} if ml.stressmodels[smn].rfunc is not None: - if ml.stressmodels[smn].rfunc._name == "HantushWellModel": + if 
isinstance(ml.stressmodels[smn].rfunc, HantushWellModel): kwargs = {"warn": False} block = ml.get_block_response(smn, **kwargs) if block is None: diff --git a/pastas/plotting/modelplots.py b/pastas/plotting/modelplots.py index 2e60d5e3..7ef008fa 100644 --- a/pastas/plotting/modelplots.py +++ b/pastas/plotting/modelplots.py @@ -19,6 +19,7 @@ _table_formatter_params, _table_formatter_stderr, ) +from pastas.rfunc import HantushWellModel from pastas.typing import Axes, Figure, Model, TimestampType logger = logging.getLogger(__name__) @@ -267,7 +268,7 @@ def results( # plot the step response rkwargs = {} if self.ml.stressmodels[sm_name].rfunc is not None: - if self.ml.stressmodels[sm_name].rfunc._name == "HantushWellModel": + if isinstance(self.ml.stressmodels[sm_name].rfunc, HantushWellModel): rkwargs = {"warn": False} response = self.ml._get_response( block_or_step=block_or_step, name=sm_name, add_0=True, **rkwargs From b3a179d537b337a145d9389552eaac14c0d1099f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ruben=20Calj=C3=A9?= Date: Fri, 26 Jan 2024 11:29:05 +0100 Subject: [PATCH 03/10] Fix issue #681 --- pastas/stressmodels.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pastas/stressmodels.py b/pastas/stressmodels.py index d50b784b..8a60b8fa 100644 --- a/pastas/stressmodels.py +++ b/pastas/stressmodels.py @@ -1620,6 +1620,7 @@ def __init__( self.dmin = dmin self.dmax = dmax super().__init__(prec=prec, evap=evap, rfunc=rfunc, **kwargs) + self.nsplit = 1 def set_init_parameters(self) -> None: # parameters for the first drainage level From a8d97b0bba2121debc0674d2ac5f6283c49e069f Mon Sep 17 00:00:00 2001 From: Raoul Collenteur Date: Mon, 5 Feb 2024 07:21:57 +0100 Subject: [PATCH 04/10] Update GH Actions --- .github/workflows/ci.yml | 2 +- .github/workflows/python-publish.yml | 2 +- .github/workflows/test_benchmark_notebooks.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 34e820c2..464759b2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -60,7 +60,7 @@ jobs: # Pytest PYTEST_ADDOPTS: "--color=yes" steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python }} uses: actions/setup-python@v4 diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index ea7327ef..bcc4cd5c 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v4 with: diff --git a/.github/workflows/test_benchmark_notebooks.yml b/.github/workflows/test_benchmark_notebooks.yml index 9dbb31ae..7fc1b57d 100644 --- a/.github/workflows/test_benchmark_notebooks.yml +++ b/.github/workflows/test_benchmark_notebooks.yml @@ -25,7 +25,7 @@ jobs: # Pytest PYTEST_ADDOPTS: "--color=yes" steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python }} uses: actions/setup-python@v4 From bd511df0458ee3f752cac1de5f26926398d01e33 Mon Sep 17 00:00:00 2001 From: Raoul Collenteur Date: Mon, 5 Feb 2024 07:44:03 +0100 Subject: [PATCH 05/10] Ru Black on Dev-branch --- pastas/io/pas.py | 1 + pastas/objective_functions.py | 1 + pastas/plotting/modelcompare.py | 1 + pastas/stats/__init__.py | 1 + pastas/stats/signatures.py | 1 + pastas/stressmodels.py | 6 +++--- pastas/transform.py | 1 + 7 files changed, 9 insertions(+), 3 deletions(-) 
diff --git a/pastas/io/pas.py b/pastas/io/pas.py index 602abc0c..2038764f 100644 --- a/pastas/io/pas.py +++ b/pastas/io/pas.py @@ -4,6 +4,7 @@ R.A. Collenteur - August 2017 """ + import datetime import json from collections import OrderedDict diff --git a/pastas/objective_functions.py b/pastas/objective_functions.py index 66bac17b..5bdcb9fa 100644 --- a/pastas/objective_functions.py +++ b/pastas/objective_functions.py @@ -2,6 +2,7 @@ `EmceeSolve` solver. """ + from numpy import log, pi from pandas import DataFrame diff --git a/pastas/plotting/modelcompare.py b/pastas/plotting/modelcompare.py index 177487ee..9556c08b 100644 --- a/pastas/plotting/modelcompare.py +++ b/pastas/plotting/modelcompare.py @@ -1,5 +1,6 @@ """This module contains tools for visually comparing multiple models. """ + from itertools import combinations from logging import getLogger from typing import List, Optional, Tuple diff --git a/pastas/stats/__init__.py b/pastas/stats/__init__.py index 7530f6ec..2992a62c 100644 --- a/pastas/stats/__init__.py +++ b/pastas/stats/__init__.py @@ -14,6 +14,7 @@ tests """ + # flake8: noqa import pastas.stats.metrics as metrics diff --git a/pastas/stats/signatures.py b/pastas/stats/signatures.py index 6719b1fc..36dd0676 100644 --- a/pastas/stats/signatures.py +++ b/pastas/stats/signatures.py @@ -1,4 +1,5 @@ """This module contains methods to compute the groundwater signatures.""" + # Type Hinting from typing import Optional, Tuple diff --git a/pastas/stressmodels.py b/pastas/stressmodels.py index 8a60b8fa..da39e38a 100644 --- a/pastas/stressmodels.py +++ b/pastas/stressmodels.py @@ -354,9 +354,9 @@ def __init__( tmax=stress.series.index.max(), rfunc=rfunc, up=up, - gain_scale_factor=stress.series.std() - if gain_scale_factor is None - else gain_scale_factor, + gain_scale_factor=( + stress.series.std() if gain_scale_factor is None else gain_scale_factor + ), ) self.gain_scale_factor = gain_scale_factor diff --git a/pastas/transform.py b/pastas/transform.py index 57948613..aca77496 100644 --- a/pastas/transform.py +++ b/pastas/transform.py @@ -2,6 +2,7 @@ These transforms are applied after the simulation, to incorporate nonlinear effects. 
""" + import numpy as np from pandas import DataFrame, Series From 6ce409098968632bdb355927a92df0289ca4d363 Mon Sep 17 00:00:00 2001 From: Martin Vonk <66305055+martinvonk@users.noreply.github.com> Date: Tue, 13 Feb 2024 15:07:17 +0100 Subject: [PATCH 06/10] Support Python 3.12 + GitHub workflow update versions (#683) --- .github/workflows/auto-author-assign.yml | 2 +- .github/workflows/ci.yml | 4 ++-- .github/workflows/python-publish.yml | 2 +- .github/workflows/test_benchmark_notebooks.yml | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/auto-author-assign.yml b/.github/workflows/auto-author-assign.yml index b213e40c..d253be72 100644 --- a/.github/workflows/auto-author-assign.yml +++ b/.github/workflows/auto-author-assign.yml @@ -12,4 +12,4 @@ jobs: assign-author: runs-on: ubuntu-latest steps: - - uses: toshimaru/auto-author-assign@v1.6.2 + - uses: toshimaru/auto-author-assign@v2.1.0 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 464759b2..29ca3e1a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -38,7 +38,7 @@ jobs: - name: Test suite with py312-ubuntu python: "3.12" toxenv: py312 - experimental: true + experimental: false - name: Formatting with black + isort python: "3.9" toxenv: format @@ -63,7 +63,7 @@ jobs: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} check-latest: true diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index bcc4cd5c..e61fdba2 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -15,7 +15,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.x' - name: Install dependencies diff --git a/.github/workflows/test_benchmark_notebooks.yml b/.github/workflows/test_benchmark_notebooks.yml index 7fc1b57d..6655e2e6 100644 --- a/.github/workflows/test_benchmark_notebooks.yml +++ b/.github/workflows/test_benchmark_notebooks.yml @@ -28,7 +28,7 @@ jobs: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} check-latest: true From 5251c97f9afd5e1a997da14260f33d5fd48ad837 Mon Sep 17 00:00:00 2001 From: Raoul Collenteur Date: Fri, 16 Feb 2024 09:21:11 +0100 Subject: [PATCH 07/10] Update groundwater signatures module (#636) Co-authored-by: MatevzVremec --- doc/examples/signatures.ipynb | 49 +- pastas/stats/signatures.py | 1224 ++++++++++++++++++++++++--------- tests/test_signatures.py | 37 +- 3 files changed, 932 insertions(+), 378 deletions(-) diff --git a/doc/examples/signatures.ipynb b/doc/examples/signatures.ipynb index e3b30be1..3d8b70db 100644 --- a/doc/examples/signatures.ipynb +++ b/doc/examples/signatures.ipynb @@ -9,17 +9,17 @@ "source": [ "# Groundwater signatures\n", "\n", - "*R.A. Collenteur, University of Graz, 2022*\n", + "*R.A. Collenteur, Eawag, 2023*\n", "\n", - "In this notebook we introduce the **groundwater signatures** module available in Pastas. The signatures methods can be accessed through the `signatures` module in the `pastas.stats` sub-package. This module is partly based on the work from [Heudorfer et al. 
(2019)](#References) and others (see [References](#References)).\n", + "In this notebook we introduce the **groundwater signatures** module available in Pastas. The signatures methods can be accessed through the `signatures` module in the `pastas.stats` sub-package.\n", "\n", - "Groundwater signatures characterize different aspects of a groundwater time series, and can be subdivided in different categories: shape, distribution, and structure. Groundwater signatures are also referred to as 'indices' or 'quantitative' metrics. In Pastas, 'signatures' is adopted to avoid any confusion with time indices and goodness-of-fit metrics. \n", + "Groundwater signatures are quantitative metrics that characterize different aspects of a groundwater time series. They are commonly subdivided in different categories: shape, distribution, and structure. Groundwater signatures are also referred to as 'indices' or 'quantitative' metrics. In Pastas, 'signatures' is adopted to avoid any confusion with time indices and goodness-of-fit metrics. \n", "\n", - "The signatures can be used to *objectively* characterize different groundwater systems, for example distinquishing between fast and slow groundwater systems. The use of signatures is common in other parts of hydrology (e.g., rainfall-runoff modeling) and can be applied in all phases of modeling (see, for example [McMillan, 2020](#References) for an overview). \n", + "The signatures can be used to *objectively* characterize different groundwater systems, for example, distinguishing between fast and slow groundwater systems. The use of signatures is common in other parts of hydrology (e.g., rainfall-runoff modeling) and can be applied in all phases of modeling (see, for example, [McMillan, 2021](#References) for an overview). \n", "\n", "
\n", "Note:\n", - "The `signatures` module is still work in progress and needs further verification. Please report any issues and bugs on the Pastas GitHub repository!\n", + "The `signatures` module is under active development and any help is welcome. Please report any issues and bugs on the Pastas GitHub repository!\n", "
" ] }, @@ -103,34 +103,21 @@ "source": [ "## 3. Plot the results\n", "\n", - "Depending on the signature, different ranges of parameters can be expected. We therefore split the signatures in two range for visualization purposes." + "Depending on the signature, different ranges of parameters can be expected. We therefore normalize the signatures values by the mean value of each signature. This way we can easily compare the two groundwater systems." ] }, { "cell_type": "code", "execution_count": null, - "id": "63e5dfaa", + "id": "e8fe7d8d-5ae1-4cae-a267-080f5cdcbf75", "metadata": {}, "outputs": [], "source": [ - "fig = plt.figure(constrained_layout=True, figsize=(10, 4))\n", - "gs = GridSpec(1, 5, figure=fig)\n", - "ax1 = fig.add_subplot(gs[0, :4])\n", - "ax2 = fig.add_subplot(gs[0, 4])\n", - "\n", - "low_values = df.where(df < 2).dropna()\n", - "low_values.plot(ax=ax1)\n", - "ax1.set_xticks(np.arange(len(low_values.index)))\n", - "ax1.set_xticklabels(low_values.index, rotation=90)\n", - "ax1.grid()\n", - "high_values = df.where(df > 2).dropna()\n", - "high_values.plot(ax=ax2)\n", - "ax2.set_xticks(np.arange(len(high_values)))\n", - "ax2.set_xticklabels(high_values.index, rotation=90)\n", - "ax2.grid()\n", - "\n", - "ax1.set_title(\"Low value signatures\")\n", - "ax2.set_title(\"High value signatures\");" + "_, ax = plt.subplots(1,1, figsize=(10, 4))\n", + "df.div(df.mean(axis=1), axis=0).mul(100).plot(ax=ax)\n", + "ax.set_xticks(np.arange(len(df.index)), df.index, rotation=90, )\n", + "ax.set_ylabel(\"% Change from the mean\\n signature value\")\n", + "ax.grid()" ] }, { @@ -141,14 +128,16 @@ }, "source": [ "## 4. Interpretation of signatures\n", - "The different signatures can be used to compare the different systems or characterize a single system. For example, the first head time series has a high bimodality coefficient (>0.7), indicating a bimodel distribution of the data. This makes sense, as this time series is used as an example for the non-linear threshold model (see notebook). Rather than (naively) testing all model structures, this is an example where we can potentially use a groundwater signature to identify a 'best' model structure beforehand.\n", "\n", - "Another example. The second time series is observed in a much slower groundwater system than the first. This is for example clearly visible and quantified by the different values for the 'pulse_duration', the 'recession and recovery constants', and the 'slope of the duration curves'. We could use this type of information to determine whether we should use a 'fast' or 'slow' response function (e.g., an Exponential or Gamma function). These are just some examples of how groundwater signatures can be used to improve groundwater modeling, more research on this topic is required. Please contact us if interested!\n", + "The different signatures can be used to compare the different systems or characterize a single system. For example, the first head time series has a high bimodality coefficient (>0.7), indicating a bimodal distribution of the data. This makes sense, as this time series is used as an example for the non-linear threshold model (see notebook). Rather than (naively) testing all model structures, this is an example where we can potentially use a groundwater signature to identify a 'best' model structure beforehand.\n", "\n", - "A little disclaimer: from the plot above it is actually not that straightforward to compare the indices, because the range in values is large. 
For example, the rise and fall rate visually show no differences, but their numbers vary by over 200%. Thus, interpretation requires some more work. \n", + "Another example. The second time series is observed in a much slower groundwater system than the first. This is, for example, clearly visible and quantified by the different values for the 'pulse_duration', the 'recession and recovery constants', and the 'slope of the duration curves'. We could use this type of information to determine whether we should use a 'fast' or 'slow' response function (e.g., an Exponential or Gamma function). These are just some examples of how groundwater signatures can be used to improve groundwater modeling, more research on this topic is required. Please contact us if interested!\n", + "\n", + "A little disclaimer: from the data above, it is actually not that straightforward to compare the signature values because the range in values is large. For example, the rise and fall rate show small differences in absolute values, but their numbers vary by over 200%. Thus, interpretation requires some more work. \n", "\n", "### References\n", - "The following references are helpfull in learning about the groundwater signatures:\n", + "\n", + "The following references are helpful in learning about the groundwater signatures:\n", "\n", "- Heudorfer, B., Haaf, E., Stahl, K., Barthel, R., 2019. [Index-based characterization and quantification of groundwater dynamics.](https://agupubs.onlinelibrary.wiley.com/doi/full/10.1029/2018WR024418) Water Resources Research.\n", "- Haaf, E., Giese, M., Heudorfer, B., Stahl, K., Barthel, R., 2020. [Physiographic and Climatic Controls on Regional Groundwater Dynamics.](https://agupubs.onlinelibrary.wiley.com/doi/10.1029/2019WR026545) Water Resources Research.\n", @@ -173,7 +162,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.5" + "version": "3.11.3" }, "vscode": { "interpreter": { diff --git a/pastas/stats/signatures.py b/pastas/stats/signatures.py index 36dd0676..4838c35d 100644 --- a/pastas/stats/signatures.py +++ b/pastas/stats/signatures.py @@ -1,29 +1,47 @@ """This module contains methods to compute the groundwater signatures.""" # Type Hinting -from typing import Optional, Tuple - -import pandas as pd -from numpy import arange, diff, log, nan, sqrt -from pandas import DatetimeIndex, Series, Timedelta, cut +from logging import getLogger +from typing import Optional, Tuple, Union + +from numpy import ( + arctan, + array, + cos, + diff, + exp, + isclose, + isnan, + linspace, + log, + nan, + ndarray, + pi, + sin, + split, + sqrt, + where, +) +from pandas import DataFrame, DatetimeIndex, Series, Timedelta, concat, cut, to_datetime +from scipy.optimize import curve_fit from scipy.stats import linregress import pastas as ps +from pastas.stats.core import acf __all__ = [ "cv_period_mean", "cv_date_min", + "cv_date_max", "cv_fall_rate", "cv_rise_rate", "parde_seasonality", "avg_seasonal_fluctuation", - "magnitude", "interannual_variation", "low_pulse_count", "high_pulse_count", "low_pulse_duration", "high_pulse_duration", - "amplitude_range", "bimodality_coefficient", "mean_annual_maximum", "rise_rate", @@ -35,44 +53,137 @@ "recession_constant", "recovery_constant", "duration_curve_slope", - "duration_curve_range", - "baseflow_index", + "duration_curve_ratio", "richards_pathlength", - "richards_baker_index", - "baseflow_stability", + "baselevel_index", + "baselevel_stability", + "magnitude", + "autocorr_time", + "date_min", + 
"date_max", ] +logger = getLogger(__name__) + def _normalize(series: Series) -> Series: + """Normalize the time series by subtracting the mean and dividing over the range. + + Parameters + ---------- + series: pandas.Series + Pandas Series to be normalized. + + Returns + ------- + series: pandas.Series + Pandas Series scaled by subtracting the mean and dividing over the range of the + values. This results in a time series with values between zero and one. + + """ series = (series - series.min()) / (series.max() - series.min()) return series -def cv_period_mean(series: Series, freq: str = "M") -> float: - """Coefficient of variation of mean head over a period (default monthly). +def cv_period_mean(series: Series, normalize: bool = False, freq: str = "M") -> float: + """Coefficient of variation of the mean head over a period (default monthly). Parameters ---------- series: pandas.Series Pandas Series with DatetimeIndex and head values. + normalize: bool, optional + normalize the time series to values between zero and one. freq: str, optional frequency to resample the series to by averaging. Returns ------- cv: float - Coefficient of variation of mean head over a period (default monthly). + Coefficient of variation of mean head resampled over a period (default monthly). Notes ----- - Coefficient of variation of mean monthly heads :cite:t:`hughes_hydrological_1989`. + Coefficient of variation of mean monthly heads, adapted after + :cite:t:`hughes_hydrological_1989`. The higher the coefficient of variation, the + more variable the mean monthly head is throughout the year, and vice versa. The + coefficient of variation is the standard deviation divided by the mean. + + Examples + -------- + >>> import pandas as pd + >>> from pastas.stats.signatures import cv_period_mean + >>> series = pd.Series([1, 2, 3, 4, 5, 6], + index=pd.date_range(start='2022-01-01', periods=6, freq='M')) + >>> cv = cv_period_mean(series) + >>> print(cv) """ + if normalize: + series = _normalize(series) + series = series.resample(freq).mean() - cv = series.std() / series.mean() + cv = series.std(ddof=1) / series.mean() # ddof=1 = > sample std return cv +def _cv_date_min_max(series: Series, stat: str) -> float: + """Method to compute the coefficient of variation of the date of annual + minimum or maximum head using circular statistics. + + Parameters + ---------- + series : Series + Pandas Series with DatetimeIndex and head values. + stat : str + "min" or "max" to compute the cv of the date of the annual minimum or maximum + head. + + Returns + ------- + float: + Circular coefficient of variation of the date of annual minimum or maximum + head. + + Notes + ----- + Coefficient of variation of the date of annual minimum or maximum head computed + using circular statistics as described in :cite:t:`fisher_statistical_1995` (page + 32). If there are multiple dates with the same minimum or maximum head, the first + date is chosen. The higher the coefficient of variation, the more variable the date + of the annual minimum or maximum head is, and vice versa. 
+ + """ + if stat == "min": + data = series.groupby(series.index.year).idxmin(skipna=True).dropna().values + elif stat == "max": + data = series.groupby(series.index.year).idxmax(skipna=True).dropna().values + + doy = DatetimeIndex(data).dayofyear.to_numpy(float) + + m = 365.25 + two_pi = 2 * pi + + thetas = array(doy) * two_pi / m + c = cos(thetas).sum() + s = sin(thetas).sum() + r = sqrt(c**2 + s**2) / doy.size + + if (s > 0) & (c > 0): + mean_theta = arctan(s / c) + elif c < 0: + mean_theta = arctan(s / c) + pi + elif (s < 0) & (c > 0): + mean_theta = arctan(s / c) + two_pi + else: + # This should never happen + raise ValueError("Something went wrong in the circular statistics.") + + mu = mean_theta * m / two_pi + std = sqrt(-2 * log(r)) * m / two_pi + return std / mu + + def cv_date_min(series: Series) -> float: """Coefficient of variation of the date of annual minimum head. @@ -88,18 +199,45 @@ def cv_date_min(series: Series) -> float: Notes ----- - Coefficient of variation of the date of annual minimum groundwater head - according to :cite:t:`richter_method_1996`. + Coefficient of variation of the date of annual minimum head computed using circular + statistics as described in :cite:t:`fisher_statistical_1995` (page 32). If there + are multiple dates with the same minimum head, the first date is chosen. The higher + the coefficient of variation, the more variable the date of the annual minimum head + is, and vice versa. + + """ + cv = _cv_date_min_max(series, stat="min") + return cv + + +def cv_date_max(series: Series) -> float: + """Coefficient of variation of the date of annual maximum head. + + Parameters + ---------- + series: pandas.Series + Pandas Series with DatetimeIndex and head values. + + Returns + ------- + cv: float + Coefficient of variation of the date of annual maximum head. + + Notes + ----- + Coefficient of variation of the date of annual maximum head computed using circular + statistics as described in :cite:t:`fisher_statistical_1995` (page 32). If there + are multiple dates with the same maximum head, the first date is chosen. The higher + the coefficient of variation, the more variable the date of the maximum head is, + and vice versa. """ - data = series.groupby(series.index.year).idxmin().dropna().values - data = DatetimeIndex(data).dayofyear.to_numpy(float) - cv = data.std() / data.mean() + cv = _cv_date_min_max(series, stat="max") return cv def parde_seasonality(series: Series, normalize: bool = True) -> float: - """Parde seasonality according to :cite:t:`parde_fleuves_1933`. + """Parde seasonality according to :cite:t:`parde_fleuves_1933`, adapted for heads. Parameters ---------- @@ -110,20 +248,23 @@ def parde_seasonality(series: Series, normalize: bool = True) -> float: Returns ------- + float: + Parde seasonality. Notes ----- Pardé seasonality is the difference between the maximum and minimum Pardé coefficient. A Pardé series consists of 12 Pardé coefficients, corresponding to - 12 months. Pardé coefficient for, for example, January is its long‐term monthly - mean groundwater head divided by the overall mean groundwater head. + 12 months. Pardé coefficient for, for example, January is its long-term monthly + mean head divided by the overall mean head. The higher the Pardé seasonality, the + more seasonal the head is, and vice versa. 
""" - coefficients = parde_coefficients(series=series, normalize=normalize) + coefficients = _parde_coefficients(series=series, normalize=normalize) return coefficients.max() - coefficients.min() -def parde_coefficients(series: Series, normalize: bool = True) -> float: +def _parde_coefficients(series: Series, normalize: bool = True) -> Series: """Parde coefficients for each month :cite:t:`parde_fleuves_1933`. Parameters @@ -135,13 +276,32 @@ def parde_coefficients(series: Series, normalize: bool = True) -> float: Returns ------- + coefficients: pandas.Series + Parde coefficients for each month. Notes ----- Pardé seasonality is the difference between the maximum and minimum Pardé coefficient. A Pardé series consists of 12 Pardé coefficients, corresponding to - 12 months. Pardé coefficient for, for example, January is its long‐term monthly - mean groundwater head divided by the overall mean groundwater head. + 12 months. Pardé coefficient for, for example, January is its long-term monthly + mean head divided by the overall mean head. + + Examples + -------- + >>> import pandas as pd + >>> from pastas.stats.signatures import parde_coefficients + >>> series = pd.Series([1, 2, 3, 4, 5, 6], + index=pd.date_range(start='2022-01-01', periods=6, freq='M')) + >>> coefficients = parde_coefficients(series) + >>> print(coefficients) + month + 1 0.0 + 2 0.4 + 3 0.8 + 4 1.2 + 5 1.6 + 6 2.0 + dtype: float64 """ if normalize: @@ -152,8 +312,8 @@ def parde_coefficients(series: Series, normalize: bool = True) -> float: return coefficients -def _martens(series: Series, normalize: bool = True) -> Tuple[Series, Series]: - """Functions for the average seasonal fluctuation and inter annual fluctuation. +def _martens(series: Series, normalize: bool = False) -> Tuple[Series, Series]: + """Function for the average seasonal fluctuation and interannual fluctuation. Parameters ---------- @@ -165,28 +325,31 @@ def _martens(series: Series, normalize: bool = True) -> Tuple[Series, Series]: Returns ------- hl: pandas.Series - Lowest heads + Average of the three lowest heads in a year. hw: pandas.Series - Largest heads + Average of the three largest heads in a year. Notes ----- - According to :cite:t:`martens_groundwater_2013`. + According to :cite:t:`martens_groundwater_2013`. The average of the three lowest + and three highest heads in three different months for each year is computed. The + average is then taken over all years. """ - if normalize: series = _normalize(series) s = series.resample("M") - hl = s.min().groupby(s.min().index.year).nsmallest(3).groupby(level=0).mean() - hw = s.max().groupby(s.max().index.year).nlargest(3).groupby(level=0).mean() + s_min = s.min() + s_max = s.max() + hl = s_min.groupby(s_min.index.year).nsmallest(3).groupby(level=0).mean() + hw = s_max.groupby(s_max.index.year).nlargest(3).groupby(level=0).mean() return hl, hw -def avg_seasonal_fluctuation(series: Series, normalize: bool = True) -> float: - """Classification according to :cite:t:`martens_groundwater_2013`. +def avg_seasonal_fluctuation(series: Series, normalize: bool = False) -> float: + """Average seasonal fluctuation after :cite:t:`martens_groundwater_2013`. Parameters ---------- @@ -197,26 +360,32 @@ def avg_seasonal_fluctuation(series: Series, normalize: bool = True) -> float: Returns ------- - - float + float: + Average seasonal fluctuation (s). Notes ----- - Mean annual difference between the averaged 3 highest monthly groundwater heads - per year and the averaged 3 lowest monthly groundwater heads per year. 
+ Mean annual difference between the averaged 3 highest monthly heads + per year and the averaged 3 lowest monthly heads per year. Average seasonal fluctuation (s): s = MHW - MLW + A higher value of s indicates a more seasonal head, and vice versa. + + Warning + ------- + In this formulating the water table is referenced to a certain datum and + positive, not as depth below the surface! + """ hl, hw = _martens(series, normalize=normalize) + return (hw - hl).mean() - return hw.mean() - hl.mean() - -def interannual_variation(series: Series, normalize: bool = True) -> float: +def interannual_variation(series: Series, normalize: bool = False) -> float: """Interannual variation after :cite:t:`martens_groundwater_2013`. Parameters @@ -228,31 +397,35 @@ def interannual_variation(series: Series, normalize: bool = True) -> float: Returns ------- - float + float: + Interannual variation (s). Notes ----- - The average between the range in annually averaged 3 highest monthly groundwater - heads and the range in annually averaged 3 lowest monthly groundwater heads. + The average between the range in annually averaged 3 highest monthly heads and the + range in annually averaged 3 lowest monthly heads. + + Inter-yearly variation of high and low water table (s): - Inter-yearly variation of high and low water table (y): + s = ((max_HW - min_HW) + (max_LW - min_LW)) / 2 - y = ((max_HW - min_HW) + (max_LW - min_LW)) / 2 + A higher value of s indicates a more variable head, and vice versa. - Warning: In this formulating the water table is references to a certain datum and + Warning + ------- + In this formulating the water table is referenced to a certain datum and positive, not as depth below the surface! """ hl, hw = _martens(series, normalize=normalize) - - return (hw.max() - hw.min()) + (hl.max() - hl.min()) / 2 + return ((hw.max() - hw.min()) + (hl.max() - hl.min())) / 2 -def colwell_components( +def _colwell_components( series: Series, bins: int = 11, - freq: str = "M", + freq: str = "W", method: str = "mean", normalize: bool = True, ) -> Tuple[float, float, float]: @@ -266,7 +439,7 @@ def colwell_components( bins: int number of bins to determine the states of the groundwater. freq: str, optional - frequency to resample the series to. + frequency to resample the series to. Possible options are "D", "W", or "M". method: str, optional Method to use for resampling. Only "mean" is allowed now. normalize: bool, optional @@ -298,9 +471,20 @@ def colwell_components( binned = cut( series, bins=bins, right=False, include_lowest=True, labels=range(bins) ) - df = pd.DataFrame(binned) - df["time"] = df.index.month - df["values"] = 1 + df = DataFrame(binned, dtype=float) + + if freq == "M": + df["time"] = df.index.isocalendar().month + elif freq == "W": + df["time"] = df.index.isocalendar().week + elif freq == "D": + df["time"] = df.index.isocalendar().day + else: + msg = "freq %s is not a supported option." 
+ logger.error(msg, freq) + raise ValueError(msg) + + df["values"] = 1.0 df = df.pivot_table(columns="head", index="time", aggfunc="sum", values="values") # Count of rows and column items @@ -313,8 +497,8 @@ def colwell_components( hxy = -(df / z * log(df / z, where=df.values != 0)).sum().sum() # Compute final components - p = 1 - (hxy - hy) / log(bins) # Predictability - c = 1 - hx / log(bins) # Constancy + p = 1 - (hxy - hx) / log(bins) # Predictability + c = 1 - hy / log(bins) # Constancy m = (hx + hy - hxy) / log(bins) # Contingency return p, c, m @@ -322,7 +506,7 @@ def colwell_components( def colwell_constancy( series: Series, bins: int = 11, - freq: str = "M", + freq: str = "W", method: str = "mean", normalize: bool = True, ) -> Tuple[float, float, float]: @@ -343,8 +527,8 @@ def colwell_constancy( Returns ------- - p, c, m: float, float, float - predictability, constancy, contingency + c: float + Colwell's constancy. Notes ----- @@ -352,7 +536,7 @@ def colwell_constancy( the absolute number of possible states. """ - return colwell_components( + return _colwell_components( series=series, bins=bins, freq=freq, method=method, normalize=normalize )[1] @@ -360,7 +544,7 @@ def colwell_constancy( def colwell_contingency( series: Series, bins: int = 11, - freq: str = "M", + freq: str = "W", method: str = "mean", normalize: bool = True, ) -> Tuple[float, float, float]: @@ -381,8 +565,8 @@ def colwell_contingency( Returns ------- - p, c, m: float, float, float - predictability, constancy, contingency + m: float + Colwell's contingency. Notes ----- @@ -392,13 +576,15 @@ def colwell_contingency( according to definition in information theory, see reference for details. """ - return colwell_components( + return _colwell_components( series=series, bins=bins, freq=freq, method=method, normalize=normalize )[2] -def low_pulse_count(series: Series, quantile: float = 0.2) -> int: - """Number of times the series drops below a certain threshold. +def low_pulse_count( + series: Series, quantile: float = 0.2, rolling_window: Union[str, None] = "7D" +) -> float: + """Average number of times the series exceeds a certain threshold per year. Parameters ---------- @@ -406,25 +592,47 @@ def low_pulse_count(series: Series, quantile: float = 0.2) -> int: Pandas Series with DatetimeIndex and head values. quantile: float, optional Quantile used as a threshold. + rolling_window: str, optional + Rolling window to use for smoothing the time series. Default is 7 days. Set to + None to disable. See the pandas documentation for more information. Returns ------- - int: - Number of times the series exceeds a certain threshold. + float: + Average number of times the series exceeds a certain threshold per year. Notes ----- - Number of times during which the groundwater head drops below a certain threshold. + Number of times during which the head drops below a certain threshold. The threshold is defined as the 20th percentile of non-exceedance :cite:t:`richter_method_1996`. + Warning + ------- + This method is sensitive to measurement noise, e.g., every change is sign in the + differences is counted as a pulse. Therefore, it is recommended to smooth the time + series first (which is also the default). 
+ """ + if rolling_window is not None: + series = series.rolling(rolling_window).mean() + h = series < series.quantile(quantile) - return (h.astype(int).diff() > 0).sum() + sel = h.astype(int).diff().replace(0.0, nan).shift(-1).dropna().index + # Deal with pulses in the beginning and end of the time series + if h.iloc[0]: + sel = sel.append(series.index[:1]).sort_values() + if h.iloc[-1]: + sel = sel.append(series.index[-1:]).sort_values() -def high_pulse_count(series: Series, quantile: float = 0.8) -> int: - """Number of times the series exceeds a certain threshold. + return sel.size / 2 / series.index.year.unique().size + + +def high_pulse_count( + series: Series, quantile: float = 0.8, rolling_window: Union[str, None] = "7D" +) -> float: + """Average number of times the series exceeds a certain threshold per year. Parameters ---------- @@ -432,23 +640,42 @@ def high_pulse_count(series: Series, quantile: float = 0.8) -> int: Pandas Series with DatetimeIndex and head values. quantile: float, optional Quantile used as a threshold. + rolling_window: str, optional + Rolling window to use for smoothing the time series. Default is 7 days. Set to + None to disable. See the pandas documentation for more information. Returns ------- - h: int - Number of times the series exceeds a certain threshold. + float: + Average number of times the series exceeds a certain threshold per year. Notes ----- - Number of times during which the groundwater head exceeds a certain threshold. - The threshold is defined as the 80th percentile of non-exceedance. + Number of times during which the head exceeds a certain threshold. The threshold is + defined as the 80th percentile of non-exceedance. + + Warning + ------- + This method is sensitive to measurement noise, e.g., every change is sign in the + differences is counted as a pulse. Therefore, it is recommended to smooth the time + series first (which is also the default). """ + if rolling_window is not None: + series = series.rolling(rolling_window).mean() + h = series > series.quantile(quantile) - return (h.astype(int).diff() > 0).sum() + sel = h.astype(int).diff().replace(0.0, nan).shift(-1).dropna().index + if h.iloc[0]: + sel = sel.append(series.index[:1]).sort_values() + if h.iloc[-1]: + sel = sel.append(series.index[-1:]).sort_values() + return sel.size / 2 / series.index.year.unique().size -def low_pulse_duration(series: Series, quantile: float = 0.8) -> float: +def low_pulse_duration( + series: Series, quantile: float = 0.2, rolling_window: Union[str, None] = "7D" +) -> float: """Average duration of pulses where the head is below a certain threshold. Parameters @@ -457,23 +684,44 @@ def low_pulse_duration(series: Series, quantile: float = 0.8) -> float: Pandas Series with DatetimeIndex and head values. quantile: float, optional Quantile used as a threshold. + rolling_window: str, optional + Rolling window to use for smoothing the time series. Default is 7 days. Set to + None to disable. See the pandas documentation for more information. Returns ------- - float + float: + Average duration (in days) of pulses where the head drops below a certain + threshold. Notes ----- - Average duration of pulses where the groundwater head drops below a certain - threshold. The threshold is defined as the 20th percentile of non-exceedance. + Average duration of pulses (in days) where the head drops below a certain threshold. + + Warning + ------- + This method is sensitive to measurement noise, e.g., every change is sign in the + differences is counted as a pulse. 
Therefore, it is recommended to smooth the time + series first (which is also the default). """ + if rolling_window is not None: + series = series.rolling(rolling_window).mean() + h = series < series.quantile(quantile) sel = h.astype(int).diff().replace(0.0, nan).shift(-1).dropna().index + + if h.iloc[0]: + sel = sel.append(series.index[:1]).sort_values() + if h.iloc[-1]: + sel = sel.append(series.index[-1:]).sort_values() + return (diff(sel.to_numpy()) / Timedelta("1D"))[::2].mean() -def high_pulse_duration(series: Series, quantile: float = 0.8) -> float: +def high_pulse_duration( + series: Series, quantile: float = 0.8, rolling_window: Union[str, None] = "7D" +) -> float: """Average duration of pulses where the head exceeds a certain threshold. Parameters @@ -482,42 +730,75 @@ def high_pulse_duration(series: Series, quantile: float = 0.8) -> float: Pandas Series with DatetimeIndex and head values. quantile: float, optional Quantile used as a threshold. + rolling_window: str, optional + Rolling window to use for smoothing the time series. Default is 7 days. Set to + None to disable. See the pandas documentation for more information. Returns ------- - float + float: + Average duration (in days) of pulses where the head drops below a certain + threshold. Notes ----- - Average duration of pulses where the groundwater head drops exceeds a certain - threshold. The threshold is defined as the 80th percentile of non-exceedance. + Average duration of pulses where the head drops exceeds a certain threshold. The + threshold is defined as the 80th percentile of non-exceedance. + + Warning + ------- + This method is sensitive to measurement noise, e.g., every change is sign in the + differences is counted as a pulse. Therefore, it is recommended to smooth the time + series first (which is also the default). """ + if rolling_window is not None: + series = series.rolling(rolling_window).mean() + h = series > series.quantile(quantile) sel = h.astype(int).diff().replace(0.0, nan).shift(-1).dropna().index + + if h.iloc[0]: + sel = sel.append(series.index[:1]).sort_values() + if h.iloc[-1]: + sel = sel.append(series.index[-1:]).sort_values() + return (diff(sel.to_numpy()) / Timedelta("1D"))[::2].mean() -def amplitude_range(series: Series) -> float: - """Range of unscaled groundwater head. +def _get_differences(series: Series, normalize: bool = False) -> Series: + """Get the changes in the time series. Parameters ---------- series: pandas.Series Pandas Series with DatetimeIndex and head values. + normalize: bool, optional + normalize the time series to values between zero and one. Returns ------- - float + differences: pandas.Series + Differences in the time series in L/day. Notes ----- - Range of unscaled groundwater head. + Get the differences in the time series, and divide by the time step to get the rate + of change. If normalize is True, the time series is normalized to values between + zero and one. + """ - return series.max() - series.min() + if normalize: + series = _normalize(series) + + dt = diff(series.index.to_numpy()) / Timedelta("1D") + differences = series.diff().iloc[1:] / dt + return differences -def rise_rate(series: Series, normalize: bool = False) -> float: +def rise_rate( + series: Series, normalize: bool = False, rolling_window: Union[str, None] = "7D" +) -> float: """Mean of positive head changes from one day to the next. Parameters @@ -526,25 +807,33 @@ def rise_rate(series: Series, normalize: bool = False) -> float: Pandas Series with DatetimeIndex and head values. 
normalize: bool, optional normalize the time series to values between zero and one. + rolling_window: str, optional + Rolling window to use for smoothing the time series. Default is 7 days. Set to + None to disable. See the pandas documentation for more information. Returns ------- - float + float: + Mean of positive head changes from one day to the next. The units of the rise + rate are L/day (L defined by the input). Notes ----- Mean rate of positive changes in head from one day to the next. """ - if normalize: - series = _normalize(series) + if rolling_window is not None: + series = series.rolling(rolling_window).mean() + + differences = _get_differences(series, normalize=normalize) + rises = differences[differences > 0] - difference = series.diff() - rises = difference[difference > 0] return rises.mean() -def fall_rate(series: Series, normalize: bool = False) -> float: +def fall_rate( + series: Series, normalize: bool = False, rolling_window: Union[str, None] = "7D" +) -> float: """Mean negative head changes from one day to the next. Parameters @@ -553,10 +842,15 @@ def fall_rate(series: Series, normalize: bool = False) -> float: Pandas Series with DatetimeIndex and head values. normalize: bool, optional normalize the time series to values between zero and one. + rolling_window: str, optional + Rolling window to use for smoothing the time series. Default is 7 days. Set to + None to disable. See the pandas documentation for more information. Returns ------- - float + float: + Mean of negative head changes from one day to the next. The units of the fall + rate are L/day (L defined by the input). Notes ----- @@ -564,15 +858,18 @@ def fall_rate(series: Series, normalize: bool = False) -> float: :cite:t:`richter_method_1996`. """ - if normalize: - series = _normalize(series) + if rolling_window is not None: + series = series.rolling(rolling_window).mean() + + differences = _get_differences(series, normalize=normalize) + falls = differences.loc[differences < 0] - difference = series.diff() - falls = difference[difference < 0] return falls.mean() -def cv_rise_rate(series: Series, normalize: bool = False) -> float: +def cv_rise_rate( + series: Series, normalize: bool = True, rolling_window: Union[str, None] = "7D" +) -> float: """Coefficient of Variation in rise rate. Parameters @@ -581,25 +878,33 @@ def cv_rise_rate(series: Series, normalize: bool = False) -> float: Pandas Series with DatetimeIndex and head values. normalize: bool, optional normalize the time series to values between zero and one. + rolling_window: str, optional + Rolling window to use for smoothing the time series. Default is 7 days. Set to + None to disable. See the pandas documentation for more information. Returns ------- - float + float: + Coefficient of Variation in rise rate. Notes ----- - Coefficient of Variation in rise rate :cite:p:`richter_method_1996`. + Coefficient of variation in rise rate :cite:p:`richter_method_1996`. The higher the + coefficient of variation, the more variable the rise rate is, and vice versa. 
""" - if normalize: - series = _normalize(series) + if rolling_window is not None: + series = series.rolling(rolling_window).mean() + + differences = _get_differences(series, normalize=normalize) + rises = differences[differences > 0] - difference = series.diff() - rises = difference[difference > 0] - return rises.std() / rises.mean() + return rises.std(ddof=1) / rises.mean() -def cv_fall_rate(series: Series, normalize: bool = False) -> float: +def cv_fall_rate( + series: Series, normalize: bool = False, rolling_window: Union[str, None] = "7D" +) -> float: """Coefficient of Variation in fall rate. Parameters @@ -608,27 +913,33 @@ def cv_fall_rate(series: Series, normalize: bool = False) -> float: Pandas Series with DatetimeIndex and head values. normalize: bool, optional normalize the time series to values between zero and one. + rolling_window: str, optional + Rolling window to use for smoothing the time series. Default is 7 days. Set to + None to disable. See the pandas documentation for more information. Returns ------- - float + float: + Coefficient of Variation in fall rate. Notes ----- - Coefficient of Variation in fall rate :cite:p:`richter_method_1996`. + Coefficient of Variation in fall rate :cite:p:`richter_method_1996`. The higher the + coefficient of variation, the more variable the fall rate is, and vice versa. """ - if normalize: - series = _normalize(series) + if rolling_window is not None: + series = series.rolling(rolling_window).mean() + + differences = _get_differences(series, normalize=normalize) + falls = differences[differences < 0] - difference = series.diff() - falls = difference[difference < 0] - return falls.std() / falls.mean() + return falls.std(ddof=1) / falls.mean() def magnitude(series: Series) -> float: - """Difference of peak head to base head, divided by base head after - :cite:t:`hannah_approach_2000`. + """Difference between the minimum and maximum heads, divided by the minimum head + adapted after :cite:t:`hannah_approach_2000`. Parameters ---------- @@ -637,13 +948,17 @@ def magnitude(series: Series) -> float: Returns ------- - float + float: + Difference between the minimum and maximum heads, divided by the minimum head. Notes ----- - Difference of peak head to base head, divided by base head. + Difference between the minimum and maximum heads, divided by the minimum head: - ..math:: (h_max - h_min ) / h_min + ..math:: + (h_max - h_min ) / h_min + + The higher the magnitude, the more variable the head is, and vice versa. """ @@ -660,16 +975,34 @@ def reversals_avg(series: Series) -> float: Returns ------- - float + float: + Average number of rises and falls in daily head per year. Notes ----- Average annual number of rises and falls (i.e., change of sign) in daily head - :cite:p:`richter_method_1996`. + :cite:p:`richter_method_1996`. The higher the number of reversals, the more + variable the head is, and vice versa. If the head data is not daily, a warning is + issued and nan is returned. """ - reversals = (series.diff() > 0).astype(int).diff().replace(-1, 1) - return reversals.resample("A").sum().mean() + # Get the time step in days + dt = diff(series.index.to_numpy()) / Timedelta("1D") + + # Check if the time step is approximately daily + if not (dt > 0.9).all() & (dt < 1.1).all(): + msg = ( + "The time step is not approximately daily (>10% of time steps are" + "non-daily). This may lead to incorrect results." 
+ ) + logger.warning(msg) + return nan + else: + series_diff = series.diff() + reversals = ( + (series_diff[series_diff != 0.0] > 0).astype(int).diff().replace(-1, 1) + ) + return reversals.resample("A").sum().mean() def reversals_cv(series: Series) -> float: @@ -682,22 +1015,39 @@ def reversals_cv(series: Series) -> float: Returns ------- - float + float: + Coefficient of Variation in annual number of rises and falls. Notes ----- Coefficient of Variation in annual number of rises and falls in daily head - :cite:p:`richter_method_1996`. + :cite:p:`richter_method_1996`. If the coefficient of variation is high, the number + of reversals is highly variable, and vice versa. If the head data is not daily, a + warning is issued and nan is returned. """ - reversals = ( - (series.diff() > 0).astype(int).diff().replace(-1, 1).resample("A").sum() - ) - return reversals.std() / reversals.mean() + # Get the time step in days + dt = diff(series.index.to_numpy()) / Timedelta("1D") + + # Check if the time step is approximately daily + if not (dt > 0.9).all() & (dt < 1.1).all(): + msg = ( + "The time step is not approximately daily. " + "This may lead to incorrect results." + ) + logger.warning(msg) + return nan + else: + series_diff = series.diff() + reversals = ( + (series_diff[series_diff != 0.0] > 0).astype(int).diff().replace(-1, 1) + ) + annual_sum = reversals.resample("A").sum() + return annual_sum.std(ddof=1) / annual_sum.mean() -def mean_annual_maximum(series: Series, normalize: bool = False) -> float: - """Mean of annual maximum after :cite:t:`clausen_flow_2000`. +def mean_annual_maximum(series: Series, normalize: bool = True) -> float: + """Mean of annual maximum head after :cite:t:`clausen_flow_2000`. Parameters ---------- @@ -708,7 +1058,17 @@ def mean_annual_maximum(series: Series, normalize: bool = False) -> float: Returns ------- - float + float: + Mean of annual maximum head. + + Notes + ----- + Mean of annual maximum head :cite:p:`clausen_flow_2000`. + + Warning + ------- + This signatures is sensitive to the base level of the time series if normalize is + set to False. """ if normalize: @@ -717,7 +1077,7 @@ def mean_annual_maximum(series: Series, normalize: bool = False) -> float: return series.resample("A").max().mean() -def bimodality_coefficient(series: Series, normalize: bool = False) -> float: +def bimodality_coefficient(series: Series, normalize: bool = True) -> float: """Bimodality coefficient after :cite:t:`ellison_effect_1987`. Parameters @@ -729,43 +1089,136 @@ def bimodality_coefficient(series: Series, normalize: bool = False) -> float: Returns ------- - float + float: + Bimodality coefficient. Notes ----- Squared product moment skewness (s) plus one, divided by product moment kurtosis - (k). + (k): - ..math:: b = (s^2 + 1 ) / k + ..math:: + b = (s^2 + 1 ) / k - Adapted from the R 'modes' package. + Adapted from the R 'modes' package. The higher the bimodality coefficient, the more + bimodal the head distribution is, and vice versa. 
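Editorial aside: a short usage sketch of the bimodality coefficient. Values above roughly 5/9 ≈ 0.56 (the value obtained for a uniform distribution) are commonly read as an indication of bimodality; the example assumes the public `ps.stats.signatures` path used elsewhere in this module and uses made-up data:

    >>> import numpy as np
    >>> import pandas as pd
    >>> import pastas as ps
    >>> idx = pd.date_range("2000", "2010")
    >>> rng = np.random.default_rng(0)
    >>> unimodal = pd.Series(rng.normal(size=idx.size), index=idx)
    >>> bimodal = pd.Series(np.where(np.arange(idx.size) % 2 == 0, 0.0, 1.0), index=idx)
    >>> bc_uni = ps.stats.signatures.bimodality_coefficient(unimodal)
    >>> bc_bi = ps.stats.signatures.bimodality_coefficient(bimodal)
    >>> bc_bi > 5 / 9 > bc_uni
    True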
""" if normalize: series = _normalize(series) series = series.dropna() n = series.size + series_mean_diff = series - series.mean() + # Compute the skew for a finite sample skew = ( (1 / n) - * sum((series - series.mean()) ** 3) - / (((1 / n) * sum((series - series.mean()) ** 2)) ** 1.5) + * sum(series_mean_diff**3) + / (((1 / n) * sum(series_mean_diff**2)) ** 1.5) ) skew *= (sqrt(n * (n - 1))) / (n - 2) # Compute the kurtosis for a finite sample - kurt = (1 / n) * sum((series - series.mean()) ** 4) / ( - ((1 / n) * sum((series - series.mean()) ** 2)) ** 2 + kurt = (1 / n) * sum(series_mean_diff**4) / ( + ((1 / n) * sum(series_mean_diff**2)) ** 2 ) - 3 kurt = ((n - 1) * ((n + 1) * kurt - 3 * (n - 1)) / ((n - 2) * (n - 3))) + 3 return ((skew**2) + 1) / (kurt + ((3 * ((n - 1) ** 2)) / ((n - 2) * (n - 3)))) +def _get_events_binned( + series: Series, + normalize: bool = False, + up: bool = True, + bins: int = 300, + min_event_length: int = 10, + min_n_events: int = 2, +) -> Series: + """Get the recession or recovery events and bin them. + + Parameters + ---------- + series : Series + Pandas Series with DatetimeIndex and head values. + normalize : bool, optional + normalize the time series to values between zero and one. + up : bool, optional + If True, get the recovery events, if False, get the recession events. + bins : int, optional + Number of bins to bin the data to. + min_event_length : int, optional + Minimum length of an event in days. Events shorter than this are discarded. + min_n_events : int, optional + Minimum number of events in a bin. Bins with less events are discarded. + + Returns + ------- + Series: + Binned events. + + """ + if normalize: + series = _normalize(series) + + series.name = "difference" # Name the series for the split function + + # Get the negative differences + h = series.dropna().copy() + + # Set the negative differences to nan if up is True, and the positive differences + # to nan if up is False (down). + if up: + h[h.diff() < 0] = nan + else: + h[h.diff() > 0] = nan + + # Split the data into events + events = split(h, where(isnan(h.values))[0]) + events = [ev[~isnan(ev.values)] for ev in events if not isinstance(ev, ndarray)] + + events_new = [] + + for ev in events: + # Drop empty events and events shorter than min_events_length. + if ev.empty or ev.index.size < 2: + pass + else: + ev.index = (ev.index - ev.index[0]).days + if ev.index[-1] > min_event_length: + events_new.append(ev) + + if len(events_new) == 0: + return Series(dtype=float) + events = concat(events_new, axis=1) + + # Subtract the absolute value of the first day of each event + data = events - events.iloc[0, :] + data = data.loc[:, data.sum() != 0.0] # Drop columns with only zeros (no events) + + # Bin the data and compute the mean + binned = Series(dtype=float) + for g in data.groupby( + cut(data.index, bins=min(bins, data.index.max())), observed=False + ): + # Only use bins with more than 5 events + if g[1].dropna(axis=1).columns.size > min_n_events: + value = g[1].dropna(axis=1).mean(axis=1) + if not value.empty: + binned[g[0].mid] = value.iloc[0] + + binned = binned[binned != 0].dropna() + return binned + + def recession_constant( - series: Series, bins: int = 20, normalize: bool = False + series: Series, + bins: int = 300, + normalize: bool = False, + min_event_length: int = 10, + min_n_events: int = 2, ) -> float: - """Recession constant after :cite:t:`kirchner_catchments_2009`. + """Recession constant adapted after :cite:t:`kirchner_catchments_2009`. 
Parameters ---------- @@ -775,35 +1228,69 @@ def recession_constant( Number of bins to bin the data to. normalize: bool, optional normalize the time series to values between zero and one. + min_event_length: int, optional + Minimum length of an event in days. Events shorter than this are discarded. + min_n_events: int, optional + Minimum number of events in a bin. Bins with less events are discarded. Returns ------- - float + float: + Recession constant in days. Notes ----- - Slope of the linear model fitted to percentile‐wise binned means in a log‐log - plot of negative head versus negative head one time step ahead. + Recession constant adapted after :cite:t:`kirchner_catchments_2009`, which is the + decay constant of the exponential model fitted to the percentile-wise binned means + of the recession segments. The higher the recession constant, the slower the head + declines, and vice versa. The following function is fitted to the binned data + (similar to the Exponential response function in Pastas): - """ - if normalize: - series = _normalize(series) + ..math:: + h(t) = - h_0 * (1 - exp(-t / c)) - series.name = "diff" - df = pd.DataFrame(series.diff().loc[series.diff() < 0], columns=["diff"]) - df["head"] = series.loc[df.index] + where h(t) is the head at time t, h_0 is the final head as t goes to infinity, and + c is the recession constant. - binned = pd.Series(dtype=float) - for g in df.groupby(pd.cut(df["head"], bins=bins)): - binned[g[0].mid] = g[1]["diff"].mean() + """ + binned = _get_events_binned( + series, + normalize=normalize, + up=False, + bins=bins, + min_event_length=min_event_length, + min_n_events=min_n_events, + ) - binned = binned.dropna() - fit = linregress(log(binned.index), log(-binned.values)) + # Deal with the case that binned is empty + if binned.empty: + return nan + + # Fit an exponential model to the binned data and return the decay constant + f = lambda t, *p: -p[0] * (1 - exp(-t / p[1])) + popt, _ = curve_fit( + f, binned.index, binned.values, p0=[1, 100], bounds=(0, [100, 1e3]) + ) - return fit.slope + # Return nan and raise warning if the decay constant is close to the boundary + if isclose(popt[1], 0.0) or isclose(popt[1], 1e3): + msg = ( + "The estimated recession constant ({:.2f}) is close to the boundary. " + "This may lead to incorrect results.".format(popt[1]) + ) + logger.warning(msg) + return nan + else: + return popt[1] -def recovery_constant(series: Series, bins: int = 20, normalize: bool = False) -> float: +def recovery_constant( + series: Series, + bins: int = 300, + normalize: bool = False, + min_event_length: int = 10, + min_n_events: int = 2, +) -> float: """Recovery constant after :cite:t:`kirchner_catchments_2009`. Parameters @@ -814,45 +1301,71 @@ def recovery_constant(series: Series, bins: int = 20, normalize: bool = False) - Number of bins to bin the data to. normalize: bool, optional normalize the time series to values between zero and one. + min_event_length: int, optional + Minimum length of an event in days. Events shorter than this are discarded. + min_n_events: int, optional + Minimum number of events in a bin. Bins with less events are discarded. Returns ------- - float + float: + Recovery constant. Notes ----- - Slope of the linear model fitted to percentile‐wise binned means in a log‐log - plot of positive head versus positive head one time step ahead. + Time constant of the exponential function fitted to percentile-wise binned means + of the recovery segments. 
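Editorial aside: the fitting step described above reduces to a bounded SciPy curve fit of the exponential model to the binned event data. A minimal, self-contained sketch with synthetic binned data; the initial guess and bounds mirror the values used in this patch, while the 15-day constant and the event data are made up:

    import numpy as np
    from scipy.optimize import curve_fit

    t = np.arange(1.0, 61.0)                    # days since the start of the events
    binned = 0.8 * (1 - np.exp(-t / 15.0))      # synthetic binned recovery, c = 15 d

    def f(t, h0, c):
        return h0 * (1 - np.exp(-t / c))        # exponential recovery model

    popt, _ = curve_fit(f, t, binned, p0=[1, 100], bounds=(0, [100, 1e3]))
    recovery_constant = popt[1]                 # ~15 days for this synthetic event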
The higher the recovery constant, the slower the head + recovers, and vice versa. The following function is fitted to the binned data + (similar to the Exponential response function in Pastas): - """ - if normalize: - series = _normalize(series) + ..math:: + h(t) = h_0 * (1 - exp(-t / c)) - series.name = "diff" - df = pd.DataFrame(series.diff().loc[series.diff() > 0], columns=["diff"]) - df["head"] = series.loc[df.index] + where h(t) is the head at time t, h_0 is the final head as t goes to infinity, and + c is the recovery constant. + + """ + binned = _get_events_binned( + series, + normalize=normalize, + up=True, + bins=bins, + min_event_length=min_event_length, + ) - binned = pd.Series(dtype=float) - for g in df.groupby(pd.cut(df["head"], bins=bins)): - binned[g[0].mid] = g[1]["diff"].mean() + # Deal with the case that binned is empty + if binned.empty: + return nan - binned = binned.dropna() - fit = linregress(log(binned.index), log(binned.values)) + # Fit an exponential model to the binned data and return the recovery constant + f = lambda t, *p: p[0] * (1 - exp(-t / p[1])) + popt, _ = curve_fit( + f, binned.index, binned.values, p0=[1, 100], bounds=(0, [100, 1e3]) + ) - return fit.slope + # Return nan and raise warning if the recovery constant is close to the boundary + if isclose(popt[1], 0.0) or isclose(popt[1], 1e3): + msg = ( + "The estimated recovery constant ({:.2f}) is close to the boundary. " + "This may lead to incorrect results.".format(popt[1]) + ) + logger.warning(msg) + return nan + else: + return popt[1] def duration_curve_slope( - series: Series, l: float = 0.1, u: float = 0.9, normalize: bool = True + series: Series, l: float = 0.1, u: float = 0.9, normalize: bool = False ) -> float: - """Slope of the duration curve between percentile l and u after + """Slope of the head duration curve between percentile l and u after :cite:t:`oudin_are_2010`. Parameters ---------- series: pandas.Series Pandas Series with DatetimeIndex and head values. - l: float + l: float, optional lower percentile, a float between 0 and 1, lower than u. u: float, optional upper percentile, a float between 0 and 1, higher than l. @@ -861,28 +1374,38 @@ def duration_curve_slope( Returns ------- - float + float: + Slope of the head duration curve between percentile l and u. Notes ----- - Slope of the duration curve (analogue flow duration curve for streamflow) between - percentile l and u. + Slope of the head duration curve between percentile l and u. The more negative the + slope, the more values are above or below the percentile l and u, and vice versa. + + Note that the slope is negative, contrary to the flow duration curve commonly used + in surface water hydrology. """ if normalize: series = _normalize(series) + # Get the series between the percentiles s = series[ - (series.quantile(l) > series) & (series < series.quantile(u)) - ].sort_values() - s.index = arange(s.size) / s.size + (series > series.quantile(l)) & (series < series.quantile(u)) + ].sort_values(ascending=False) + + # Deal with the case that s is empty + if s.empty: + return nan + + s.index = linspace(0, 1, s.size) return linregress(s.index, s.values).slope -def duration_curve_range( +def duration_curve_ratio( series: Series, l: float = 0.1, u: float = 0.9, normalize: bool = True ) -> float: - """Range of the duration curve between the percentile l and u after + """Ratio of the head duration curve between the percentile l and u after :cite:t:`richards_measures_1990`. 
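Editorial aside: to make the duration-curve construction above concrete, the sketch below builds the head duration curve for a synthetic series and computes its slope between the 10th and 90th percentile, following the same recipe as `duration_curve_slope` (synthetic data, illustrative only):

    import numpy as np
    import pandas as pd
    from scipy.stats import linregress

    idx = pd.date_range("2000", "2010")
    rng = np.random.default_rng(0)
    head = pd.Series(rng.normal(size=idx.size), index=idx)

    # Keep the heads between the 10th and 90th percentile, sorted descending
    s = head[(head > head.quantile(0.1)) & (head < head.quantile(0.9))]
    s = s.sort_values(ascending=False)
    s.index = np.linspace(0, 1, s.size)           # exceedance fraction on the x-axis

    slope = linregress(s.index, s.values).slope   # negative by construction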
Parameters @@ -898,17 +1421,19 @@ def duration_curve_range( Returns ------- - float + float: + Ratio of the duration curve between the percentile l and u. Notes ----- - Range of the duration curve between the percentile l and u. + Ratio of the duration curve between the percentile l and u. The higher the ratio, + the flatter the head duration curve, and vice versa. """ if normalize: series = _normalize(series) - return series.quantile(u) - series.quantile(l) + return series.quantile(l) / series.quantile(u) def richards_pathlength(series: Series, normalize: bool = True) -> float: @@ -924,12 +1449,13 @@ def richards_pathlength(series: Series, normalize: bool = True) -> float: Returns ------- - float + float: + The path length of the time series, standardized by time series length and + median. Notes ----- - The path length of the time series, standardized by time series length and the - median head. + The path length of the time series, standardized by time series length and median. """ if normalize: @@ -938,38 +1464,15 @@ def richards_pathlength(series: Series, normalize: bool = True) -> float: series = series.dropna() dt = diff(series.index.to_numpy()) / Timedelta("1D") dh = series.diff().dropna() - # sum(dt) is more fair with irregular time series - return sum(sqrt(dh**2 + dt**2)) / sum(dt) - - -def richards_baker_index(series: Series, normalize: bool = True) -> float: - """Richards-Baker index according to :cite:t:`baker_new_2004`. - - Parameters - ---------- - series: pandas.Series - Pandas Series with DatetimeIndex and head values. - normalize: bool, optional - normalize the time series to values between zero and one. - Returns - ------- - float - - Notes - ----- - Sum of absolute values of day‐to‐day changes in head divided by the sum of scaled - daily head. Equivalent the Richards Pathlength without the time component. - - """ - if normalize: - series = _normalize(series) - - return series.diff().dropna().abs().sum() / series.sum() + # sum(dt) is more fair with irregular time series + return sum(sqrt(dh**2 + dt**2)) / (sum(dt) * series.median()) -def _baseflow(series: Series, normalize: bool = True) -> Tuple[Series, Series]: - """Baseflow function for the baseflow index and stability. +def _baselevel( + series: Series, normalize: bool = True, period="30D" +) -> Tuple[Series, Series]: + """Baselevel function for the baselevel index and stability. Parameters ---------- @@ -977,6 +1480,9 @@ def _baseflow(series: Series, normalize: bool = True) -> Tuple[Series, Series]: Pandas Series with DatetimeIndex and head values. normalize: bool, optional normalize the time series to values between zero and one. + period: str, optional + Period to resample the time series to in days (e.g., '10D' or '90D'). Default + is 30 days. Returns ------- @@ -984,33 +1490,38 @@ def _baseflow(series: Series, normalize: bool = True) -> Tuple[Series, Series]: Pandas Series of the original for ht: pandas.Series Pandas Series for the base head + """ if normalize: series = _normalize(series) - # A/B. Selecting minima hm over 5-day periods - hm = series.resample("5D").min().dropna() + # A/B. Selecting minima hm over a period + hm = series.resample(period).min().dropna() + series = series.resample("D").interpolate() # C. 
define the turning point ht (0.9 * head < adjacent heads) - ht = pd.Series(dtype=float) + ht = Series(index=[hm.index[0]], data=[hm.iloc[0]], dtype=float) + for i, h in enumerate(hm.iloc[1:-1], start=1): - if (h < hm.iloc[i - 1]) & (h < hm.iloc[i + 1]): + if (0.9 * h < hm.iloc[i - 1]) & (0.9 * h < hm.iloc[i + 1]): ht[hm.index[i]] = h + ht[hm.index[-1]] = hm.iloc[-1] + # ensure that index is a DatetimeIndex - ht.index = pd.to_datetime(ht.index) + ht.index = to_datetime(ht.index) # D. Interpolate ht = ht.resample("D").interpolate() # E. Assign a base head to each day - ht[ht > series.resample("D").mean().loc[ht.index]] = series.resample("D").mean() + ht[ht > series.loc[ht.index]] = series return series, ht -def baseflow_index(series: Series, normalize: bool = True) -> float: - """Baseflow index according to :cite:t:`organization_manual_2008`. +def baselevel_index(series: Series, normalize: bool = True, period="30D") -> float: + """Base level index (BLI) adapted after :cite:t:`organization_manual_2008`. Parameters ---------- @@ -1018,27 +1529,31 @@ def baseflow_index(series: Series, normalize: bool = True) -> float: Pandas Series with DatetimeIndex and head values. normalize: bool, optional normalize the time series to values between zero and one. + period: str, optional + Period to resample the time series to in days (e.g., '10D' or '90D'). Default + is 30 days. Returns ------- - float + float: + Base level index. Notes ----- - Adapted analogously to its application in streamflow. Here, a baseflow time - series is separated from a 5‐day minimum groundwater head in a moving window. BFI + Adapted analogously to its application in streamflow. Here, a baselevel time + series is separated from a X-day minimum head in a moving window. BLI equals the total sum of heads of original time series divided by the total sum of - heads from the baseflow type of time series. + heads from the baselevel time series. Both these time series are resampled to daily + heads by interpolation for consistency. """ - series, ht = _baseflow(series, normalize=normalize) + series, ht = _baselevel(series, normalize=normalize, period=period) + return ht.sum() / series.sum() - return series.resample("D").mean().sum() / ht.sum() - -def baseflow_stability(series: Series, normalize: bool = True) -> float: - """Baseflow stability after :cite:t:`heudorfer_index-based_2019`. +def baselevel_stability(series: Series, normalize: bool = True, period="30D") -> float: + """Baselevel stability after :cite:t:`heudorfer_index-based_2019`. Parameters ---------- @@ -1046,163 +1561,218 @@ def baseflow_stability(series: Series, normalize: bool = True) -> float: Pandas Series with DatetimeIndex and head values. normalize: bool, optional normalize the time series to values between zero and one. + period: str, optional + Period to resample the time series to, in days (e.g., '10D' or '90D'). Default + is 30 days. Returns ------- - float + float: + Base level stability. Notes ----- Originally developed for streamflow, here the Base Flow Index algorithm is analogously adapted to groundwater time series as a filter to separate the slow - component (“baseflow”) of the time series. Then, the mean annual baseflow is - calculated. Base Flow Stability is the difference of maximum and minimum annual - baseflow. + component (“base level") of the time series. Then, the mean annual base level is + calculated. Base Level Stability is the difference of maximum and minimum annual + base level. 
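Editorial aside: a usage sketch of the base level index for a synthetic seasonal head series. By construction the index is expected to lie between 0 and 1; the `ps.stats.signatures` path, the 30-day period (the documented default) and the smooth sine input are assumptions of this example:

    >>> import numpy as np
    >>> import pandas as pd
    >>> import pastas as ps
    >>> idx = pd.date_range("2000", "2010")
    >>> head = pd.Series(
    ...     10 + np.sin(np.arange(idx.size) * 2 * np.pi / 365.25), index=idx
    ... )
    >>> bli = ps.stats.signatures.baselevel_index(head, period="30D")
    >>> # bli should fall between 0 (no slow component) and 1 (head equals base level)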
""" - series, ht = _baseflow(series, normalize=normalize) + _, ht = _baselevel(series, normalize=normalize, period=period) return ht.resample("A").mean().max() - ht.resample("A").mean().min() -def hurst_exponent(series: Series): - """Hurst exponent according to :cite:t:`wang_characteristic-based_2006`. - - Returns - ------- - - Notes - ----- - The slope of a linear model fitted to the relationship between the sample size - and the logarithmized sample range of k contiguous subsamples from the time series. - - """ - return NotImplementedError - +def autocorr_time(series: Series, cutoff: float = 0.8, **kwargs) -> float: + """Lag where the autocorrelation function exceeds a cut-off value. -def autocorr(series: Series, freq: str = "w"): - """Lag where the first peak in the autocorrelation function occurs after - :cite:t:`wang_characteristic-based_2006`. + Parameters + ---------- + series: pandas.Series + Pandas Series with DatetimeIndex and head values. + cutoff: float, optional + Cut-off value for the autocorrelation function. Default is 0.7. + kwargs: dict, optional + Additional keyword arguments are passed to the pastas acf method. Returns ------- + float: + Lag in days where the autocorrelation function exceeds the cutoff value. Notes ----- - Lag where the first peak in the autocorrelation function occurs. + Lag in days where the autocorrelation function exceeds the cutoff value for the + first time. The higher the lag, the more autocorrelated the time series is, and + vice versa. In practical terms higher values mean that the groundwater system has + a longer memory and the response to changes in the forcing are visible longer in + the head time series. """ - return NotImplementedError - - -def lyapunov_exponent(series: Series): - """The exponential rate of divergence of nearby data points after - :cite:t:`hilborn_chaos_2000`. + c = acf(series.dropna(), **kwargs) # Compute the autocorrelation function - Returns - ------- - - Notes - ----- - The exponential rate of divergence of nearby data points when moving away in time - from a certain data point in the series. Iteratively estimated for every point in - the time series, then averaged. + if c.min() > cutoff: + return nan + else: + return (c < cutoff).idxmax() / Timedelta("1D") - """ - return NotImplementedError +def _date_min_max(series: Series, stat: str) -> float: + """Compute the average date of the minimum head value with circular statistics. -def peak_timescale(series: Series): - """Area under peak divided by difference of peak head to peak base after - :cite:t:`gaal_flood_2012`. + Parameters + ---------- + series: pandas.Series + Pandas Series with DatetimeIndex and head values. + stat: str + Either "min" or "max". If "min", the average date of the minimum head value is + computed. If "max", the average date of the maximum head value is computed. Returns ------- + float: + Average date of the minimum or maximum head value. Notes ----- - Area under peak divided by difference of peak head to peak base, averaged over - all peaks. + The average date is computed by taking the average of the day of the year of the + minimum head value for each year, using circular statistics. We refer to + :cite:t:`fisher_statistical_1995` (page 31) for more information on circular + statistics. """ - return NotImplementedError - - -def excess_mass(series: Series): - """Test statistic of the dip test, after :cite:t:`hartigan_dip_1985`. 
- - Returns - ------- + # Get the day of the year of the minimum head value for each year + if stat == "min": + data = series.groupby(series.index.year).idxmin(skipna=True).dropna().values + elif stat == "max": + data = series.groupby(series.index.year).idxmax(skipna=True).dropna().values + + doy = DatetimeIndex(data).dayofyear.to_numpy(float) + + m = 365.25 + two_pi = 2 * pi + + thetas = array(doy) * two_pi / m + c = cos(thetas).sum() + s = sin(thetas).sum() + + if (s > 0) & (c > 0): + mean_theta = arctan(s / c) + elif c < 0: + mean_theta = arctan(s / c) + pi + elif (s < 0) & (c > 0): + mean_theta = arctan(s / c) + two_pi + else: + # This should never happen + raise ValueError("Something went wrong in the circular statistics.") - Notes - ----- - Test statistic of the dip test; maximum distance between the empirical - distribution and the best fitting unimodal distribution. By default, the best - fitting distribution is the uniform. + return mean_theta * 365.25 / two_pi - """ - return NotImplementedError +def date_min(series: Series) -> float: + """Compute the average date of the minimum head value with circular statistics. -def critical_bandwidth(series: Series): - """Test statistic of the Silverman test, after :cite:t:`silverman_using_1981`. + Parameters + ---------- + series: pandas.Series + Pandas Series with DatetimeIndex and head values. Returns ------- + float: + Average date of the minimum head value. Notes ----- - Test statistic of the Silverman test; minimum kernel bandwidth required to create - an unimodal distribution estimated by fitting a Kernel Density Estimation. + Average date of the minimum head value. The higher the date, the later the minimum + head value occurs in the year, and vice versa. + + The average date is computed by taking the average of the day of the year of the + minimum head value for each year, using circular statistics. We refer to + :cite:t:`fisher_statistical_1995` (page 31) for more information on circular + statistics. """ - return NotImplementedError + return _date_min_max(series, "min") -def peak_base_time(series: Series): - """Difference between peak and base head, standardized by duration of peak after - :cite:t:`heudorfer_index-based_2019`. +def date_max(series: Series) -> float: + """Compute the average date of the maximum head value with circular statistics. + + Parameters + ---------- + series: pandas.Series + Pandas Series with DatetimeIndex and head values. Returns ------- + float: + Average date of the maximum head value. Notes ----- - Difference between peak and base head, standardized by duration of peak. + Average date of the maximum head value. The higher the date, the later the maximum + head value occurs in the year, and vice versa. + + The average date is computed by taking the average of the day of the year of the + maximum head value for each year, using circular statistics. We refer to + :cite:t:`fisher_statistical_1995` (page 31) for more information on circular + statistics. """ - return NotImplementedError + return _date_min_max(series, "max") -def summary(series: Series, signatures: Optional[list] = None) -> Series: +def summary( + data: Union[DataFrame, Series], signatures: Optional[list] = None +) -> DataFrame: """Method to get many signatures for a time series. Parameters ---------- - series: pandas.Series - pandas Series with DatetimeIndex + data: Union[pandas.DataFrame, pandas.Series] + pandas DataFrame or Series with DatetimeIndex signatures: list - By default all available signatures are returned. 
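Editorial aside: the circular averaging used by `_date_min_max` above is easy to check in isolation. Day-of-year values of 360 and 5 should average to around new year rather than to mid-summer; the sketch below uses numpy's `arctan2` as an equivalent reformulation of the explicit quadrant checks in the patch:

    import numpy as np

    doy = np.array([360.0, 5.0])                  # days of the year to average
    theta = doy * 2 * np.pi / 365.25              # map to angles on the unit circle
    mean_theta = np.arctan2(np.sin(theta).sum(), np.cos(theta).sum())
    mean_doy = (mean_theta * 365.25 / (2 * np.pi)) % 365.25
    # mean_doy is ~365.1, i.e. around new year; a plain arithmetic mean would give 182.5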
+ list of signatures to return. By default all available signatures are returned. Returns ------- - data: pandas.Series - Pandas series with every row a signature + result: pandas.DataFrame + Pandas DataFrame with every row a signature and the signature value for each column. Examples -------- - >>> idx = pd.date_range("2000", "2010") - >>> head = pd.Series(index=idx, data=np.random.rand(len(idx)), dtype=float) - >>> ps.stats.signatures.summary(head) + >>> idx = date_range("2000", "2010") + >>> data = np.random.rand(len(idx), 3) + >>> df = DataFrame(index=idx, data=data, columns=["A", "B", "C"], dtype=float) + >>> ps.stats.signatures.summary(df) + """ if signatures is None: signatures = __all__ - data = pd.Series(index=signatures, dtype=float) + if isinstance(data, DataFrame): + result = DataFrame(index=signatures, columns=data.columns, dtype=float) + elif isinstance(data, Series): + result = DataFrame(index=signatures, columns=[data.name], dtype=float) + else: + raise ValueError("Invalid data type. Expected DataFrame or Series.") + # Get the signatures for signature in signatures: + # Check if the signature is valid + if signature not in __all__: + msg = "Signature %s is not a valid signature." + logger.error(msg, signature) + raise ValueError(msg % signature) + + # Get the function and compute the signature for each column/series func = getattr(ps.stats.signatures, signature) - data.loc[signature] = func(series) + if isinstance(data, DataFrame): + result.loc[signature] = data.apply(func) + elif isinstance(data, Series): + result.loc[signature] = func(data) - return data + return result diff --git a/tests/test_signatures.py b/tests/test_signatures.py index 3ee1f908..2c4588ff 100644 --- a/tests/test_signatures.py +++ b/tests/test_signatures.py @@ -10,25 +10,20 @@ def test_summary(): head = Series(index=idx, data=np.random.rand(len(idx)), dtype=float) ps.stats.signatures.summary(head) +def test_date_min(): + """Test the date_min signature to have a mean of 1 if the head is at a minimum at + the first of january of every year in a time series. + """ + idx = date_range("2000", "2010") + head = Series(index=idx, data=np.random.rand(len(idx)), dtype=float) + head[head.index.dayofyear == 1] = head.min() + assert round(ps.stats.signatures.date_min(head)) == 1 -def test_colwell_components(collwell_data: Series) -> None: - ps.stats.signatures.colwell_components(collwell_data, freq="4M", bins=3) - return - - -def test_colwell_predictability(collwell_data: Series) -> None: - """Example Tree C from the publication.""" - p = ps.stats.signatures.colwell_components(collwell_data, freq="4M", bins=3)[0] - assert round(p, 2) == 1.0 - - -def test_colwell_constancy(collwell_data: Series) -> None: - """Example Tree C from the publication.""" - c = ps.stats.signatures.colwell_components(collwell_data, freq="4M", bins=3)[1] - assert round(c, 2) == 0.42 - - -def test_colwell_contingency(collwell_data: Series) -> None: - """Example Tree C from the publication.""" - m = ps.stats.signatures.colwell_components(collwell_data, freq="4M", bins=3)[2] - assert round(m, 2) == 0.58 +def test_date_max(): + """Test the date_max signature to have a mean of 1 if the head is at a maximum at + the first of january of every year in a time series. 
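Editorial aside: beyond the docstring example, the new `signatures` argument makes it cheap to compute a small subset of signatures for several wells at once. A hedged sketch, assuming `"rise_rate"` and `"fall_rate"` are listed in the module's `__all__` (both functions are defined above):

    >>> import numpy as np
    >>> import pandas as pd
    >>> import pastas as ps
    >>> idx = pd.date_range("2000", "2010")
    >>> df = pd.DataFrame(
    ...     np.random.rand(idx.size, 2), index=idx, columns=["well_1", "well_2"]
    ... )
    >>> ps.stats.signatures.summary(df, signatures=["rise_rate", "fall_rate"])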
+ """ + idx = date_range("2000", "2010") + head = Series(index=idx, data=np.random.rand(len(idx)), dtype=float) + head[head.index.dayofyear == 1] = head.max() + assert round(ps.stats.signatures.date_max(head)) == 1 From 1055c48df916b491fd6bc35b70bb83900e7bdf58 Mon Sep 17 00:00:00 2001 From: Raoul Collenteur Date: Mon, 19 Feb 2024 07:53:39 +0100 Subject: [PATCH 08/10] Quick Fix issue 685 --- doc/examples/fix_parameters.ipynb | 31 +++++++++++++++++++++++++++---- doc/examples/index.rst | 2 +- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/doc/examples/fix_parameters.ipynb b/doc/examples/fix_parameters.ipynb index b47843d2..21c992cf 100644 --- a/doc/examples/fix_parameters.ipynb +++ b/doc/examples/fix_parameters.ipynb @@ -5,7 +5,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Fixating parameters while fitting\n", + "# Fixing parameters while fitting\n", " \n", "*Developed by Mark Bakker, TU Delft*" ] @@ -203,6 +203,15 @@ "ml.solve(tmin=\"1985\", tmax=\"2010\", solver=ps.LeastSquares())" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Fixing a parameter\n", + "\n", + "Now let's fix the parameter recharge$_n$ and solve the model again. We can do this by using the `Model.set_parameter` method and setting `vary=False`. For the sake of this example, we also fix the parameter to a value of 1.0, such that the Gamma response function becomes an Exponential function." + ] + }, { "cell_type": "code", "execution_count": null, @@ -212,10 +221,17 @@ "ml = ps.Model(ho)\n", "sm1 = ps.StressModel(recharge, ps.Gamma(), name=\"recharge\", settings=\"prec\")\n", "ml.add_stressmodel(sm1)\n", - "ml.set_parameter(\"recharge_n\", vary=False)\n", + "ml.set_parameter(\"recharge_n\", vary=False, initial=1)\n", "ml.solve(tmin=\"1985\", tmax=\"2010\", solver=ps.LeastSquares())" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the fit report, you will see that the parameter recharge$_n$ is now not varied anymore and kept to the value of one." + ] + }, { "cell_type": "code", "execution_count": null, @@ -224,12 +240,19 @@ "source": [ "ml.plot(figsize=(10, 4));" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { - "display_name": "pastas_dev", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -243,7 +266,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.15 | packaged by conda-forge | (main, Nov 22 2022, 08:41:22) [MSC v.1929 64 bit (AMD64)]" + "version": "3.11.3" }, "vscode": { "interpreter": { diff --git a/doc/examples/index.rst b/doc/examples/index.rst index 46308904..cbbb0dbc 100644 --- a/doc/examples/index.rst +++ b/doc/examples/index.rst @@ -47,7 +47,7 @@ Basics .. _Preprocessing user-provided time series: prepare_timeseries.html .. _A basic model: basic_model.html -.. _Fixating parameters while fitting: fix_parameters.html +.. _Fixing parameters while fitting: fix_parameters.html .. _Calibration: calibration_options.html .. 
_Modeling with different timesteps: modeling_timestep.html From 3c3ff14eecb218ad9c6d64e48e3fb573508d9c7a Mon Sep 17 00:00:00 2001 From: Martin Vonk <66305055+martinvonk@users.noreply.github.com> Date: Mon, 19 Feb 2024 11:00:28 +0100 Subject: [PATCH 09/10] Resolve Pandas 2.2 (FutureWarning) issues (#684) Co-authored-by: Raoul Collenteur --- doc/examples/recharge_estimation.ipynb | 1 + doc/examples/uncertainty.ipynb | 2 ++ pastas/io/base.py | 4 +++- pastas/io/pas.py | 5 +++-- pastas/model.py | 27 ++++++++++++++------------ pastas/plotting/modelcompare.py | 12 +++++++----- pastas/plotting/modelplots.py | 10 +++++++--- pastas/plotting/plotutil.py | 6 +++--- pastas/stats/core.py | 4 ++-- pastas/stats/dutch.py | 14 ++++++------- pastas/stats/tests.py | 12 +++--------- pastas/stressmodels.py | 8 +------- pastas/timeseries.py | 4 ++-- pastas/timeseries_utils.py | 10 +++++----- pyproject.toml | 4 ++-- tests/test_gxg.py | 6 +++--- tests/test_model.py | 8 ++++---- tests/test_rfuncs.py | 6 +++--- 18 files changed, 73 insertions(+), 70 deletions(-) diff --git a/doc/examples/recharge_estimation.ipynb b/doc/examples/recharge_estimation.ipynb index dba01395..117126c3 100644 --- a/doc/examples/recharge_estimation.ipynb +++ b/doc/examples/recharge_estimation.ipynb @@ -103,6 +103,7 @@ "dt = 10\n", "freq = \"10D\"\n", "h = head.iloc[0::dt]\n", + "h = h.loc[~h.index.duplicated(keep='first')]\n", "\n", "mls = {\n", " \"Linear\": [ps.FourParam(), ps.rch.Linear()],\n", diff --git a/doc/examples/uncertainty.ipynb b/doc/examples/uncertainty.ipynb index 1060bcef..4648c1b0 100644 --- a/doc/examples/uncertainty.ipynb +++ b/doc/examples/uncertainty.ipynb @@ -59,6 +59,8 @@ " .loc[\"2006\":]\n", " .iloc[0::10]\n", ")\n", + "gwl = gwl.loc[~gwl.index.duplicated(keep='first')]\n", + "\n", "evap = pd.read_csv(\n", " \"data_wagna/evap_wagna.csv\", index_col=0, parse_dates=True, skiprows=2\n", ").squeeze()\n", diff --git a/pastas/io/base.py b/pastas/io/base.py index 92679ac5..e05c765d 100644 --- a/pastas/io/base.py +++ b/pastas/io/base.py @@ -138,7 +138,9 @@ def _load_model(data: dict) -> Model: # Add parameters, use update to maintain correct order ml.parameters = ml.get_init_parameters(noise=ml.settings["noise"]) ml.parameters.update(data["parameters"]) - ml.parameters = ml.parameters.apply(to_numeric, errors="ignore") + + # Convert parameters to numeric + ml.parameters = ml.parameters.infer_objects() # When initial values changed for param, value in ml.parameters.loc[:, "initial"].items(): diff --git a/pastas/io/pas.py b/pastas/io/pas.py index 2038764f..7922f7fa 100644 --- a/pastas/io/pas.py +++ b/pastas/io/pas.py @@ -8,6 +8,7 @@ import datetime import json from collections import OrderedDict +from io import StringIO as stringIO from logging import getLogger from pandas import ( @@ -38,7 +39,7 @@ def pastas_hook(obj: dict): obj[key] = val elif key == "series": try: - obj[key] = read_json(value, typ="series", orient="split") + obj[key] = read_json(stringIO(value), typ="series", orient="split") except: obj[key] = value if isinstance(obj[key], Series): @@ -52,7 +53,7 @@ def pastas_hook(obj: dict): # Necessary to maintain order when using the JSON format! 
value = json.loads(value, object_pairs_hook=OrderedDict) param = DataFrame(data=value, columns=value.keys()).T - obj[key] = param.apply(to_numeric, errors="ignore") + obj[key] = param.infer_objects() else: try: obj[key] = json.loads(value, object_hook=pastas_hook) diff --git a/pastas/model.py b/pastas/model.py index e7b493d9..5c269f84 100644 --- a/pastas/model.py +++ b/pastas/model.py @@ -406,9 +406,7 @@ def simulate( if p is None: p = self.get_parameters() - sim = Series( - data=np.zeros(sim_index.size, dtype=float), index=sim_index, fastpath=True - ) + sim = Series(data=np.zeros(sim_index.size, dtype=float), index=sim_index) istart = 0 # Track parameters index to pass to stressmodel object for sm in self.stressmodels.values(): @@ -1195,7 +1193,7 @@ def get_init_parameters( if noise is None: noise = self.settings["noise"] - frames = [DataFrame(columns=self.parameters.columns)] + frames = [] for sm in self.stressmodels.values(): frames.append(sm.parameters) @@ -1206,14 +1204,19 @@ def get_init_parameters( if self.noisemodel and noise: frames.append(self.noisemodel.parameters) - parameters = concat(frames) - parameters = parameters.infer_objects() + if not frames: + parameters = DataFrame(columns=self.parameters.columns) + else: + parameters = concat(frames) + parameters = parameters.infer_objects() + parameters["stderr"] = np.nan + parameters["optimal"] = np.nan # Set initial parameters to optimal parameters from model if not initial: - parameters.initial.update(self.parameters.optimal) - parameters.optimal.update(self.parameters.optimal) - parameters.stderr.update(self.parameters.stderr) + parameters.update({"initial": self.parameters.loc[:, "optimal"]}) + parameters.update({"optimal": self.parameters.loc[:, "optimal"]}) + parameters.update({"stderr": self.parameters.loc[:, "stderr"]}) return parameters @@ -1796,9 +1799,9 @@ def fit_report( "Interp.": "Yes" if self.interpolate_simulation else "No", } - parameters = self.parameters.loc[:, ["optimal", "stderr", "initial", "vary"]] - stderr = parameters.loc[:, "stderr"] / parameters.loc[:, "optimal"] - parameters.loc[:, "stderr"] = "±" + stderr.abs().apply( + parameters = self.parameters.loc[:, ["optimal", "initial", "vary"]].copy() + stderr = self.parameters.loc[:, "stderr"] / self.parameters.loc[:, "optimal"] + parameters.loc[:, "stderr"] = stderr.abs().apply( _table_formatter_stderr, na_rep="nan" ) diff --git a/pastas/plotting/modelcompare.py b/pastas/plotting/modelcompare.py index 9556c08b..39c5a783 100644 --- a/pastas/plotting/modelcompare.py +++ b/pastas/plotting/modelcompare.py @@ -401,12 +401,14 @@ def get_diagnostics( else: modelnames = [iml.name for iml in models] - diags = DataFrame(index=modelnames) - for i, ml in enumerate(models): + diags = [] + for ml in models: mldiag = ml.stats.diagnostics() - diags.loc[modelnames[i], mldiag.index] = mldiag[diag_col].values + diags.append(mldiag.loc[:, diag_col]) - return diags.transpose() + diags = concat(diags, axis=1, sort=False) + diags.columns = modelnames + return diags def plot_oseries(self, axn: str = "sim") -> None: """Plot all oseries, unless all oseries are the same. 
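Editorial aside: the io changes in this patch follow the pandas ≥ 2.1 deprecation of passing literal JSON strings to `read_json`; wrapping the string in `StringIO` keeps the old behaviour. A standalone sketch with a made-up JSON payload:

    from io import StringIO

    import pandas as pd

    raw = '{"name": "head", "index": [0, 1, 2], "data": [1.0, 2.0, 3.0]}'
    series = pd.read_json(StringIO(raw), typ="series", orient="split")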
@@ -814,7 +816,7 @@ def plot_table_params( self.models, param_selection=param_selection, param_col=param_col, - ).applymap(_table_formatter_params) + ).apply(lambda x: x.apply(_table_formatter_params), axis=1) # add separate column with parameter names params.loc[:, "Parameters"] = params.index diff --git a/pastas/plotting/modelplots.py b/pastas/plotting/modelplots.py index 7ef008fa..b20b2d5f 100644 --- a/pastas/plotting/modelplots.py +++ b/pastas/plotting/modelplots.py @@ -321,10 +321,14 @@ def results( loc="left", fontsize=plt.rcParams["legend.fontsize"], ) - p = self.ml.parameters.copy().loc[:, ["name", "optimal", "stderr"]] + p = self.ml.parameters.loc[:, ["name"]].copy() p.loc[:, "name"] = p.index - stderr = p.loc[:, "stderr"] / p.loc[:, "optimal"] - p.loc[:, "optimal"] = p.loc[:, "optimal"].apply(_table_formatter_params) + stderr = ( + self.ml.parameters.loc[:, "stderr"] / self.ml.parameters.loc[:, "optimal"] + ) + p.loc[:, "optimal"] = self.ml.parameters.loc[:, "optimal"].apply( + _table_formatter_params + ) p.loc[:, "stderr"] = stderr.abs().apply(_table_formatter_stderr) ax3.axis("off") diff --git a/pastas/plotting/plotutil.py b/pastas/plotting/plotutil.py index 27161fc5..230077ba 100644 --- a/pastas/plotting/plotutil.py +++ b/pastas/plotting/plotutil.py @@ -45,11 +45,11 @@ def _table_formatter_stderr(s: float, na_rep: str = "") -> str: if np.isnan(s): return na_rep elif np.floor(np.log10(np.abs(s))) <= -4: - return f"{s * 100.:.2e}%" + return f"±{s * 100.:.2e}%" elif np.floor(np.log10(np.abs(s))) > 3: - return f"{s * 100.:.2e}%" + return f"±{s * 100.:.2e}%" else: - return f"{s:.2%}" + return f"±{s:.2%}" def _get_height_ratios(ylims: List[Union[list, tuple]]) -> List[float]: diff --git a/pastas/stats/core.py b/pastas/stats/core.py index e5bd9e72..d0d10364 100644 --- a/pastas/stats/core.py +++ b/pastas/stats/core.py @@ -23,7 +23,7 @@ pi, sqrt, ) -from pandas import DataFrame, Series, Timedelta, TimedeltaIndex +from pandas import DataFrame, Index, Series, Timedelta, to_timedelta from scipy.stats import norm from pastas.typing import ArrayLike @@ -195,7 +195,7 @@ def ccf( result = DataFrame( data={"ccf": c, "conf": conf, "n": b}, dtype=float, - index=TimedeltaIndex(lags, unit="D", name="Lags"), + index=Index(to_timedelta(lags, unit="D"), name="Lags"), ) result = result.where(result.n > min_obs).dropna() diff --git a/pastas/stats/dutch.py b/pastas/stats/dutch.py index 44f03739..7732df16 100644 --- a/pastas/stats/dutch.py +++ b/pastas/stats/dutch.py @@ -101,7 +101,7 @@ def q_gvg( inspring = _in_spring(series) if any(inspring): if by_year: - return series.loc[inspring].resample("a").median().mean() + return series.loc[inspring].resample("A").median().mean() else: return series.loc[inspring].median() else: @@ -117,7 +117,7 @@ def ghg( output: str = "mean", min_n_meas: int = 16, min_n_years: int = 8, - year_offset: str = "a-mar", + year_offset: str = "A-MAR", ) -> Union[Series, float]: """Calculate the 'Gemiddelde Hoogste Grondwaterstand' (Average High Groundwater Level) @@ -205,7 +205,7 @@ def glg( output: str = "mean", min_n_meas: int = 16, min_n_years: int = 8, - year_offset: str = "a-mar", + year_offset: str = "A-MAR", ) -> Union[Series, float]: """Calculate the 'Gemiddelde Laagste Grondwaterstand' (Average Low GW Level). @@ -292,7 +292,7 @@ def gvg( output: str = "mean", min_n_meas: int = 2, min_n_years: int = 8, - year_offset: str = "a", + year_offset: str = "A", ) -> Union[Series, float]: """Calculate the 'Gemiddelde Voorjaars Grondwaterstand' (Average Spring GW Level). 
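Editorial aside: the change in `pastas/stats/core.py` above avoids the pandas 2.2 FutureWarning for constructing `TimedeltaIndex(lags, unit="D")` directly; the future-proof spelling used in this patch, shown in isolation:

    import numpy as np
    from pandas import Index, to_timedelta

    lags = np.arange(1.0, 11.0)
    # deprecated in pandas 2.2: TimedeltaIndex(lags, unit="D", name="Lags")
    index = Index(to_timedelta(lags, unit="D"), name="Lags")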
@@ -367,7 +367,7 @@ def gg( output: str = "mean", min_n_meas: int = 16, min_n_years: int = 8, - year_offset: str = "a-mar", + year_offset: str = "A-MAR", ) -> Union[Series, float]: """Calculate the 'Gemiddelde Grondwaterstand' (Average Groundwater Level). @@ -469,7 +469,7 @@ def _in_spring(series: Series) -> Series: def isinspring(x): return ((x.month == 3) and (x.day >= 14)) or ((x.month == 4) and (x.day < 15)) - return Series(series.index.map(isinspring), index=series.index) + return Series([isinspring(x) for x in series.index], index=series.index) def _gxg( @@ -652,6 +652,6 @@ def _q_gxg( series = series.loc[:tmax] series = series.resample("d").median() if by_year: - return series.resample("a").apply(lambda s: s.quantile(q)).mean() + return series.resample("A").apply(lambda s: s.quantile(q)).mean() else: return series.quantile(q) diff --git a/pastas/stats/tests.py b/pastas/stats/tests.py index a4c27956..66c019ab 100644 --- a/pastas/stats/tests.py +++ b/pastas/stats/tests.py @@ -479,11 +479,7 @@ def diagnostics( # Shapiroo-Wilk test for Normality stat, p = shapiro(series) - df.loc["Shapiroo", cols] = ( - "Normality", - stat, - p, - ) + df.loc["Shapiroo", cols] = "Normality", stat, p # D'Agostino test for Normality stat, p = normaltest(series) @@ -508,8 +504,6 @@ def diagnostics( df.loc["Stoffer-Toloi", cols] = "Autocorr.", stat, p df["Reject H0 ($\\alpha$={:.2f})".format(alpha)] = df.loc[:, "P-value"] < alpha - df[["Statistic", "P-value"]] = df[["Statistic", "P-value"]].applymap( - float_fmt.format - ) - + df.loc[:, "P-value"] = df.loc[:, "P-value"].apply(float_fmt.format) + df.loc[:, "Statistic"] = df.loc[:, "Statistic"].apply(float_fmt.format) return df diff --git a/pastas/stressmodels.py b/pastas/stressmodels.py index da39e38a..e4c0a02c 100644 --- a/pastas/stressmodels.py +++ b/pastas/stressmodels.py @@ -401,7 +401,6 @@ def simulate( data=fftconvolve(stress, b, "full")[:npoints], index=stress.index, name=self.name, - fastpath=True, ) return h @@ -509,7 +508,6 @@ def simulate( data=fftconvolve(h, b, "full")[:npoints], index=h.index, name=self.name, - fastpath=True, ) return h @@ -875,7 +873,7 @@ def simulate( p_with_r = np.concatenate([p, np.array([r])]) b = self._get_block(p_with_r, dt, tmin, tmax) c = fftconvolve(stress, b, "full")[:npoints] - h = h.add(Series(c, index=stress.index, fastpath=True), fill_value=0.0) + h = h.add(Series(c, index=stress.index), fill_value=0.0) if istress is not None: if isinstance(istress, list): h.name = self.name + "_" + "+".join(str(i) for i in istress) @@ -1402,7 +1400,6 @@ def simulate( data=fftconvolve(stress, b, "full")[: stress.size], index=self.prec.series.index, name=name, - fastpath=True, ) def get_stress( @@ -1457,7 +1454,6 @@ def get_stress( data=stress, index=self.prec.series.index, name="recharge", - fastpath=True, ) elif istress == 0: return self.prec.series @@ -1919,13 +1915,11 @@ def simulate( data=fftconvolve(stress, rfunc1, "full")[:npoints], index=stress.index, name="1", - fastpath=True, ) h2 = Series( data=fftconvolve(stress, rfunc2, "full")[:npoints], index=stress.index, name="1", - fastpath=True, ) h = omega * h1 + (1 - omega) * h2 diff --git a/pastas/timeseries.py b/pastas/timeseries.py index 08fd8719..b2468853 100644 --- a/pastas/timeseries.py +++ b/pastas/timeseries.py @@ -4,7 +4,7 @@ from typing import Optional, Union import pandas as pd -from pandas import Series +from pandas import Series, Timedelta from pandas.tseries.frequencies import to_offset from .rcparams import rcParams @@ -324,7 +324,7 @@ def _sample_up(self, series: 
Series) -> Series: elif method == "interpolate": series = series.asfreq(freq).interpolate(method="time") elif method == "divide": - dt = series.index.to_series().diff() / to_offset(freq).delta + dt = series.index.to_series().diff() / Timedelta(to_offset(freq)) series = series / dt series = series.asfreq(freq, method="bfill") elif isinstance(method, float): diff --git a/pastas/timeseries_utils.py b/pastas/timeseries_utils.py index f2a06420..4490b955 100644 --- a/pastas/timeseries_utils.py +++ b/pastas/timeseries_utils.py @@ -39,7 +39,7 @@ def _frequency_is_supported(freq: str) -> str: http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases The frequency can be a multiple of these offsets, like '7D'. Because of the use in convolution, only frequencies with an equidistant offset are allowed. This - means monthly ('M'), yearly ('Y') or even weekly ('W') frequencies are not + means monthly ('M'), yearly ('A') or even weekly ('W') frequencies are not allowed. Use '7D' for a weekly simulation. D calendar day frequency @@ -88,7 +88,7 @@ def _get_stress_dt(freq: str) -> float: # Get the frequency string and multiplier offset = to_offset(freq) if hasattr(offset, "delta"): - dt = offset.delta / Timedelta(1, "D") + dt = Timedelta(offset) / Timedelta(1, "D") else: num = offset.n freq = offset._prefix @@ -132,7 +132,7 @@ def _get_dt(freq: str) -> float: Number of days. """ # Get the frequency string and multiplier - dt = to_offset(freq).delta / Timedelta(1, "D") + dt = Timedelta(to_offset(freq)) / Timedelta(1, "D") return dt @@ -178,7 +178,7 @@ def _infer_fixed_freq(tindex: Index) -> str: return freq offset = to_offset(freq) - if to_offset(offset.rule_code).is_anchored(): + if to_offset(offset.rule_code).n == 1: dt = _get_stress_dt(freq) return f"{dt}D" @@ -335,7 +335,7 @@ def get_equidistant_series_nearest( series : pandas.Series original (non-equidistant) time series freq : str - frequency of the new equidistant time series (i.e. "H", "D", "7D", etc.) + frequency of the new equidistant time series (i.e. "h", "D", "7D", etc.) minimize_data_loss : bool, optional if set to True, method will attempt use any unsampled points from original time series to fill some remaining NaNs in the new equidistant time series. 
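Editorial aside: several of the hunks above replace the `.delta` attribute of pandas tick offsets, which raises a FutureWarning on recent pandas versions; the replacement pattern, shown standalone:

    from pandas import Timedelta
    from pandas.tseries.frequencies import to_offset

    # old, warns on recent pandas: to_offset("7D").delta
    dt = Timedelta(to_offset("7D")) / Timedelta(1, "D")   # 7.0 days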
diff --git a/pyproject.toml b/pyproject.toml index bbc47b0c..c68a32c5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ requires-python = ">= 3.8" dependencies = [ "numpy >= 1.17", "matplotlib >= 3.1", - "pandas >= 1.1, < 2.1", + "pandas >= 1.1", "scipy >= 1.8", "numba >= 0.51", ] @@ -100,7 +100,7 @@ legacy_tox_ini = """ description = run unit tests extras = ci commands = - pytest tests -m "not notebooks" -m "not bnotebooks" + pytest tests -m "not notebooks and not bnotebooks" [testenv:all] description = run all unit tests (including Notebooks) and obtain coverage diff --git a/tests/test_gxg.py b/tests/test_gxg.py index 58cb5464..b562f032 100644 --- a/tests/test_gxg.py +++ b/tests/test_gxg.py @@ -43,7 +43,7 @@ def test_glg(self) -> None: [x.month + x.day for x in idx], index=idx, ) - v = ps.stats.glg(s, year_offset="a") + v = ps.stats.glg(s, year_offset="A") assert v == 16.0 def test_glg_fill_limit(self) -> None: @@ -54,7 +54,7 @@ def test_glg_fill_limit(self) -> None: fill_method="linear", limit=15, output="yearly", - year_offset="a", + year_offset="A", min_n_meas=1, ) assert v.notna().sum() == 2 @@ -67,7 +67,7 @@ def test_glg_fill_limit_null(self) -> None: fill_method="linear", limit=None, output="yearly", - year_offset="a", + year_offset="A", min_n_meas=1, ) assert v.notna().sum() == 3 diff --git a/tests/test_model.py b/tests/test_model.py index 8bf60851..79f73b35 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -20,7 +20,7 @@ def generate_synthetic_heads(input, rfunc, params, const=10.0, cutoff=0.999, dt= h = const * np.ones(len(input) + step.size) for i in range(len(input)): - h[i : i + step.size] += input[i] * step + h[i : i + step.size] += input.iloc[i] * step head = Series( index=input.index, @@ -190,7 +190,7 @@ def test_model_freq_h(): a_tide = 0.15 # sine with period 12 hrs 25 minutes and amplitude 1.5 m - tidx = date_range(obs.index[0], obs.index[-1] + Timedelta(hours=23), freq="H") + tidx = date_range(obs.index[0], obs.index[-1] + Timedelta(hours=23), freq="h") tides = Series( index=tidx, data=1.5 * np.sin(2 * np.pi * np.arange(tidx.size) / (0.517375)), @@ -199,7 +199,7 @@ def test_model_freq_h(): ht = generate_synthetic_heads(tides, rf_tide, (A_tide, a_tide), dt=1 / 24.0) # model with hourly timestep - ml_h = ps.Model(ht, name="tidal_model", freq="H") + ml_h = ps.Model(ht, name="tidal_model", freq="h") sm = ps.StressModel( tides, rfunc=ps.Exponential(), @@ -209,5 +209,5 @@ def test_model_freq_h(): ml_h.add_stressmodel(sm) ml_h.solve(noise=False, report=False) - assert ml_h.simulate().index.freq == "H" + assert ml_h.simulate().index.freq == "h" assert ml_h.stats.rsq() > 0.99999 diff --git a/tests/test_rfuncs.py b/tests/test_rfuncs.py index 2d707bf2..c82d849a 100644 --- a/tests/test_rfuncs.py +++ b/tests/test_rfuncs.py @@ -9,7 +9,7 @@ def test_rfunc(rfunc_name) -> None: rfunc = getattr(ps.rfunc, rfunc_name)() if rfunc_name == "HantushWellModel": rfunc.set_distances(100.0) - p = rfunc.get_init_parameters("test").initial + p = rfunc.get_init_parameters("test").initial.to_numpy() rfunc.block(p) rfunc.step(p) @@ -30,7 +30,7 @@ def test_to_dict_rfuncs(rfunc_name) -> None: rfunc1.set_distances(100.0) rfunc2.set_distances(100.0) - p1 = rfunc1.get_init_parameters("test").initial - p2 = rfunc2.get_init_parameters("test").initial + p1 = rfunc1.get_init_parameters("test").initial.to_numpy() + p2 = rfunc2.get_init_parameters("test").initial.to_numpy() assert (rfunc1.step(p1) - rfunc2.step(p2)).sum() == 0.0 From 3189a407062a3ca0c193ee4c9665c4f9bc6440cc Mon Sep 
17 00:00:00 2001 From: Raoul Collenteur Date: Mon, 19 Feb 2024 13:52:58 +0100 Subject: [PATCH 10/10] set version 1.4.0 --- pastas/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pastas/version.py b/pastas/version.py index ada69ce1..ecdf0b22 100644 --- a/pastas/version.py +++ b/pastas/version.py @@ -4,7 +4,7 @@ logger = logging.getLogger(__name__) -__version__ = "1.4.0b" +__version__ = "1.4.0" def check_numba_scipy() -> bool: