Add MCMC solver to Pastas (#572)

pastas · Aug 15, 2023 · 9cf6126 · 9cf6126
1 parent 6a1bc90
commit 9cf6126
Show file tree

Hide file tree

Showing 13 changed files with 1,197 additions and 313 deletions.
diff --git a/doc/api/index.rst b/doc/api/index.rst
@@ -18,6 +18,7 @@ left-hand menu you will find the different categories of the API documentation.
     rfunc
     recharge
     solver
+    objective_functions
     modelplots.Plotting
     modelcompare.CompareModels
     timeseries.TimeSeries

diff --git a/doc/examples/uncertainty_emcee.ipynb b/doc/examples/uncertainty_emcee.ipynb
diff --git a/pastas/__init__.py b/pastas/__init__.py
@@ -7,7 +7,6 @@
 import pastas.recharge as rch
 import pastas.stats as stats
 import pastas.timeseries_utils as ts
-
 from .decorators import set_use_numba
 from .model import Model
 from .modelcompare import CompareModels
@@ -26,7 +25,8 @@
     Polder,
     Spline,
 )
-from .solver import LeastSquares, LmfitSolve
+from .solver import LeastSquares, LmfitSolve, EmceeSolve
+import pastas.objective_functions as objfunc
 from .stressmodels import (
     ChangeModel,
     Constant,

diff --git a/pastas/model.py b/pastas/model.py
@@ -103,7 +103,16 @@ def __init__(
         self.name = validate_name(name)
 
         self.parameters = DataFrame(
-            columns=["initial", "name", "optimal", "pmin", "pmax", "vary", "stderr"]
+            columns=[
+                "initial",
+                "name",
+                "optimal",
+                "pmin",
+                "pmax",
+                "vary",
+                "stderr",
+                "dist",
+            ]
         )
 
         # Define the model components
@@ -839,6 +848,7 @@ def set_parameter(
         pmin: Optional[float] = None,
         pmax: Optional[float] = None,
         optimal: Optional[float] = None,
+        dist: Optional[str] = None,
         move_bounds: bool = False,
     ) -> None:
         """Method to change the parameter properties.
@@ -857,6 +867,8 @@ def set_parameter(
             maximum value for the parameter.
         optimal: float, optional
             optimal value for the parameter.
+        dist: str, optional
+            Distribution of the parameters.
         move_bounds: bool, optional
             Reset pmin/pmax based on new initial value. Of move_bounds=True, pmin and
             pmax must be None.
@@ -917,6 +929,9 @@ def set_parameter(
         if pmax is not None:
             obj._set_pmax(name, pmax)
             self.parameters.loc[name, "pmax"] = pmax
+        if dist is not None:
+            obj._set_dist(name, dist)
+            self.parameters.loc[name, "dist"] = dist
         if optimal is not None:
             self.parameters.loc[name, "optimal"] = optimal
 
@@ -1168,6 +1183,7 @@ def get_init_parameters(
         if not initial:
             parameters.initial.update(self.parameters.optimal)
             parameters.optimal.update(self.parameters.optimal)
+            parameters.stderr.update(self.parameters.stderr)
 
         return parameters
 

diff --git a/pastas/noisemodels.py b/pastas/noisemodels.py
@@ -39,15 +39,24 @@ class NoiseModelBase:
     def __init__(self) -> None:
         self.nparam = 1
         self.name = "noise"
-        self.parameters = DataFrame(columns=["initial", "pmin", "pmax", "vary", "name"])
+        self.parameters = DataFrame(
+            columns=["initial", "pmin", "pmax", "vary", "name", "dist"]
+        )
 
     def set_init_parameters(self, oseries: Optional[Series] = None) -> None:
         if oseries is not None:
             pinit = np.diff(oseries.index.to_numpy()) / Timedelta("1D")
             pinit = np.median(pinit)
         else:
             pinit = 14.0
-        self.parameters.loc["noise_alpha"] = (pinit, 1e-5, 5000.0, True, "noise")
+        self.parameters.loc["noise_alpha"] = (
+            pinit,
+            1e-5,
+            5000.0,
+            True,
+            "noise",
+            "uniform",
+        )
 
     @set_parameter
     def _set_initial(self, name: str, value: float) -> None:
@@ -89,6 +98,16 @@ def _set_vary(self, name: str, value: float) -> None:
         """
         self.parameters.loc[name, "vary"] = value
 
+    @set_parameter
+    def _set_dist(self, name: str, value: str) -> None:
+        """Internal method to set distribution of prior of the parameter.
+
+        Notes
+        -----
+        The preferred method for parameter setting is through the model.
+        """
+        self.parameters.loc[name, "dist"] = str(value)
+
     def to_dict(self) -> dict:
         """Method to return a dict to store the noise model"""
         data = {"class": self._name, "norm": self.norm}
@@ -201,22 +220,22 @@ def to_dict(self) -> dict:
 
 class ArmaModel(NoiseModelBase):
     """ARMA(1,1) Noise model to simulate the noise as defined in
-    :cite:t:`collenteur_estimation_2021`.
+        :cite:t:`collenteur_estimation_2021`.
 
-    Notes
-    -----
-    Calculates the noise according to:
-
-    .. math::
-        \\upsilon_t = r_t - r_{t-1} e^{-\\Delta t/\\alpha} - \\upsilon_{t-1}
-        e^{-\\Delta t/\\beta}
-
-    The units of the alpha and beta parameters are always in days.
-
-    Warnings
-    --------
-    This model has only been tested on regular time steps and should not be used for
-    irregular time steps yet.
+        Notes
+        -----
+        Calculates the noise according to:
+    F
+        .. math::
+            \\upsilon_t = r_t - r_{t-1} e^{-\\Delta t/\\alpha} - \\upsilon_{t-1}
+            e^{-\\Delta t/\\beta}
+
+        The units of the alpha and beta parameters are always in days.
+
+        Warnings
+        --------
+        This model has only been tested on regular time steps and should not be used for
+        irregular time steps yet.
     """
 
     _name = "ArmaModel"
@@ -232,8 +251,22 @@ def set_init_parameters(self, oseries: Series = None) -> None:
             pinit = np.median(pinit)
         else:
             pinit = 14.0
-        self.parameters.loc["noise_alpha"] = (pinit, 1e-9, 5000.0, True, "noise")
-        self.parameters.loc["noise_beta"] = (1.0, -np.inf, np.inf, True, "noise")
+        self.parameters.loc["noise_alpha"] = (
+            pinit,
+            1e-9,
+            5000.0,
+            True,
+            "noise",
+            "uniform",
+        )
+        self.parameters.loc["noise_beta"] = (
+            1.0,
+            -np.inf,
+            np.inf,
+            True,
+            "noise",
+            "uniform",
+        )
 
     def simulate(self, res: Series, p: ArrayLike) -> Series:
         alpha = p[0]

diff --git a/pastas/objective_functions.py b/pastas/objective_functions.py
@@ -0,0 +1,149 @@
+"""This module contains the objective functions that can be used with the pastas 
+`EmceeSolve` solver.   
+
+"""
+from numpy import pi, log
+from pandas import DataFrame
+
+
+class GaussianLikelihood:
+    """Gaussian likelihood function.
+
+    Notes
+    -----
+    The Gaussian log-likelihood function is defined as:
+
+    .. math::
+        \\log(L) = -\\frac{N}{2}\\log(2\\pi\\sigma^2) +
+        \\frac{\\sum_{i=1}^N - \\epsilon_i^2}{2\\sigma^2}
+
+    where :math:`N` is the number of observations, :math:`\\sigma^2` is the variance of
+    the residuals, and :math:`\\epsilon_i` is the residual at time :math:`i`. The
+    parameter :math:`\\sigma^2` need to be estimated.
+
+    """
+
+    _name = "GaussianLikelihood"
+
+    def __init__(self):
+        self.nparam = 1
+
+    def get_init_parameters(self, name: str) -> DataFrame:
+        """Get the initial parameters for the log-likelihood function.
+
+        Parameters
+        ----------
+        name: str
+            Name of the log-likelihood function.
+
+        Returns
+        -------
+        parameters: DataFrame
+            Initial parameters for the log-likelihood function.
+
+        """
+        parameters = DataFrame(
+            columns=["initial", "pmin", "pmax", "vary", "stderr", "name", "dist"]
+        )
+        parameters.loc[name + "_sigma"] = (0.05, 1e-10, 1, True, 0.01, name, "uniform")
+        return parameters
+
+    def compute(self, rv, p):
+        """Compute the log-likelihood.
+
+        Parameters
+        ----------
+        rv: array
+            Residuals of the model.
+        p: array or list
+            Parameters of the log-likelihood function.
+
+        Returns
+        -------
+        ln: float
+            Log-likelihood
+
+        """
+        sigma = p[-1]
+        N = rv.size
+        ln = -0.5 * N * log(2 * pi * sigma) + sum(-(rv**2) / (2 * sigma))
+        return ln
+
+
+class GaussianLikelihoodAr1:
+    """Gaussian likelihood function with AR1 autocorrelated residuals.
+
+    Notes
+    -----
+    The Gaussian log-likelihood function with AR1 autocorrelated residual is defined as:
+
+    .. math::
+        \\log(L) = -\\frac{N-1}{2}\\log(2\\pi\\sigma^2) +
+        \\frac{\\sum_{i=1}^N - (\\epsilon_i - \\phi \\epsilon_{i-\\Delta t})^2}
+        {2\\sigma^2}
+
+    where :math:`N` is the number of observations, :math:`\\sigma^2` is the
+    variance of the residuals, :math:`\\epsilon_i` is the residual at time
+    :math:`i` and :math:`\\mu` is the mean of the residuals. :math:`\\Delta t` is
+    the time step between the observations. :math:`\\phi` is the autoregressive
+    parameter. The parameters :math:`\\phi` and :math:`\\sigma^2` need to be estimated.
+
+    """
+
+    _name = "GaussianLikelihoodAr1"
+
+    def __init__(self):
+        self.nparam = 2
+
+    def get_init_parameters(self, name: str) -> DataFrame:
+        """Get the initial parameters for the log-likelihood function.
+
+        Parameters
+        ----------
+        name: str
+            Name of the log-likelihood function.
+
+        Returns
+        -------
+        parameters: DataFrame
+            Initial parameters for the log-likelihood function.
+
+        """
+        parameters = DataFrame(
+            columns=["initial", "pmin", "pmax", "vary", "stderr", "name", "dist"]
+        )
+        parameters.loc[name + "_sigma"] = (0.05, 1e-10, 1, True, 0.01, name, "uniform")
+        parameters.loc[name + "_theta"] = (
+            0.5,
+            1e-10,
+            0.99999,
+            True,
+            0.2,
+            name,
+            "uniform",
+        )
+        return parameters
+
+    def compute(self, rv, p):
+        """Compute the log-likelihood.
+
+        Parameters
+        ----------
+        rv: array
+            Residuals of the model.
+        p: array or list
+            Parameters of the log-likelihood function.
+
+        Returns
+        -------
+        ln: float
+            Log-likelihood.
+
+        """
+        sigma = p[-2]
+        theta = p[-1]
+        N = rv.size
+        ln = -(N - 1) / 2 * log(2 * pi * sigma) + sum(
+            -((rv[1:] - theta * rv[0:-1]) ** 2) / (2 * sigma)
+        )
+        return ln