Skip to content

Commit

Permalink
Add MCMC solver to Pastas (#572)
Browse files Browse the repository at this point in the history
  • Loading branch information
raoulcollenteur committed Aug 15, 2023
1 parent 6a1bc90 commit 9cf6126
Show file tree
Hide file tree
Showing 13 changed files with 1,197 additions and 313 deletions.
1 change: 1 addition & 0 deletions doc/api/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ left-hand menu you will find the different categories of the API documentation.
rfunc
recharge
solver
objective_functions
modelplots.Plotting
modelcompare.CompareModels
timeseries.TimeSeries
Expand Down
534 changes: 331 additions & 203 deletions doc/examples/uncertainty_emcee.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pastas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import pastas.recharge as rch
import pastas.stats as stats
import pastas.timeseries_utils as ts

from .decorators import set_use_numba
from .model import Model
from .modelcompare import CompareModels
Expand All @@ -26,7 +25,8 @@
Polder,
Spline,
)
from .solver import LeastSquares, LmfitSolve
from .solver import LeastSquares, LmfitSolve, EmceeSolve
import pastas.objective_functions as objfunc
from .stressmodels import (
ChangeModel,
Constant,
Expand Down
18 changes: 17 additions & 1 deletion pastas/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,16 @@ def __init__(
self.name = validate_name(name)

self.parameters = DataFrame(
columns=["initial", "name", "optimal", "pmin", "pmax", "vary", "stderr"]
columns=[
"initial",
"name",
"optimal",
"pmin",
"pmax",
"vary",
"stderr",
"dist",
]
)

# Define the model components
Expand Down Expand Up @@ -839,6 +848,7 @@ def set_parameter(
pmin: Optional[float] = None,
pmax: Optional[float] = None,
optimal: Optional[float] = None,
dist: Optional[str] = None,
move_bounds: bool = False,
) -> None:
"""Method to change the parameter properties.
Expand All @@ -857,6 +867,8 @@ def set_parameter(
maximum value for the parameter.
optimal: float, optional
optimal value for the parameter.
dist: str, optional
Distribution of the parameters.
move_bounds: bool, optional
Reset pmin/pmax based on new initial value. Of move_bounds=True, pmin and
pmax must be None.
Expand Down Expand Up @@ -917,6 +929,9 @@ def set_parameter(
if pmax is not None:
obj._set_pmax(name, pmax)
self.parameters.loc[name, "pmax"] = pmax
if dist is not None:
obj._set_dist(name, dist)
self.parameters.loc[name, "dist"] = dist
if optimal is not None:
self.parameters.loc[name, "optimal"] = optimal

Expand Down Expand Up @@ -1168,6 +1183,7 @@ def get_init_parameters(
if not initial:
parameters.initial.update(self.parameters.optimal)
parameters.optimal.update(self.parameters.optimal)
parameters.stderr.update(self.parameters.stderr)

return parameters

Expand Down
71 changes: 52 additions & 19 deletions pastas/noisemodels.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,24 @@ class NoiseModelBase:
def __init__(self) -> None:
self.nparam = 1
self.name = "noise"
self.parameters = DataFrame(columns=["initial", "pmin", "pmax", "vary", "name"])
self.parameters = DataFrame(
columns=["initial", "pmin", "pmax", "vary", "name", "dist"]
)

def set_init_parameters(self, oseries: Optional[Series] = None) -> None:
if oseries is not None:
pinit = np.diff(oseries.index.to_numpy()) / Timedelta("1D")
pinit = np.median(pinit)
else:
pinit = 14.0
self.parameters.loc["noise_alpha"] = (pinit, 1e-5, 5000.0, True, "noise")
self.parameters.loc["noise_alpha"] = (
pinit,
1e-5,
5000.0,
True,
"noise",
"uniform",
)

@set_parameter
def _set_initial(self, name: str, value: float) -> None:
Expand Down Expand Up @@ -89,6 +98,16 @@ def _set_vary(self, name: str, value: float) -> None:
"""
self.parameters.loc[name, "vary"] = value

@set_parameter
def _set_dist(self, name: str, value: str) -> None:
"""Internal method to set distribution of prior of the parameter.
Notes
-----
The preferred method for parameter setting is through the model.
"""
self.parameters.loc[name, "dist"] = str(value)

def to_dict(self) -> dict:
"""Method to return a dict to store the noise model"""
data = {"class": self._name, "norm": self.norm}
Expand Down Expand Up @@ -201,22 +220,22 @@ def to_dict(self) -> dict:

class ArmaModel(NoiseModelBase):
"""ARMA(1,1) Noise model to simulate the noise as defined in
:cite:t:`collenteur_estimation_2021`.
:cite:t:`collenteur_estimation_2021`.
Notes
-----
Calculates the noise according to:
.. math::
\\upsilon_t = r_t - r_{t-1} e^{-\\Delta t/\\alpha} - \\upsilon_{t-1}
e^{-\\Delta t/\\beta}
The units of the alpha and beta parameters are always in days.
Warnings
--------
This model has only been tested on regular time steps and should not be used for
irregular time steps yet.
Notes
-----
Calculates the noise according to:
F
.. math::
\\upsilon_t = r_t - r_{t-1} e^{-\\Delta t/\\alpha} - \\upsilon_{t-1}
e^{-\\Delta t/\\beta}
The units of the alpha and beta parameters are always in days.
Warnings
--------
This model has only been tested on regular time steps and should not be used for
irregular time steps yet.
"""

_name = "ArmaModel"
Expand All @@ -232,8 +251,22 @@ def set_init_parameters(self, oseries: Series = None) -> None:
pinit = np.median(pinit)
else:
pinit = 14.0
self.parameters.loc["noise_alpha"] = (pinit, 1e-9, 5000.0, True, "noise")
self.parameters.loc["noise_beta"] = (1.0, -np.inf, np.inf, True, "noise")
self.parameters.loc["noise_alpha"] = (
pinit,
1e-9,
5000.0,
True,
"noise",
"uniform",
)
self.parameters.loc["noise_beta"] = (
1.0,
-np.inf,
np.inf,
True,
"noise",
"uniform",
)

def simulate(self, res: Series, p: ArrayLike) -> Series:
alpha = p[0]
Expand Down
149 changes: 149 additions & 0 deletions pastas/objective_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
"""This module contains the objective functions that can be used with the pastas
`EmceeSolve` solver.
"""
from numpy import pi, log
from pandas import DataFrame


class GaussianLikelihood:
"""Gaussian likelihood function.
Notes
-----
The Gaussian log-likelihood function is defined as:
.. math::
\\log(L) = -\\frac{N}{2}\\log(2\\pi\\sigma^2) +
\\frac{\\sum_{i=1}^N - \\epsilon_i^2}{2\\sigma^2}
where :math:`N` is the number of observations, :math:`\\sigma^2` is the variance of
the residuals, and :math:`\\epsilon_i` is the residual at time :math:`i`. The
parameter :math:`\\sigma^2` need to be estimated.
"""

_name = "GaussianLikelihood"

def __init__(self):
self.nparam = 1

def get_init_parameters(self, name: str) -> DataFrame:
"""Get the initial parameters for the log-likelihood function.
Parameters
----------
name: str
Name of the log-likelihood function.
Returns
-------
parameters: DataFrame
Initial parameters for the log-likelihood function.
"""
parameters = DataFrame(
columns=["initial", "pmin", "pmax", "vary", "stderr", "name", "dist"]
)
parameters.loc[name + "_sigma"] = (0.05, 1e-10, 1, True, 0.01, name, "uniform")
return parameters

def compute(self, rv, p):
"""Compute the log-likelihood.
Parameters
----------
rv: array
Residuals of the model.
p: array or list
Parameters of the log-likelihood function.
Returns
-------
ln: float
Log-likelihood
"""
sigma = p[-1]
N = rv.size
ln = -0.5 * N * log(2 * pi * sigma) + sum(-(rv**2) / (2 * sigma))
return ln


class GaussianLikelihoodAr1:
"""Gaussian likelihood function with AR1 autocorrelated residuals.
Notes
-----
The Gaussian log-likelihood function with AR1 autocorrelated residual is defined as:
.. math::
\\log(L) = -\\frac{N-1}{2}\\log(2\\pi\\sigma^2) +
\\frac{\\sum_{i=1}^N - (\\epsilon_i - \\phi \\epsilon_{i-\\Delta t})^2}
{2\\sigma^2}
where :math:`N` is the number of observations, :math:`\\sigma^2` is the
variance of the residuals, :math:`\\epsilon_i` is the residual at time
:math:`i` and :math:`\\mu` is the mean of the residuals. :math:`\\Delta t` is
the time step between the observations. :math:`\\phi` is the autoregressive
parameter. The parameters :math:`\\phi` and :math:`\\sigma^2` need to be estimated.
"""

_name = "GaussianLikelihoodAr1"

def __init__(self):
self.nparam = 2

def get_init_parameters(self, name: str) -> DataFrame:
"""Get the initial parameters for the log-likelihood function.
Parameters
----------
name: str
Name of the log-likelihood function.
Returns
-------
parameters: DataFrame
Initial parameters for the log-likelihood function.
"""
parameters = DataFrame(
columns=["initial", "pmin", "pmax", "vary", "stderr", "name", "dist"]
)
parameters.loc[name + "_sigma"] = (0.05, 1e-10, 1, True, 0.01, name, "uniform")
parameters.loc[name + "_theta"] = (
0.5,
1e-10,
0.99999,
True,
0.2,
name,
"uniform",
)
return parameters

def compute(self, rv, p):
"""Compute the log-likelihood.
Parameters
----------
rv: array
Residuals of the model.
p: array or list
Parameters of the log-likelihood function.
Returns
-------
ln: float
Log-likelihood.
"""
sigma = p[-2]
theta = p[-1]
N = rv.size
ln = -(N - 1) / 2 * log(2 * pi * sigma) + sum(
-((rv[1:] - theta * rv[0:-1]) ** 2) / (2 * sigma)
)
return ln
Loading

0 comments on commit 9cf6126

Please sign in to comment.