Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add MCMC solver to Pastas #572

Merged
merged 19 commits into from
Aug 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/api/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ left-hand menu you will find the different categories of the API documentation.
rfunc
recharge
solver
objective_functions
modelplots.Plotting
modelcompare.CompareModels
timeseries.TimeSeries
Expand Down
534 changes: 331 additions & 203 deletions doc/examples/uncertainty_emcee.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pastas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import pastas.recharge as rch
import pastas.stats as stats
import pastas.timeseries_utils as ts

from .decorators import set_use_numba
from .model import Model
from .modelcompare import CompareModels
Expand All @@ -26,7 +25,8 @@
Polder,
Spline,
)
from .solver import LeastSquares, LmfitSolve
from .solver import LeastSquares, LmfitSolve, EmceeSolve
import pastas.objective_functions as objfunc
from .stressmodels import (
ChangeModel,
Constant,
Expand Down
18 changes: 17 additions & 1 deletion pastas/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,16 @@ def __init__(
self.name = validate_name(name)

self.parameters = DataFrame(
columns=["initial", "name", "optimal", "pmin", "pmax", "vary", "stderr"]
columns=[
"initial",
"name",
"optimal",
"pmin",
"pmax",
"vary",
"stderr",
"dist",
]
)

# Define the model components
Expand Down Expand Up @@ -835,6 +844,7 @@ def set_parameter(
pmin: Optional[float] = None,
pmax: Optional[float] = None,
optimal: Optional[float] = None,
dist: Optional[str] = None,
move_bounds: bool = False,
) -> None:
"""Method to change the parameter properties.
Expand All @@ -853,6 +863,8 @@ def set_parameter(
maximum value for the parameter.
optimal: float, optional
optimal value for the parameter.
dist: str, optional
Distribution of the parameters.
move_bounds: bool, optional
Reset pmin/pmax based on new initial value. Of move_bounds=True, pmin and
pmax must be None.
Expand Down Expand Up @@ -913,6 +925,9 @@ def set_parameter(
if pmax is not None:
obj._set_pmax(name, pmax)
self.parameters.loc[name, "pmax"] = pmax
if dist is not None:
obj._set_dist(name, dist)
self.parameters.loc[name, "dist"] = dist
if optimal is not None:
self.parameters.loc[name, "optimal"] = optimal

Expand Down Expand Up @@ -1164,6 +1179,7 @@ def get_init_parameters(
if not initial:
parameters.initial.update(self.parameters.optimal)
parameters.optimal.update(self.parameters.optimal)
parameters.stderr.update(self.parameters.stderr)

return parameters

Expand Down
71 changes: 52 additions & 19 deletions pastas/noisemodels.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,24 @@ class NoiseModelBase:
def __init__(self) -> None:
self.nparam = 1
self.name = "noise"
self.parameters = DataFrame(columns=["initial", "pmin", "pmax", "vary", "name"])
self.parameters = DataFrame(
columns=["initial", "pmin", "pmax", "vary", "name", "dist"]
)

def set_init_parameters(self, oseries: Optional[Series] = None) -> None:
if oseries is not None:
pinit = np.diff(oseries.index.to_numpy()) / Timedelta("1D")
pinit = np.median(pinit)
else:
pinit = 14.0
self.parameters.loc["noise_alpha"] = (pinit, 1e-5, 5000.0, True, "noise")
self.parameters.loc["noise_alpha"] = (
pinit,
1e-5,
5000.0,
True,
"noise",
"uniform",
)

@set_parameter
def _set_initial(self, name: str, value: float) -> None:
Expand Down Expand Up @@ -89,6 +98,16 @@ def _set_vary(self, name: str, value: float) -> None:
"""
self.parameters.loc[name, "vary"] = value

@set_parameter
def _set_dist(self, name: str, value: str) -> None:
"""Internal method to set distribution of prior of the parameter.

Notes
-----
The preferred method for parameter setting is through the model.
"""
self.parameters.loc[name, "dist"] = str(value)

def to_dict(self) -> dict:
"""Method to return a dict to store the noise model"""
data = {"class": self._name, "norm": self.norm}
Expand Down Expand Up @@ -201,22 +220,22 @@ def to_dict(self) -> dict:

class ArmaModel(NoiseModelBase):
"""ARMA(1,1) Noise model to simulate the noise as defined in
:cite:t:`collenteur_estimation_2021`.
:cite:t:`collenteur_estimation_2021`.

Notes
-----
Calculates the noise according to:

.. math::
\\upsilon_t = r_t - r_{t-1} e^{-\\Delta t/\\alpha} - \\upsilon_{t-1}
e^{-\\Delta t/\\beta}

The units of the alpha and beta parameters are always in days.

Warnings
--------
This model has only been tested on regular time steps and should not be used for
irregular time steps yet.
Notes
-----
Calculates the noise according to:
F
.. math::
\\upsilon_t = r_t - r_{t-1} e^{-\\Delta t/\\alpha} - \\upsilon_{t-1}
e^{-\\Delta t/\\beta}

The units of the alpha and beta parameters are always in days.

Warnings
--------
This model has only been tested on regular time steps and should not be used for
irregular time steps yet.
"""

_name = "ArmaModel"
Expand All @@ -232,8 +251,22 @@ def set_init_parameters(self, oseries: Series = None) -> None:
pinit = np.median(pinit)
else:
pinit = 14.0
self.parameters.loc["noise_alpha"] = (pinit, 1e-9, 5000.0, True, "noise")
self.parameters.loc["noise_beta"] = (1.0, -np.inf, np.inf, True, "noise")
self.parameters.loc["noise_alpha"] = (
pinit,
1e-9,
5000.0,
True,
"noise",
"uniform",
)
self.parameters.loc["noise_beta"] = (
1.0,
-np.inf,
np.inf,
True,
"noise",
"uniform",
)

def simulate(self, res: Series, p: ArrayLike) -> Series:
alpha = p[0]
Expand Down
149 changes: 149 additions & 0 deletions pastas/objective_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
"""This module contains the objective functions that can be used with the pastas
`EmceeSolve` solver.

"""
from numpy import pi, log
from pandas import DataFrame


class GaussianLikelihood:
"""Gaussian likelihood function.

Notes
-----
The Gaussian log-likelihood function is defined as:

.. math::
\\log(L) = -\\frac{N}{2}\\log(2\\pi\\sigma^2) +
\\frac{\\sum_{i=1}^N - \\epsilon_i^2}{2\\sigma^2}

where :math:`N` is the number of observations, :math:`\\sigma^2` is the variance of
the residuals, and :math:`\\epsilon_i` is the residual at time :math:`i`. The
parameter :math:`\\sigma^2` need to be estimated.

"""

_name = "GaussianLikelihood"

def __init__(self):
self.nparam = 1

def get_init_parameters(self, name: str) -> DataFrame:
"""Get the initial parameters for the log-likelihood function.

Parameters
----------
name: str
Name of the log-likelihood function.

Returns
-------
parameters: DataFrame
Initial parameters for the log-likelihood function.

"""
parameters = DataFrame(
columns=["initial", "pmin", "pmax", "vary", "stderr", "name", "dist"]
)
parameters.loc[name + "_sigma"] = (0.05, 1e-10, 1, True, 0.01, name, "uniform")
return parameters

def compute(self, rv, p):
"""Compute the log-likelihood.

Parameters
----------
rv: array
Residuals of the model.
p: array or list
Parameters of the log-likelihood function.

Returns
-------
ln: float
Log-likelihood

"""
sigma = p[-1]
N = rv.size
ln = -0.5 * N * log(2 * pi * sigma) + sum(-(rv**2) / (2 * sigma))
return ln


class GaussianLikelihoodAr1:
"""Gaussian likelihood function with AR1 autocorrelated residuals.

Notes
-----
The Gaussian log-likelihood function with AR1 autocorrelated residual is defined as:

.. math::
\\log(L) = -\\frac{N-1}{2}\\log(2\\pi\\sigma^2) +
\\frac{\\sum_{i=1}^N - (\\epsilon_i - \\phi \\epsilon_{i-\\Delta t})^2}
{2\\sigma^2}

where :math:`N` is the number of observations, :math:`\\sigma^2` is the
variance of the residuals, :math:`\\epsilon_i` is the residual at time
:math:`i` and :math:`\\mu` is the mean of the residuals. :math:`\\Delta t` is
the time step between the observations. :math:`\\phi` is the autoregressive
parameter. The parameters :math:`\\phi` and :math:`\\sigma^2` need to be estimated.

"""

_name = "GaussianLikelihoodAr1"

def __init__(self):
self.nparam = 2

def get_init_parameters(self, name: str) -> DataFrame:
"""Get the initial parameters for the log-likelihood function.

Parameters
----------
name: str
Name of the log-likelihood function.

Returns
-------
parameters: DataFrame
Initial parameters for the log-likelihood function.

"""
parameters = DataFrame(
columns=["initial", "pmin", "pmax", "vary", "stderr", "name", "dist"]
)
parameters.loc[name + "_sigma"] = (0.05, 1e-10, 1, True, 0.01, name, "uniform")
parameters.loc[name + "_theta"] = (
0.5,
1e-10,
0.99999,
True,
0.2,
name,
"uniform",
)
return parameters

def compute(self, rv, p):
"""Compute the log-likelihood.

Parameters
----------
rv: array
Residuals of the model.
p: array or list
Parameters of the log-likelihood function.

Returns
-------
ln: float
Log-likelihood.

"""
sigma = p[-2]
theta = p[-1]
N = rv.size
ln = -(N - 1) / 2 * log(2 * pi * sigma) + sum(
-((rv[1:] - theta * rv[0:-1]) ** 2) / (2 * sigma)
)
return ln
Loading