From 4f434ca8b4a04fedbf64f2f6ce90a7d18679452c Mon Sep 17 00:00:00 2001 From: Maren Mahsereci <42842079+mmahsereci@users.noreply.github.com> Date: Mon, 28 Nov 2022 12:27:51 +0100 Subject: [PATCH] Initial designs for `quad` (#745) --- .../api/quad/solvers.initial_designs.rst | 5 + docs/source/api/quad/solvers.rst | 5 + src/probnum/quad/_bayesquad.py | 128 ++++++---- src/probnum/quad/solvers/__init__.py | 2 +- .../quad/solvers/_bayesian_quadrature.py | 236 +++++++++++++----- .../quad/solvers/initial_designs/__init__.py | 13 + .../initial_designs/_initial_design.py | 51 ++++ .../solvers/initial_designs/_latin_design.py | 50 ++++ .../solvers/initial_designs/_mc_design.py | 36 +++ .../policies/_van_der_corput_policy.py | 4 +- tests/test_quad/test_bayesian_quadrature.py | 221 ++++++++++++---- tests/test_quad/test_bayesquad/test_bq.py | 42 ++-- tests/test_quad/test_initial_designs.py | 72 ++++++ tests/test_quad/test_policy.py | 2 - 14 files changed, 679 insertions(+), 188 deletions(-) create mode 100644 docs/source/api/quad/solvers.initial_designs.rst create mode 100644 src/probnum/quad/solvers/initial_designs/__init__.py create mode 100644 src/probnum/quad/solvers/initial_designs/_initial_design.py create mode 100644 src/probnum/quad/solvers/initial_designs/_latin_design.py create mode 100644 src/probnum/quad/solvers/initial_designs/_mc_design.py create mode 100644 tests/test_quad/test_initial_designs.py diff --git a/docs/source/api/quad/solvers.initial_designs.rst b/docs/source/api/quad/solvers.initial_designs.rst new file mode 100644 index 000000000..05eb7a0f7 --- /dev/null +++ b/docs/source/api/quad/solvers.initial_designs.rst @@ -0,0 +1,5 @@ +Initial Designs +--------------- +.. automodapi:: probnum.quad.solvers.initial_designs + :no-heading: + :headings: "*" diff --git a/docs/source/api/quad/solvers.rst b/docs/source/api/quad/solvers.rst index c5d6b314c..955111967 100644 --- a/docs/source/api/quad/solvers.rst +++ b/docs/source/api/quad/solvers.rst @@ -19,3 +19,8 @@ probnum.quad.solvers :hidden: solvers.stopping_criteria + +.. toctree:: + :hidden: + + solvers.initial_designs diff --git a/src/probnum/quad/_bayesquad.py b/src/probnum/quad/_bayesquad.py index 6ddf8364e..6dd889989 100644 --- a/src/probnum/quad/_bayesquad.py +++ b/src/probnum/quad/_bayesquad.py @@ -18,23 +18,19 @@ from probnum.quad.typing import DomainLike, DomainType from probnum.randprocs.kernels import Kernel from probnum.randvars import Normal -from probnum.typing import FloatLike, IntLike +from probnum.typing import IntLike def bayesquad( fun: Callable, - input_dim: int, + input_dim: IntLike, kernel: Optional[Kernel] = None, - domain: Optional[DomainLike] = None, measure: Optional[IntegrationMeasure] = None, + domain: Optional[DomainLike] = None, policy: Optional[str] = "bmc", - scale_estimation: Optional[str] = "mle", - max_evals: Optional[IntLike] = None, - var_tol: Optional[FloatLike] = None, - rel_tol: Optional[FloatLike] = None, - batch_size: IntLike = 1, + initial_design: Optional[str] = None, rng: Optional[np.random.Generator] = None, - jitter: FloatLike = 1.0e-8, + options: Optional[dict] = None, ) -> Tuple[Normal, BQIterInfo]: r"""Infer the solution of the uni- or multivariate integral :math:`\int_\Omega f(x) d \mu(x)` @@ -66,44 +62,56 @@ def bayesquad( Input dimension of the integration problem. kernel The kernel used for the GP model. Defaults to the ``ExpQuad`` kernel. + measure + The integration measure. Defaults to the Lebesgue measure on ``domain``. domain The integration domain. Contains lower and upper bound as scalar or ``np.ndarray``. Obsolete if ``measure`` is given. - measure - The integration measure. Defaults to the Lebesgue measure on ``domain``. policy Type of acquisition strategy to use. Defaults to 'bmc'. Options are ========================== ======= Bayesian Monte Carlo [2]_ ``bmc`` - ========================== ======= - - ========================== ======= van Der Corput points ``vdc`` ========================== ======= - scale_estimation - Estimation method to use to compute the scale parameter. Defaults to 'mle'. - Options are - - ============================== ======= - Maximum likelihood estimation ``mle`` - ============================== ======= - - max_evals - Maximum number of function evaluations. - var_tol - Tolerance on the variance of the integral. - rel_tol - Tolerance on consecutive updates of the integral mean. - batch_size - Number of new observations at each update. Defaults to 1. + initial_design + The type of initial design to use. If ``None`` is given, no initial design is + used. Options are + + ========================== ========= + Samples from measure ``mc`` + Latin hypercube [3]_ ``latin`` + ========================== ========= + rng - Random number generator. Used by Bayesian Monte Carlo other random sampling - policies. - jitter - Non-negative jitter to numerically stabilise kernel matrix inversion. - Defaults to 1e-8. + The random number generator used for random methods. + + options + A dictionary with the following optional solver settings + + scale_estimation : Optional[str] + Estimation method to use to compute the scale parameter. Defaults + to 'mle'. Options are + + ============================== ======= + Maximum likelihood estimation ``mle`` + ============================== ======= + + max_evals : Optional[IntLike] + Maximum number of function evaluations. + var_tol : Optional[FloatLike] + Tolerance on the variance of the integral. + rel_tol : Optional[FloatLike] + Tolerance on consecutive updates of the integral mean. + jitter : Optional[FloatLike] + Non-negative jitter to numerically stabilise kernel matrix + inversion. Defaults to 1e-8. + batch_size : Optional[IntLike] + Number of new observations at each update. Defaults to 1. + num_initial_design_nodes : Optional[IntLike] + The number of nodes created by the initial design. Defaults to + ``input_dim * 5`` if an initial design is given. Returns ------- @@ -119,7 +127,8 @@ def bayesquad( Warns ----- - When ``domain`` is given but not used. + UserWarning + When ``domain`` is given but not used. Notes ----- @@ -138,6 +147,8 @@ def bayesquad( computation?, *Statistical Science 34.1*, 2019, 1-22, 2019 .. [2] Rasmussen, C. E., and Z. Ghahramani, Bayesian Monte Carlo, *Advances in Neural Information Processing Systems*, 2003, 505-512. + .. [3] Mckay et al., A Comparison of Three Methods for Selecting Values of Input + Variables in the Analysis of Output from a Computer Code, *Technometrics*, 1979. Examples -------- @@ -162,12 +173,8 @@ def bayesquad( measure=measure, domain=domain, policy=policy, - scale_estimation=scale_estimation, - max_evals=max_evals, - var_tol=var_tol, - rel_tol=rel_tol, - batch_size=batch_size, - jitter=jitter, + initial_design=initial_design, + options=options, ) # Integrate @@ -182,10 +189,9 @@ def bayesquad_from_data( nodes: np.ndarray, fun_evals: np.ndarray, kernel: Optional[Kernel] = None, - domain: Optional[DomainLike] = None, measure: Optional[IntegrationMeasure] = None, - scale_estimation: Optional[str] = "mle", - jitter: FloatLike = 1.0e-8, + domain: Optional[DomainLike] = None, + options: Optional[dict] = None, ) -> Tuple[Normal, BQIterInfo]: r"""Infer the value of an integral from a given set of nodes and function evaluations. @@ -199,16 +205,25 @@ def bayesquad_from_data( *shape=(n_eval,)* -- Function evaluations at ``nodes``. kernel The kernel used for the GP model. Defaults to the ``ExpQuad`` kernel. + measure + The integration measure. Defaults to the Lebesgue measure. domain The integration domain. Contains lower and upper bound as scalar or ``np.ndarray``. Obsolete if ``measure`` is given. - measure - The integration measure. Defaults to the Lebesgue measure. - scale_estimation - Estimation method to use to compute the scale parameter. Defaults to 'mle'. - jitter - Non-negative jitter to numerically stabilise kernel matrix inversion. - Defaults to 1e-8. + options + A dictionary with the following optional solver settings + + scale_estimation : Optional[str] + Estimation method to use to compute the scale parameter. Defaults + to 'mle'. Options are + + ============================== ======= + Maximum likelihood estimation ``mle`` + ============================== ======= + + jitter : Optional[FloatLike] + Non-negative jitter to numerically stabilise kernel matrix + inversion. Defaults to 1e-8. Returns ------- @@ -224,7 +239,8 @@ def bayesquad_from_data( Warns ----- - When ``domain`` is given but not used. + UserWarning + When ``domain`` is given but not used. See Also -------- @@ -256,8 +272,8 @@ def bayesquad_from_data( measure=measure, domain=domain, policy=None, - scale_estimation=scale_estimation, - jitter=jitter, + initial_design=None, + options=options, ) # Integrate @@ -274,6 +290,8 @@ def _check_domain_measure_compatibility( measure: Optional[IntegrationMeasure], ) -> Tuple[int, Optional[DomainType], IntegrationMeasure]: + input_dim = int(input_dim) + # Neither domain nor measure given if domain is None and measure is None: raise ValueError( diff --git a/src/probnum/quad/solvers/__init__.py b/src/probnum/quad/solvers/__init__.py index 35e884f4e..02ec29ec4 100644 --- a/src/probnum/quad/solvers/__init__.py +++ b/src/probnum/quad/solvers/__init__.py @@ -1,6 +1,6 @@ """Bayesian quadrature methods and their components.""" -from . import belief_updates, policies, stopping_criteria +from . import belief_updates, initial_designs, policies, stopping_criteria from ._bayesian_quadrature import BayesianQuadrature from ._bq_state import BQIterInfo, BQState diff --git a/src/probnum/quad/solvers/_bayesian_quadrature.py b/src/probnum/quad/solvers/_bayesian_quadrature.py index 691d36a80..bdeef6257 100644 --- a/src/probnum/quad/solvers/_bayesian_quadrature.py +++ b/src/probnum/quad/solvers/_bayesian_quadrature.py @@ -11,6 +11,7 @@ from probnum.quad.kernel_embeddings import KernelEmbedding from probnum.quad.solvers._bq_state import BQIterInfo, BQState from probnum.quad.solvers.belief_updates import BQBeliefUpdate, BQStandardBeliefUpdate +from probnum.quad.solvers.initial_designs import InitialDesign, LatinDesign, MCDesign from probnum.quad.solvers.policies import Policy, RandomPolicy, VanDerCorputPolicy from probnum.quad.solvers.stopping_criteria import ( BQStoppingCriterion, @@ -22,7 +23,7 @@ from probnum.quad.typing import DomainLike from probnum.randprocs.kernels import ExpQuad, Kernel from probnum.randvars import Normal -from probnum.typing import FloatLike, IntLike +from probnum.typing import IntLike # pylint: disable=too-many-branches, too-complex @@ -46,12 +47,22 @@ class BayesianQuadrature: The inference method. stopping_criterion The criterion that determines convergence. + initial_design + The initial design chooses a set of nodes once, before the acquisition loop with + the policy runs. + + Raises + ------ + ValueError + If ``initial_design`` is given but ``policy`` is not given. See Also -------- - bayesquad : Computes the integral using an acquisition policy. - bayesquad_from_data : Computes the integral :math:`F` using a given dataset of - nodes and function evaluations. + :func:`bayesquad ` : + Computes the integral using an acquisition policy. + :func:`bayesquad_from_data ` : + Computes the integral :math:`F` using a given dataset of nodes and function + evaluations. """ @@ -63,27 +74,31 @@ def __init__( policy: Optional[Policy], belief_update: BQBeliefUpdate, stopping_criterion: BQStoppingCriterion, + initial_design: Optional[InitialDesign], ) -> None: + + if policy is None and initial_design is not None: + raise ValueError( + "An initial design can only be used in combination with a policy." + ) + self.kernel = kernel self.measure = measure self.policy = policy self.belief_update = belief_update self.stopping_criterion = stopping_criterion + self.initial_design = initial_design @classmethod def from_problem( cls, - input_dim: int, + input_dim: IntLike, kernel: Optional[Kernel] = None, measure: Optional[IntegrationMeasure] = None, domain: Optional[DomainLike] = None, policy: Optional[str] = "bmc", - scale_estimation: Optional[str] = "mle", - max_evals: Optional[IntLike] = None, - var_tol: Optional[FloatLike] = None, - rel_tol: Optional[FloatLike] = None, - batch_size: IntLike = 1, - jitter: FloatLike = 1.0e-8, + initial_design: Optional[str] = None, + options: Optional[dict] = None, ) -> "BayesianQuadrature": r"""Creates an instance of this class from a problem description. @@ -101,19 +116,29 @@ def from_problem( policy The policy choosing nodes at which to evaluate the integrand. Choose ``None`` if you want to integrate from a fixed dataset. - scale_estimation - Estimation method to use to compute the scale parameter. Defaults to 'mle'. - max_evals - Maximum number of evaluations as stopping criterion. - var_tol - Variance tolerance as stopping criterion. - rel_tol - Relative tolerance as stopping criterion. - batch_size - Batch size used in node acquisition. Defaults to 1. - jitter - Non-negative jitter to numerically stabilise kernel matrix inversion. - Defaults to 1e-8. + initial_design + The initial design chooses a set of nodes once, before the acquisition loop + with the policy runs. + options + A dictionary with the following optional solver settings + + scale_estimation : Optional[str] + Estimation method to use to compute the scale parameter. Defaults + to 'mle'. + max_evals : Optional[IntLike] + Maximum number of evaluations as stopping criterion. + var_tol : Optional[FloatLike] + Variance tolerance as stopping criterion. + rel_tol : Optional[FloatLike] + Relative tolerance as stopping criterion. + jitter : Optional[FloatLike] + Non-negative jitter to numerically stabilise kernel matrix + inversion. Defaults to 1e-8. + batch_size : Optional[IntLike] + Batch size used in node acquisition. Defaults to 1. + num_initial_design_nodes : Optional[IntLike] + The number of nodes created by the initial design. Defaults to + ``input_dim * 5`` if an initial design is given. Returns ------- @@ -122,12 +147,35 @@ def from_problem( Raises ------ - ValueError - If neither a ``domain`` nor a ``measure`` are given. NotImplementedError - If an unknown ``policy`` is given. + If an unknown ``policy`` or an unknown ``initial_design`` is given. + ValueError + If neither ``domain`` nor ``measure`` is given. + + See Also + -------- + :func:`bayesquad ` : + For details on options for ``policy`` and ``initial_design``. + """ + input_dim = int(input_dim) + + # Set some solver options + if options is None: + options = {} + + max_evals = options.get("max_evals", None) + rel_tol = options.get("rel_tol", None) + var_tol = options.get("var_tol", None) + + scale_estimation = options.get("scale_estimation", "mle") + jitter = options.get("jitter", 1.0e-8) + batch_size = options.get("batch_size", int(1)) + num_initial_design_nodes = options.get( + "num_initial_design_nodes", int(5 * input_dim) + ) + # Set up integration measure if domain is None and measure is None: raise ValueError( @@ -190,18 +238,31 @@ def _stopcrit_or(sc1, sc2): if policy is None: _stopping_criterion = ImmediateStop() + # Select initial design + if initial_design is None: + pass # not to raise the exception + elif initial_design == "mc": + initial_design = MCDesign(num_initial_design_nodes, measure) + elif initial_design == "latin": + initial_design = LatinDesign(num_initial_design_nodes, measure) + else: + raise NotImplementedError( + f"The given initial design ({initial_design}) is unknown." + ) + return cls( kernel=kernel, measure=measure, policy=policy, belief_update=belief_update, stopping_criterion=_stopping_criterion, + initial_design=initial_design, ) def bq_iterator( self, bq_state: BQState, - info: Optional[BQIterInfo], + info: BQIterInfo, fun: Optional[Callable], rng: Optional[np.random.Generator], ) -> Tuple[Normal, BQState, BQIterInfo]: @@ -233,10 +294,6 @@ def bq_iterator( The updated state of the iteration. """ - # Setup iteration info - if info is None: - info = BQIterInfo.from_bq_state(bq_state) - while True: _has_converged = self.stopping_criterion(bq_state, info) @@ -271,12 +328,18 @@ def integrate( fun_evals: Optional[np.ndarray], rng: Optional[np.random.Generator] = None, ) -> Tuple[Normal, BQState, BQIterInfo]: - """Integrates the function ``fun``. + """Integrates a given function. + + The function may be given as a function handle ``fun`` and/or numerically in + terms of ``fun_evals`` at fixed nodes ``nodes``. + + If a policy is defined this method calls the generator ``bq_iterator`` until + the first stopping criterion is met. The initial design is evaluated in a batch + prior to running ``bq_iterator``. + + If no policy is defined this method immediately stops after processing the + given ``nodes``. - ``fun`` may be analytically given, or numerically in terms of ``fun_evals`` at - fixed nodes. This function calls the generator ``bq_iterator`` until the first - stopping criterion is met. It immediately stops after processing the initial - ``nodes`` if ``policy`` is not available. Parameters ---------- @@ -304,36 +367,29 @@ def integrate( ValueError If neither the integrand function ``fun`` nor integrand evaluations ``fun_evals`` are given. - ValueError - If neither ``nodes`` nor ``policy`` is given. ValueError If dimension of ``nodes`` or ``fun_evals`` is incorrect, or if their shapes do not match. ValueError - If ``rng`` is not given but ``policy`` requires it. - """ + If ``rng`` is not given but ``policy`` or ``initial_design`` requires it. + ValueError + If a policy is available but ``fun`` is not given. + ValueError + If no policy is available and no ``nodes`` are given. - # no policy given: Integrate on fixed dataset. - if self.policy is None: - # nodes must be provided if no policy is given. - if nodes is None: - raise ValueError("No policy available: Please provide nodes.") + Warns + ----- + UserWarning + When no policy is given and ``fun`` is ignored. - # Use fun_evals and disregard fun if both are given - if fun is not None and fun_evals is not None: - warnings.warn( - "No policy available: 'fun_evals' are used instead of 'fun'." - ) - fun = None + Notes + ----- + The initial design is evaluated prior to running the ``bq_iterator`` and hence + may not obey the stopping criterion. For example, if stopping is induced via a + maximum number of evaluations (``max_evals``) smaller than the batch size of the + initial design, the initial design will be evaluated nevertheless. - # override stopping condition as no policy is given. - self.stopping_criterion = ImmediateStop() - - elif self.policy.requires_rng and rng is None: - raise ValueError( - f"The policy '{self.policy.__class__.__name__}' requires a random " - f"number generator (rng) to be given." - ) + """ # Check if integrand function is provided if fun is None and fun_evals is None: @@ -342,7 +398,7 @@ def integrate( "'fun_evals'." ) - # Setup initial design + # Setup fixed design if nodes is not None and fun_evals is None: fun_evals = fun(nodes) @@ -362,7 +418,56 @@ def integrate( f"of evaluations." ) - # Setup BQ state: This encodes a zero-mean prior. + # policy given + if self.policy is not None: + + # function handle must be given for policy to work + if fun is None: + raise ValueError("Policy requires ``fun`` to be given.") + + # some policies require and rng + if self.policy.requires_rng and rng is None: + raise ValueError( + f"The policy '{self.policy.__class__.__name__}' requires a random " + f"number generator (rng) to be given." + ) + + # no policy given: Integrate on fixed dataset. + else: + # nodes must be provided if no policy is given. + if nodes is None: + raise ValueError("No policy available: Please provide nodes.") + + # Use fun_evals and disregard fun if both are given + if fun is not None and fun_evals is not None: + warnings.warn( + "No policy available: 'fun_evals' are used instead of 'fun'." + ) + fun = None + + # override stopping condition as no policy is given. + self.stopping_criterion = ImmediateStop() + + # initial design given (which implies policy and fun is given) + if self.initial_design is not None: + + # some designs require and rng + if self.initial_design.requires_rng and rng is None: + raise ValueError( + f"The initial design '{self.initial_design.__class__.__name__}' " + f"requires a random number generator (rng) to be given." + ) + + initial_design_nodes = self.initial_design(rng) + initial_design_fun_evals = fun(initial_design_nodes) + if nodes is not None: + nodes = np.concatenate((nodes, initial_design_nodes), axis=0) + fun_evals = np.append(fun_evals, initial_design_fun_evals) + else: + nodes = initial_design_nodes + fun_evals = initial_design_fun_evals + + # set BQ state: This encodes a zero-mean prior. bq_state = BQState( measure=self.measure, kernel=self.kernel, @@ -370,6 +475,8 @@ def integrate( 0.0, KernelEmbedding(self.kernel, self.measure).kernel_variance() ), ) + + # update BQ state if nodes and evaluations are available if nodes is not None: _, bq_state = self.belief_update( bq_state=bq_state, @@ -377,7 +484,10 @@ def integrate( new_fun_evals=fun_evals, ) - info = None + # set iteration info + info = BQIterInfo.from_bq_state(bq_state) + + # run loop for (_, bq_state, info) in self.bq_iterator(bq_state, info, fun, rng): pass diff --git a/src/probnum/quad/solvers/initial_designs/__init__.py b/src/probnum/quad/solvers/initial_designs/__init__.py new file mode 100644 index 000000000..a1739814c --- /dev/null +++ b/src/probnum/quad/solvers/initial_designs/__init__.py @@ -0,0 +1,13 @@ +"""Initial designs for Bayesian quadrature.""" + +from ._initial_design import InitialDesign +from ._latin_design import LatinDesign +from ._mc_design import MCDesign + +# Public classes and functions. Order is reflected in documentation. +__all__ = ["InitialDesign", "MCDesign", "LatinDesign"] + +# Set correct module paths. Corrects links and module paths in documentation. +InitialDesign.__module__ = "probnum.quad.solvers.initial_designs" +MCDesign.__module__ = "probnum.quad.solvers.initial_designs" +LatinDesign.__module__ = "probnum.quad.solvers.initial_designs" diff --git a/src/probnum/quad/solvers/initial_designs/_initial_design.py b/src/probnum/quad/solvers/initial_designs/_initial_design.py new file mode 100644 index 000000000..bdde8fa9a --- /dev/null +++ b/src/probnum/quad/solvers/initial_designs/_initial_design.py @@ -0,0 +1,51 @@ +"""Abstract base class of an initial design for Bayesian quadrature.""" + +from __future__ import annotations + +import abc +from typing import Optional + +import numpy as np + +from probnum.quad.integration_measures import IntegrationMeasure +from probnum.typing import IntLike + + +# pylint: disable=too-few-public-methods +class InitialDesign(abc.ABC): + """An abstract class for an initial design for Bayesian quadrature. + + Parameters + ---------- + num_nodes + The number of nodes to be designed. + measure + The integration measure. + + """ + + def __init__(self, num_nodes: IntLike, measure: IntegrationMeasure) -> None: + self.num_nodes = int(num_nodes) + self.measure = measure + + @property + @abc.abstractmethod + def requires_rng(self) -> bool: + """Whether the initial design requires a random number generator when called.""" + raise NotImplementedError + + @abc.abstractmethod + def __call__(self, rng: Optional[np.random.Generator]) -> np.ndarray: + """Get the initial nodes. + + Parameters + ---------- + rng + A random number generator. + + Returns + ------- + nodes : + *shape=(num_nodes, input_dim)* -- Initial design nodes. + """ + raise NotImplementedError diff --git a/src/probnum/quad/solvers/initial_designs/_latin_design.py b/src/probnum/quad/solvers/initial_designs/_latin_design.py new file mode 100644 index 000000000..dd36c496b --- /dev/null +++ b/src/probnum/quad/solvers/initial_designs/_latin_design.py @@ -0,0 +1,50 @@ +"""Latin hypercube initial design for Bayesian quadrature.""" + +from __future__ import annotations + +from typing import Optional + +import numpy as np +from scipy.stats import qmc + +from probnum.quad.integration_measures import IntegrationMeasure +from probnum.typing import IntLike + +from ._initial_design import InitialDesign + + +# pylint: disable=too-few-public-methods +class LatinDesign(InitialDesign): + """Initial design for Bayesian quadrature that samples from a Latin hypercube. [1]_ + + Parameters + ---------- + num_nodes + The number of nodes to be designed. + measure + The integration measure. + + References + ---------- + .. [1] Mckay et al., A Comparison of Three Methods for Selecting Values of Input + Variables in the Analysis of Output from a Computer Code. Technometrics, 1979. + + """ + + def __init__(self, num_nodes: IntLike, measure: IntegrationMeasure) -> None: + if np.Inf in np.hstack([abs(measure.domain[0]), abs(measure.domain[1])]): + raise ValueError( + "Latin hypercube samples require a finite domain. " + "At least one dimension seems to be unbounded." + ) + + super().__init__(measure=measure, num_nodes=num_nodes) + + @property + def requires_rng(self) -> bool: + return True + + def __call__(self, rng: Optional[np.random.Generator]) -> np.ndarray: + sampler = qmc.LatinHypercube(d=self.measure.input_dim, seed=rng) + sample = sampler.random(n=self.num_nodes) + return qmc.scale(sample, self.measure.domain[0], self.measure.domain[1]) diff --git a/src/probnum/quad/solvers/initial_designs/_mc_design.py b/src/probnum/quad/solvers/initial_designs/_mc_design.py new file mode 100644 index 000000000..4caf34494 --- /dev/null +++ b/src/probnum/quad/solvers/initial_designs/_mc_design.py @@ -0,0 +1,36 @@ +"""Initial design that samples from the integration measure.""" + +from __future__ import annotations + +from typing import Optional + +import numpy as np + +from probnum.quad.integration_measures import IntegrationMeasure +from probnum.typing import IntLike + +from ._initial_design import InitialDesign + + +# pylint: disable=too-few-public-methods +class MCDesign(InitialDesign): + """Initial design for Bayesian quadrature that samples from the integration measure. + + Parameters + ---------- + num_nodes + The number of nodes to be designed. + measure + The integration measure. + + """ + + def __init__(self, num_nodes: IntLike, measure: IntegrationMeasure) -> None: + super().__init__(measure=measure, num_nodes=num_nodes) + + @property + def requires_rng(self) -> bool: + return True + + def __call__(self, rng: Optional[np.random.Generator]) -> np.ndarray: + return self.measure.sample(self.num_nodes, rng=rng) diff --git a/src/probnum/quad/solvers/policies/_van_der_corput_policy.py b/src/probnum/quad/solvers/policies/_van_der_corput_policy.py index c276f5946..63ab5a3b1 100644 --- a/src/probnum/quad/solvers/policies/_van_der_corput_policy.py +++ b/src/probnum/quad/solvers/policies/_van_der_corput_policy.py @@ -31,7 +31,7 @@ class VanDerCorputPolicy(Policy): The integration measure with finite domain. References - -------- + ---------- .. [1] https://en.wikipedia.org/wiki/Van_der_Corput_sequence """ @@ -43,7 +43,7 @@ def __init__(self, batch_size: IntLike, measure: IntegrationMeasure) -> None: domain_a = measure.domain[0] domain_b = measure.domain[1] - if np.Inf in np.hstack([abs(measure.domain[0]), abs(measure.domain[1])]): + if np.Inf in np.hstack([abs(domain_a), abs(domain_b)]): raise ValueError("Policy 'vdc' works only for bounded domains.") self.domain_a = domain_a diff --git a/tests/test_quad/test_bayesian_quadrature.py b/tests/test_quad/test_bayesian_quadrature.py index 0f36caf4c..838a3931c 100644 --- a/tests/test_quad/test_bayesian_quadrature.py +++ b/tests/test_quad/test_bayesian_quadrature.py @@ -6,6 +6,8 @@ from probnum import LambdaStoppingCriterion from probnum.quad.integration_measures import LebesgueMeasure from probnum.quad.solvers import BayesianQuadrature +from probnum.quad.solvers.belief_updates import BQStandardBeliefUpdate +from probnum.quad.solvers.initial_designs import LatinDesign, MCDesign from probnum.quad.solvers.policies import RandomPolicy, VanDerCorputPolicy from probnum.quad.solvers.stopping_criteria import ( ImmediateStop, @@ -14,6 +16,7 @@ RelativeMeanChange, ) from probnum.randprocs.kernels import ExpQuad +from probnum.randvars import Normal @pytest.fixture @@ -26,7 +29,7 @@ def data(input_dim): def fun(x): return 2 * np.ones(x.shape[0]) - nodes = np.ones([20, input_dim]) + nodes = np.ones([5, input_dim]) fun_evals = fun(nodes) return nodes, fun_evals, fun @@ -35,7 +38,7 @@ def fun(x): def bq(input_dim): return BayesianQuadrature.from_problem( input_dim=input_dim, - domain=(np.zeros(input_dim), np.ones(input_dim)), + domain=(0, 1), ) @@ -43,16 +46,37 @@ def bq(input_dim): def bq_no_policy(input_dim): return BayesianQuadrature.from_problem( input_dim=input_dim, - domain=(np.zeros(input_dim), np.ones(input_dim)), + domain=(0, 1), policy=None, ) -def test_bq_from_problem_wrong_inputs(input_dim): +# ======================================= +# Tests for '__init__' method start here. +# ======================================= - # neither measure nor domain is provided + +def test_bayesian_quadrature_wrong_input(input_dim): + """These exceptions are raised in the __init__ method.""" + measure = LebesgueMeasure(domain=(0, 1), input_dim=input_dim) + + # initial design is given but policy is not given with pytest.raises(ValueError): - BayesianQuadrature.from_problem(input_dim=input_dim) + BayesianQuadrature( + kernel=ExpQuad(input_shape=(input_dim,)), + measure=measure, + policy=None, + belief_update=BQStandardBeliefUpdate(jitter=1e-6, scale_estimation=None), + stopping_criterion=MaxNevals(max_nevals=10), + initial_design=MCDesign(num_nodes=3, measure=measure), + ) + + +# =========================================== +# Tests for 'from_problem' method start here. +# =========================================== + +# Tests for correct assignments start here. @pytest.mark.parametrize( @@ -64,23 +88,15 @@ def test_bq_from_problem_policy_assignment(policy, policy_type): assert isinstance(bq.policy, policy_type) -def test_bq_from_problem_defaults(bq_no_policy, bq): - - # default policy and stopping criterion - assert isinstance(bq.policy, RandomPolicy) - assert isinstance(bq.stopping_criterion, LambdaStoppingCriterion) - - # default stopping criterion if no policy is available - assert bq_no_policy.policy is None - assert isinstance(bq_no_policy.stopping_criterion, ImmediateStop) - - # default measure - assert isinstance(bq_no_policy.measure, LebesgueMeasure) - assert isinstance(bq.measure, LebesgueMeasure) - - # default kernel - assert isinstance(bq_no_policy.kernel, ExpQuad) - assert isinstance(bq.kernel, ExpQuad) +@pytest.mark.parametrize( + "design, design_type", [("mc", MCDesign), ("latin", LatinDesign)] +) +def test_bq_from_problem_initial_design_assignment(design, design_type): + """Test if correct initial design is assigned from string identifier.""" + bq = BayesianQuadrature.from_problem( + input_dim=1, domain=(0, 1), initial_design=design + ) + assert isinstance(bq.initial_design, design_type) @pytest.mark.parametrize( @@ -100,56 +116,171 @@ def test_bq_from_problem_stopping_criterion_assignment(max_evals, var_tol, rel_t bq = BayesianQuadrature.from_problem( input_dim=2, domain=(0, 1), - max_evals=max_evals, - var_tol=var_tol, - rel_tol=rel_tol, + options=dict(max_evals=max_evals, var_tol=var_tol, rel_tol=rel_tol), ) assert isinstance(bq.stopping_criterion, t) -def test_integrate_no_policy_wrong_input(bq_no_policy, data): - # The combination of inputs below is important to trigger the correct exception. - nodes, fun_evals, fun = data +def test_bq_from_problem_defaults(bq, bq_no_policy): - # no nodes provided + # defaults if policy is available + assert isinstance(bq.measure, LebesgueMeasure) + assert isinstance(bq.kernel, ExpQuad) + assert isinstance(bq.stopping_criterion, LambdaStoppingCriterion) + assert isinstance(bq.policy, RandomPolicy) + assert bq.initial_design is None + + # defaults if no policy is available + assert isinstance(bq_no_policy.measure, LebesgueMeasure) + assert isinstance(bq_no_policy.kernel, ExpQuad) + assert isinstance(bq_no_policy.stopping_criterion, ImmediateStop) + assert bq_no_policy.policy is None + assert bq_no_policy.initial_design is None + + +# Tests for input checks and exception raises start here. + + +def test_bq_from_problem_wrong_inputs(input_dim): + + # neither measure nor domain is provided with pytest.raises(ValueError): - bq_no_policy.integrate(fun=None, nodes=None, fun_evals=fun_evals) + BayesianQuadrature.from_problem(input_dim=input_dim) - # fun is ignored if fun_evals are given - with pytest.warns(Warning): - bq_no_policy.integrate(fun=fun, nodes=nodes, fun_evals=fun_evals) + # unknown policy is provided + with pytest.raises(NotImplementedError): + BayesianQuadrature.from_problem( + input_dim=input_dim, domain=(0, 1), policy="unknown_policy" + ) + + # unknown initial_design is provided + with pytest.raises(NotImplementedError): + BayesianQuadrature.from_problem( + input_dim=input_dim, + domain=(0, 1), + policy="bmc", + initial_design="unknown_design", + ) + + +# ======================================== +# Tests for 'integrate' method start here. +# ======================================== + + +@pytest.mark.parametrize("initial_design_provided", [True, False]) +@pytest.mark.parametrize("nodes_provided", [True, False]) +def test_integrate_output_shapes(initial_design_provided, nodes_provided, data, rng): + # the test uses max_evals stopping condition in order to check the shapes + # consistently. + + max_evals = 15 + num_design_nodes = 4 + + # get data + nodes, fun_evals, fun = data + (num_nodes, input_dim) = nodes.shape + + params = dict(input_dim=input_dim, domain=(0, 1)) + options = dict(max_evals=max_evals) + num_updates = max_evals + # get correct shapes and values + if nodes_provided: + num_updates += -num_nodes + 1 + else: + nodes, fun_evals = None, None -def test_integrate_wrong_input(bq, bq_no_policy, data, rng): + if initial_design_provided: + num_updates += -num_design_nodes + (1 - 1 * nodes_provided) * 1 + params["initial_design"] = "mc" + options["num_initial_design_nodes"] = num_design_nodes + + assert num_updates > 1 # make sure that some nodes are collected + + bq = BayesianQuadrature.from_problem(**params, options=options) + res, bq_state, info = bq.integrate( + fun=fun, nodes=nodes, fun_evals=fun_evals, rng=rng + ) + assert isinstance(res, Normal) + assert isinstance(bq_state.integral_belief, Normal) + assert isinstance(bq_state.scale_sq, float) + assert len(bq_state.kernel_means) == max_evals + assert len(bq_state.previous_integral_beliefs) == num_updates + assert bq_state.nodes.shape == (max_evals, input_dim) + assert bq_state.fun_evals.shape == (max_evals,) + assert bq_state.gram.shape == (max_evals, max_evals) + + +# Tests for 'integrate' input checks and exception raises start here. + + +def test_integrate_wrong_input(bq, data, rng): + """Exception tests shared by all bq methods.""" # The combination of inputs below is important to trigger the correct exception. nodes, fun_evals, fun = data - # no integrand provided + # no integrand provided (neither fun nor fun_evals) with pytest.raises(ValueError): bq.integrate(fun=None, nodes=nodes, fun_evals=None, rng=rng) - with pytest.raises(ValueError): - bq_no_policy.integrate(fun=None, nodes=nodes, fun_evals=None) # wrong fun_evals shape with pytest.raises(ValueError): bq.integrate(fun=fun, nodes=nodes, fun_evals=fun_evals[:, None], rng=rng) - with pytest.raises(ValueError): - bq_no_policy.integrate(fun=None, nodes=nodes, fun_evals=fun_evals[:, None]) # wrong nodes shape with pytest.raises(ValueError): - bq.integrate(fun=fun, nodes=nodes[:, None], fun_evals=fun_evals, rng=rng) - with pytest.raises(ValueError): - bq_no_policy.integrate(fun=None, nodes=nodes[:, None], fun_evals=fun_evals) + bq.integrate(fun=fun, nodes=nodes[:, None], fun_evals=None, rng=rng) # number of points in nodes and fun_evals do not match wrong_nodes = np.vstack([nodes, np.ones([1, nodes.shape[1]])]) with pytest.raises(ValueError): bq.integrate(fun=fun, nodes=wrong_nodes, fun_evals=fun_evals, rng=rng) + + +def test_integrate_with_policy_wrong_input(bq, data, rng): + """Exception tests specific to when a policy is given.""" + # The combination of inputs below is important to trigger the correct exception. + + nodes, fun_evals, fun = data + + # a policy always requires fun with pytest.raises(ValueError): - bq_no_policy.integrate(fun=None, nodes=wrong_nodes, fun_evals=fun_evals) + bq.integrate(fun=None, nodes=nodes, fun_evals=fun_evals, rng=rng) # no rng provided but policy requires it with pytest.raises(ValueError): - bq.integrate(fun=fun, nodes=nodes, fun_evals=fun_evals, rng=None) + bq.integrate(fun=fun, nodes=None, fun_evals=None, rng=None) + + +def test_integrate_no_policy_wrong_input(bq_no_policy, data): + """Exception tests specific to when no policy is given.""" + # The combination of inputs below is important to trigger the correct exception. + + nodes, fun_evals, fun = data + + # no nodes provided + with pytest.raises(ValueError): + bq_no_policy.integrate(fun=None, nodes=None, fun_evals=fun_evals) + + # fun is ignored if fun_evals are given + with pytest.warns(Warning): + bq_no_policy.integrate(fun=fun, nodes=nodes, fun_evals=fun_evals) + + +def test_integrate_initial_design_wrong_input(rng): + """Exception tests specific to when an initial design is given.""" + # The combination of inputs below is important to trigger the correct exception. + + # no rng provided but initial design requires it (and policy does not) + with pytest.raises(ValueError): + bq = BayesianQuadrature.from_problem( + input_dim=1, + domain=(0, 1), + policy="vdc", # does not need rng + initial_design="mc", # needs rng + ) + bq.integrate( + fun=lambda x: np.ones(x.shape[0]), nodes=None, fun_evals=None, rng=None + ) diff --git a/tests/test_quad/test_bayesquad/test_bq.py b/tests/test_quad/test_bayesquad/test_bq.py index 9578c8893..79900fe5d 100644 --- a/tests/test_quad/test_bayesquad/test_bq.py +++ b/tests/test_quad/test_bayesquad/test_bq.py @@ -12,11 +12,6 @@ from ..util import gauss_hermite_tensor, gauss_legendre_tensor -@pytest.fixture -def rng(): - return np.random.default_rng(seed=42) - - @pytest.mark.parametrize("input_dim", [1], ids=["dim1"]) def test_type_1d(f1d, kernel, measure, input_dim, rng): """Test that BQ outputs normal random variables for 1D integrands.""" @@ -25,8 +20,8 @@ def test_type_1d(f1d, kernel, measure, input_dim, rng): input_dim=input_dim, kernel=kernel, measure=measure, - max_evals=10, rng=rng, + options=dict(max_evals=10), ) assert isinstance(integral, Normal) @@ -60,6 +55,7 @@ def test_integral_values_1d( measure = LebesgueMeasure(input_dim=input_dim, domain=domain) # numerical integral # pylint: disable=invalid-name + def integrand(x): return f1d(x) * measure(np.atleast_2d(x)) @@ -70,12 +66,14 @@ def integrand(x): kernel=kernel, domain=domain, policy="vdc", - scale_estimation=scale_estimation, - max_evals=250, - var_tol=var_tol, - rel_tol=rel_tol, - jitter=jitter, - rng=rng, + rng=None, + options=dict( + scale_estimation=scale_estimation, + max_evals=250, + var_tol=var_tol, + rel_tol=rel_tol, + jitter=jitter, + ), ) domain = measure.domain num_integral, _ = scipyquad(integrand, domain[0], domain[1]) @@ -102,7 +100,7 @@ def test_integral_values_x2_gaussian(kernel, measure, input_dim, scale_estimatio fun_evals=fun_evals, kernel=kernel, measure=measure, - scale_estimation=scale_estimation, + options=dict(scale_estimation=scale_estimation), ) np.testing.assert_almost_equal(bq_integral.mean, true_integral, decimal=2) @@ -135,8 +133,7 @@ def test_integral_values_sin_lebesgue( fun_evals=fun_evals, kernel=kernel, measure=measure, - scale_estimation=scale_estimation, - jitter=jitter, + options=dict(scale_estimation=scale_estimation, jitter=jitter), ) np.testing.assert_almost_equal(bq_integral.mean, true_integral, decimal=2) @@ -155,10 +152,8 @@ def test_integral_values_kernel_translate(kernel, measure, input_dim, x, rng): input_dim=input_dim, kernel=kernel, measure=measure, - var_tol=1e-8, - max_evals=1000, - batch_size=50, rng=rng, + options=dict(max_evals=1000, var_tol=1e-8, batch_size=50), ) true_integral = kernel_embedding.kernel_mean(np.atleast_2d(translate_point)) np.testing.assert_almost_equal(bq_integral.mean, true_integral, decimal=2) @@ -210,10 +205,17 @@ def test_zero_function_gives_zero_variance_with_mle(rng): fun_evals = fun(nodes) bq_integral1, _ = bayesquad( - fun=fun, input_dim=input_dim, domain=domain, scale_estimation="mle", rng=rng + fun=fun, + input_dim=input_dim, + domain=domain, + rng=rng, + options=dict(scale_estimation="mle"), ) bq_integral2, _ = bayesquad_from_data( - nodes=nodes, fun_evals=fun_evals, domain=domain, scale_estimation="mle" + nodes=nodes, + fun_evals=fun_evals, + domain=domain, + options=dict(scale_estimation="mle"), ) assert bq_integral1.var == 0.0 assert bq_integral2.var == 0.0 diff --git a/tests/test_quad/test_initial_designs.py b/tests/test_quad/test_initial_designs.py new file mode 100644 index 000000000..f0b452ea7 --- /dev/null +++ b/tests/test_quad/test_initial_designs.py @@ -0,0 +1,72 @@ +"""Test cases for initial designs.""" + + +# New designs need to be added to the fixture 'design'. + + +from typing import Tuple + +import pytest + +from probnum.quad.integration_measures import GaussianMeasure, LebesgueMeasure +from probnum.quad.solvers.initial_designs import InitialDesign, LatinDesign, MCDesign + + +@pytest.fixture +def input_dim(): + return 2 + + +@pytest.fixture +def num_nodes(): + return 5 + + +@pytest.fixture +def lebesgue_measure(input_dim): + return LebesgueMeasure(domain=(0, 1), input_dim=input_dim) + + +@pytest.fixture +def gaussian_measure(input_dim): + return GaussianMeasure(mean=0.0, cov=1.0, input_dim=input_dim) + + +@pytest.fixture( + params=[ + pytest.param(des, id=des[0].__name__) + for des in [ + (MCDesign, "lebesgue_measure", dict(requires_rng=True)), + (MCDesign, "gaussian_measure", dict(requires_rng=True)), + (LatinDesign, "lebesgue_measure", dict(requires_rng=True)), + ] + ], +) +def design(request, num_nodes) -> Tuple[InitialDesign, dict]: + measure = request.getfixturevalue(request.param[1]) + values = request.param[2] + return request.param[0](**dict(measure=measure, num_nodes=num_nodes)), values + + +# Tests shared by all designs start here. + + +def test_initial_design_attribute_values(design, num_nodes): + design, values = design + assert design.num_nodes == num_nodes + assert design.requires_rng is values["requires_rng"] + + +def test_initial_design_shapes(design, rng, input_dim, num_nodes): + design, _ = design + res = design(rng) # get nodes + assert res.shape == (num_nodes, input_dim) + + +# Tests specific to LatinDesign start here. + + +def test_latin_design_wrong_domain(gaussian_measure, num_nodes): + # Latin design requires finite domain but Gaussian measure has infinite domain) + with pytest.raises(ValueError): + LatinDesign(num_nodes, gaussian_measure) diff --git a/tests/test_quad/test_policy.py b/tests/test_quad/test_policy.py index 86402dda4..76e5a9e94 100644 --- a/tests/test_quad/test_policy.py +++ b/tests/test_quad/test_policy.py @@ -1,6 +1,5 @@ """Basic tests for BQ policies.""" - # New policies need to be added to the fixtures 'policy_name' and 'policy_params' # and 'policy'. @@ -139,7 +138,6 @@ def test_van_der_corput_partial(): vdc_seq = VanDerCorputPolicy.van_der_corput_sequence(n_start, n_end) expected_seq = np.array([0.75, 0.125, 0.625, 0.375, 0.875]) np.testing.assert_array_equal(vdc_seq, expected_seq) - (n_start, n_end) = (4, 11) vdc_seq = VanDerCorputPolicy.van_der_corput_sequence(n_start, n_end) expected_seq = np.array([0.125, 0.625, 0.375, 0.875, 0.0625, 0.5625, 0.3125])