diff --git a/botorch/models/approximate_gp.py b/botorch/models/approximate_gp.py
index 9c8c9c26fd..b1b30ac3a6 100644
--- a/botorch/models/approximate_gp.py
+++ b/botorch/models/approximate_gp.py
@@ -146,6 +146,13 @@ def fantasize(self, X, sampler=MCSampler, observation_noise=True, *args, **kwarg
 
 
 class _SingleTaskVariationalGP(ApproximateGP):
+    """
+    Base class wrapper for a stochastic variational Gaussian Process (SVGP)
+    model [hensman2013svgp]_.
+
+    Uses pivoted Cholesky initialization for the inducing points.
+    """
+
     def __init__(
         self,
         train_X: Tensor,
@@ -159,10 +166,6 @@ def __init__(
         inducing_points: Optional[Union[Tensor, int]] = None,
     ) -> None:
         r"""
-        Base class wrapper for a stochastic variational Gaussian Process (SVGP)
-        model [hensman2013svgp]_. Uses pivoted cholesky initialization for the
-        inducing points.
-
         Args:
             train_X: Training inputs (due to the ability of the SVGP to sub-sample
                 this does not have to be all of the training inputs).
@@ -255,7 +258,7 @@ def forward(self, X) -> MultivariateNormal:
 
 class SingleTaskVariationalGP(ApproximateGPyTorchModel):
     r"""A single-task variational GP model following [hensman2013svgp]_ with pivoted
-    cholesky initialization following [chen2018dpp]_ and [burt2020svgp]_.
+    Cholesky initialization following [chen2018dpp]_ and [burt2020svgp]_.
 
     A single-task variational GP using relatively strong priors on the Kernel
     hyperparameters, which work best when covariates are normalized to the unit
@@ -269,11 +272,19 @@ class SingleTaskVariationalGP(ApproximateGPyTorchModel):
 
     Use this model if you have a lot of data or if your responses are non-Gaussian.
 
-    To train this model, you should use `gpytorch.mlls.VariationalELBO` and not the
-    exact marginal log likelihood. Example mll:
-
-    mll = VariationalELBO(model.likelihood, model.model, num_data=train_X.shape[-2])
-
+    To train this model, you should use `gpytorch.mlls.VariationalELBO` and not
+    the exact marginal log likelihood.
+
+    Example:
+        >>> import torch
+        >>> from botorch.models import SingleTaskVariationalGP
+        >>> from gpytorch.mlls import VariationalELBO
+        >>>
+        >>> train_X = torch.rand(20, 2)
+        >>> model = SingleTaskVariationalGP(train_X)
+        >>> mll = VariationalELBO(
+        >>>     model.likelihood, model.model, num_data=train_X.shape[-2]
+        >>> )
     """
 
     def __init__(
@@ -292,15 +303,11 @@ def __init__(
         input_transform: Optional[InputTransform] = None,
     ) -> None:
         r"""
-        A single task stochastic variational Gaussian process model (SVGP) as described
-        by [hensman2013svgp]_. We use pivoted cholesky initialization [burt2020svgp]_ to
-        initialize the inducing points of the model.
-
        Args:
             train_X: Training inputs (due to the ability of the SVGP to sub-sample
                 this does not have to be all of the training inputs).
             train_Y: Training targets (optional).
-            likelihood: Instance of a GPyYorch likelihood. If omitted, uses a
+            likelihood: Instance of a GPyTorch likelihood. If omitted, uses
                 either a `GaussianLikelihood` (if `num_outputs=1`) or a
                 `MultitaskGaussianLikelihood`(if `num_outputs>1`).
             num_outputs: Number of output responses per input (default: 1).
diff --git a/botorch/models/fully_bayesian.py b/botorch/models/fully_bayesian.py
index 620ea3a782..e71d7837bf 100644
--- a/botorch/models/fully_bayesian.py
+++ b/botorch/models/fully_bayesian.py
@@ -6,9 +6,21 @@
 
 r"""Gaussian Process Regression models with fully Bayesian inference.
 
-We use a lightweight PyTorch implementation of a Matern-5/2 kernel as there are some
-performance issues with running NUTS on top of standard GPyTorch models. The resulting
-hyperparameter samples are loaded into a batched GPyTorch model after fitting.
+Fully Bayesian models use Bayesian inference over model hyperparameters, such
+as lengthscales and noise variance, learning a posterior distribution for the
+hyperparameters using the No-U-Turn-Sampler (NUTS). This is followed by
+sampling a small set of hyperparameters (often ~16) from the posterior
+that we will use for model predictions and for computing acquisition function
+values. By contrast, our "standard" models (e.g.
+`SingleTaskGP`) learn only a single best value for each hyperparameter using
+MAP. The fully Bayesian method generally results in a better and more
+well-calibrated model, but is more computationally intensive. For a full
+description, see [Eriksson2021saasbo]_.
+
+We use a lightweight PyTorch implementation of a Matern-5/2 kernel as there are
+some performance issues with running NUTS on top of standard GPyTorch models.
+The resulting hyperparameter samples are loaded into a batched GPyTorch model
+after fitting.
 
 References:
 
@@ -71,9 +83,19 @@ def reshape_and_detach(target: Tensor, new_value: Tensor) -> None:
 
 class PyroModel:
     r"""
-    Base class for a Pyro model.
-
-    :meta ignore:
+    Base class for a Pyro model; used to assist in learning hyperparameters.
+
+    This class and its subclasses are not standard BoTorch models; instead,
+    the subclasses are used as inputs to a `SaasFullyBayesianSingleTaskGP`,
+    which should then have its hyperparameters fit with
+    `fit_fully_bayesian_model_nuts`. (By default, its subclass `SaasPyroModel`
+    is used.) A `PyroModel`'s `sample` method should specify lightweight
+    PyTorch functionality, which will be used for fast model fitting with NUTS.
+    The utility of `PyroModel` is in enabling fast fitting with NUTS, since we
+    would otherwise need to use GPyTorch, which is computationally infeasible
+    in combination with Pyro.
+
+    :meta private:
     """
 
     def set_inputs(
@@ -115,6 +137,12 @@ class SaasPyroModel(PyroModel):
     The SAAS model uses sparsity-inducing priors to identift the most important
     parameters. This model is suitable for high-dimensional BO with potentially
     hundreds of tunable parameters. See [Eriksson2021saasbo]_ for more details.
+
+    `SaasPyroModel` is not a standard BoTorch model; instead, it is used as
+    an input to `SaasFullyBayesianSingleTaskGP`. It is used as a default keyword
+    argument, and end users are not likely to need to instantiate or modify a
+    `SaasPyroModel` unless they want to customize its attributes (such as
+    `covar_module`).
     """
 
     def sample(self) -> None:
@@ -274,9 +302,9 @@ class SaasFullyBayesianSingleTaskGP(SingleTaskGP):
     isn't compatible with `fit_gpytorch_model`.
 
     Example:
-    >>> saas_gp = SaasFullyBayesianSingleTaskGP(train_X, train_Y)
-    >>> fit_fully_bayesian_model_nuts(saas_gp)
-    >>> posterior = saas_gp.posterior(test_X)
+        >>> saas_gp = SaasFullyBayesianSingleTaskGP(train_X, train_Y)
+        >>> fit_fully_bayesian_model_nuts(saas_gp)
+        >>> posterior = saas_gp.posterior(test_X)
     """
 
     def __init__(
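
For context on the `SingleTaskVariationalGP` docstring change above, here is a minimal end-to-end training sketch built around `VariationalELBO`. The toy data, optimizer choice, learning rate, and step count are illustrative assumptions, not code from this diff:

    import torch
    from botorch.models import SingleTaskVariationalGP
    from gpytorch.mlls import VariationalELBO

    # Toy data, assumed purely for illustration.
    train_X = torch.rand(20, 2)
    train_Y = torch.sin(train_X.sum(dim=-1, keepdim=True))

    model = SingleTaskVariationalGP(train_X, train_Y)
    # The ELBO stands in for the exact marginal log likelihood when training an SVGP.
    mll = VariationalELBO(model.likelihood, model.model, num_data=train_X.shape[-2])

    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1)  # assumed settings
    for _ in range(100):
        optimizer.zero_grad()
        output = model.model(train_X)  # variational posterior at the training inputs
        loss = -mll(output, train_Y.squeeze(-1))
        loss.backward()
        optimizer.step()

Because the SVGP can sub-sample its training data (per the `train_X` docstring above), the same loop also works over mini-batches, which is the main reason to reach for this model on large datasets.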
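Likewise, for the `SaasFullyBayesianSingleTaskGP` example touched in the last hunk, a sketch of the full NUTS fitting flow. The data shapes and the explicit `warmup_steps`/`num_samples`/`thinning` values are assumptions, chosen so that num_samples / thinning yields the ~16 retained hyperparameter samples mentioned in the new module docstring:

    import torch
    from botorch.fit import fit_fully_bayesian_model_nuts
    from botorch.models.fully_bayesian import SaasFullyBayesianSingleTaskGP

    # Toy data, assumed purely for illustration; double precision is the
    # usual choice for fully Bayesian models.
    train_X = torch.rand(20, 6, dtype=torch.float64)
    train_Y = torch.sin(train_X[:, :1])
    test_X = torch.rand(5, 6, dtype=torch.float64)

    saas_gp = SaasFullyBayesianSingleTaskGP(train_X, train_Y)
    # NUTS draws hyperparameter samples from the posterior; after thinning,
    # 256 / 16 = 16 samples are loaded into the batched GPyTorch model.
    fit_fully_bayesian_model_nuts(
        saas_gp, warmup_steps=256, num_samples=256, thinning=16
    )
    posterior = saas_gp.posterior(test_X)  # batched over the 16 samples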