From aee4b43372e8108408e2456fde604af90c96e1b6 Mon Sep 17 00:00:00 2001
From: Elizabeth Santorella
Date: Fri, 29 Jul 2022 19:12:13 -0400
Subject: [PATCH 1/3] docstrings for approximate_gp and fully_bayesian

---
 botorch/models/approximate_gp.py | 37 ++++++++++++++++++-------------
 botorch/models/fully_bayesian.py | 38 +++++++++++++++++++++++++++-----
 2 files changed, 54 insertions(+), 21 deletions(-)

diff --git a/botorch/models/approximate_gp.py b/botorch/models/approximate_gp.py
index 9c8c9c26fd..b1b30ac3a6 100644
--- a/botorch/models/approximate_gp.py
+++ b/botorch/models/approximate_gp.py
@@ -146,6 +146,13 @@ def fantasize(self, X, sampler=MCSampler, observation_noise=True, *args, **kwarg
 
 
 class _SingleTaskVariationalGP(ApproximateGP):
+    """
+    Base class wrapper for a stochastic variational Gaussian Process (SVGP)
+    model [hensman2013svgp]_.
+
+    Uses pivoted Cholesky initialization for the inducing points.
+    """
+
     def __init__(
         self,
         train_X: Tensor,
@@ -159,10 +166,6 @@ def __init__(
         inducing_points: Optional[Union[Tensor, int]] = None,
     ) -> None:
         r"""
-        Base class wrapper for a stochastic variational Gaussian Process (SVGP)
-        model [hensman2013svgp]_. Uses pivoted cholesky initialization for the
-        inducing points.
-
         Args:
             train_X: Training inputs (due to the ability of the SVGP to sub-sample
                 this does not have to be all of the training inputs).
@@ -255,7 +258,7 @@ def forward(self, X) -> MultivariateNormal:
 
 class SingleTaskVariationalGP(ApproximateGPyTorchModel):
     r"""A single-task variational GP model following [hensman2013svgp]_ with pivoted
-    cholesky initialization following [chen2018dpp]_ and [burt2020svgp]_.
+    Cholesky initialization following [chen2018dpp]_ and [burt2020svgp]_.
 
     A single-task variational GP using relatively strong priors on the Kernel
     hyperparameters, which work best when covariates are normalized to the unit
@@ -269,11 +272,19 @@ class SingleTaskVariationalGP(ApproximateGPyTorchModel):
     Use this model if you have a lot of data or if your responses are
     non-Gaussian.
 
-    To train this model, you should use `gpytorch.mlls.VariationalELBO` and not the
-    exact marginal log likelihood. Example mll:
-
-    mll = VariationalELBO(model.likelihood, model.model, num_data=train_X.shape[-2])
-
+    To train this model, you should use gpytorch.mlls.VariationalELBO and not
+    the exact marginal log likelihood.
+
+    Example:
+        >>> import torch
+        >>> from botorch.models import SingleTaskVariationalGP
+        >>> from gpytorch.mlls import VariationalELBO
+        >>>
+        >>> train_X = torch.rand(20, 2)
+        >>> model = SingleTaskVariationalGP(train_X)
+        >>> mll = VariationalELBO(
+        >>>     model.likelihood, model.model, num_data=train_X.shape[-2]
+        >>> )
     """
 
     def __init__(
@@ -292,15 +303,11 @@ def __init__(
         input_transform: Optional[InputTransform] = None,
     ) -> None:
         r"""
-        A single task stochastic variational Gaussian process model (SVGP) as described
-        by [hensman2013svgp]_. We use pivoted cholesky initialization [burt2020svgp]_ to
-        initialize the inducing points of the model.
-
         Args:
             train_X: Training inputs (due to the ability of the SVGP to sub-sample
                 this does not have to be all of the training inputs).
             train_Y: Training targets (optional).
-            likelihood: Instance of a GPyYorch likelihood. If omitted, uses a
+            likelihood: Instance of a GPyTorch likelihood. If omitted, uses
                 either a `GaussianLikelihood` (if `num_outputs=1`) or a
                 `MultitaskGaussianLikelihood`(if `num_outputs>1`).
             num_outputs: Number of output responses per input (default: 1).
diff --git a/botorch/models/fully_bayesian.py b/botorch/models/fully_bayesian.py
index 620ea3a782..a3ed499d79 100644
--- a/botorch/models/fully_bayesian.py
+++ b/botorch/models/fully_bayesian.py
@@ -6,6 +6,16 @@
 
 r"""Gaussian Process Regression models with fully Bayesian inference.
 
+Fully Bayesian models use Bayesian inference over model hyperparameters, such
+as length scales and noise variance, learning a posterior distribution for each
+hyperparameter using NUTS. When we predict and compute acquisition functions
+from a fully Bayesian model, we are using varying sets of hyperparameters
+drawn from this posterior. By contrast, our “standard” models (e.g.
+`SingleTaskGP`) learn only a single best value for each hyperparameter using
+MAP. The fully Bayesian method generally results in a better and more
+well-calibrated model, but is more computationally intensive. For a full
+description, see [Eriksson2021saasbo].
+
 We use a lightweight PyTorch implementation of a Matern-5/2 kernel as there are some
 performance issues with running NUTS on top of standard GPyTorch models. The resulting
 hyperparameter samples are loaded into a batched GPyTorch model after fitting.
@@ -71,9 +81,19 @@ def reshape_and_detach(target: Tensor, new_value: Tensor) -> None:
 
 class PyroModel:
     r"""
-    Base class for a Pyro model.
-
-    :meta ignore:
+    Base class for a Pyro model; used to assist in learning hyperparameters.
+
+    This class and its subclasses are not standard BoTorch models; instead
+    the subclasses are used as inputs to a `SaasFullyBayesianSingleTaskGP`,
+    which should then have its hyperparameters fit with
+    `fit_fully_bayesian_model_nuts`. (By default, its subclass `SaasPyroModel`
+    is used). A `PyroModel`’s `sample` method should specify lightweight
+    PyTorch functionality, which will be used for fast model fitting with NUTS.
+    The utility of `PyroModel` is in enabling fast fitting with NUTS, since we
+    would otherwise need to use GPyTorch, which is computationally infeasible
+    in combination with Pyro.
+
+    :meta private:
     """
 
     def set_inputs(
@@ -115,6 +135,12 @@ class SaasPyroModel(PyroModel):
     The SAAS model uses sparsity-inducing priors to identift the most important
     parameters. This model is suitable for high-dimensional BO with potentially
     hundreds of tunable parameters. See [Eriksson2021saasbo]_ for more details.
+
+    `SaasPyroModel` is not a standard BoTorch model; instead, it is used as
+    an input to `SaasFullyBayesianSingleTaskGP`. It is used as a default keyword
+    argument, and end users are not likely to need to instantiate or modify a
+    `SaasPyroModel` unless they want to customize its attributes (such as
+    `covar_module`).
     """
 
     def sample(self) -> None:
@@ -274,9 +300,9 @@ class SaasFullyBayesianSingleTaskGP(SingleTaskGP):
     isn't compatible with `fit_gpytorch_model`.
 
     Example:
-    >>> saas_gp = SaasFullyBayesianSingleTaskGP(train_X, train_Y)
-    >>> fit_fully_bayesian_model_nuts(saas_gp)
-    >>> posterior = saas_gp.posterior(test_X)
+        >>> saas_gp = SaasFullyBayesianSingleTaskGP(train_X, train_Y)
+        >>> fit_fully_bayesian_model_nuts(saas_gp)
+        >>> posterior = saas_gp.posterior(test_X)
     """
 
     def __init__(

From 3d8e8938213f3f20fe47b81d38ec860c5426a96d Mon Sep 17 00:00:00 2001
From: Elizabeth Santorella
Date: Tue, 2 Aug 2022 10:56:14 -0400
Subject: [PATCH 2/3] Apply suggestions from code review

Co-authored-by: David Eriksson
---
 botorch/models/fully_bayesian.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/botorch/models/fully_bayesian.py b/botorch/models/fully_bayesian.py
index a3ed499d79..0bddcf17cb 100644
--- a/botorch/models/fully_bayesian.py
+++ b/botorch/models/fully_bayesian.py
@@ -7,10 +7,11 @@
 r"""Gaussian Process Regression models with fully Bayesian inference.
 
 Fully Bayesian models use Bayesian inference over model hyperparameters, such
-as length scales and noise variance, learning a posterior distribution for each
-hyperparameter using NUTS. When we predict and compute acquisition functions
-from a fully Bayesian model, we are using varying sets of hyperparameters
-drawn from this posterior. By contrast, our “standard” models (e.g.
+as lengthscales and noise variance, learning a posterior distribution for the
+hyperparameters using the No-U-Turn-Sampler (NUTS). This is followed by
+sampling a small set of hyperparameters (often ~16) from the posterior
+that we will use for model predictions and for computing acquisition function 
+values. By contrast, our “standard” models (e.g.
 `SingleTaskGP`) learn only a single best value for each hyperparameter using
 MAP. The fully Bayesian method generally results in a better and more
 well-calibrated model, but is more computationally intensive. For a full

From 2a506e9e726310f3e25f1bf4ab4f3f5030b2c3f3 Mon Sep 17 00:00:00 2001
From: Elizabeth Santorella
Date: Tue, 2 Aug 2022 11:03:07 -0400
Subject: [PATCH 3/3] Fixed trailing whitespace

---
 botorch/models/fully_bayesian.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/botorch/models/fully_bayesian.py b/botorch/models/fully_bayesian.py
index 0bddcf17cb..e71d7837bf 100644
--- a/botorch/models/fully_bayesian.py
+++ b/botorch/models/fully_bayesian.py
@@ -10,16 +10,17 @@
 as lengthscales and noise variance, learning a posterior distribution for the
 hyperparameters using the No-U-Turn-Sampler (NUTS). This is followed by
 sampling a small set of hyperparameters (often ~16) from the posterior
-that we will use for model predictions and for computing acquisition function 
+that we will use for model predictions and for computing acquisition function
 values. By contrast, our “standard” models (e.g.
 `SingleTaskGP`) learn only a single best value for each hyperparameter using
 MAP. The fully Bayesian method generally results in a better and more
 well-calibrated model, but is more computationally intensive. For a full
 description, see [Eriksson2021saasbo].
 
-We use a lightweight PyTorch implementation of a Matern-5/2 kernel as there are some
-performance issues with running NUTS on top of standard GPyTorch models. The resulting
-hyperparameter samples are loaded into a batched GPyTorch model after fitting.
+We use a lightweight PyTorch implementation of a Matern-5/2 kernel as there are
+some performance issues with running NUTS on top of standard GPyTorch models.
+The resulting hyperparameter samples are loaded into a batched GPyTorch model
+after fitting.
 
 References:
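
A minimal end-to-end sketch of the workflow the updated module docstring describes, mirroring the `Example` block added in PATCH 1/3. The import paths (`botorch.models.fully_bayesian.SaasFullyBayesianSingleTaskGP`, `botorch.fit.fit_fully_bayesian_model_nuts`) are BoTorch's public API; the synthetic data, tensor shapes, and dtype are illustrative assumptions and are not part of these patches:

    # Illustrative sketch only; training data and shapes are assumed, not from the patch.
    import torch

    from botorch.fit import fit_fully_bayesian_model_nuts
    from botorch.models.fully_bayesian import SaasFullyBayesianSingleTaskGP

    train_X = torch.rand(20, 4, dtype=torch.float64)        # 20 points, 4 parameters
    train_Y = train_X.sin().sum(dim=-1, keepdim=True)        # toy single-output response
    test_X = torch.rand(5, 4, dtype=torch.float64)

    # NUTS learns a posterior over the hyperparameters (lengthscales, noise, etc.);
    # the resulting samples are loaded into a batched GPyTorch model, so the
    # posterior below is a mixture over those hyperparameter draws.
    saas_gp = SaasFullyBayesianSingleTaskGP(train_X, train_Y)
    fit_fully_bayesian_model_nuts(saas_gp)
    posterior = saas_gp.posterior(test_X)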