diff --git a/botorch/models/fidelity/__init__.py b/botorch/models/fidelity/__init__.py
new file mode 100644
index 0000000000..80f1c0f7dc
--- /dev/null
+++ b/botorch/models/fidelity/__init__.py
@@ -0,0 +1,8 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+from .gp_regression_fidelity import SingleTaskMultiFidelityGP
+
+
+__all__ = ["SingleTaskMultiFidelityGP"]
diff --git a/botorch/models/fidelity/gp_regression_fidelity.py b/botorch/models/fidelity/gp_regression_fidelity.py
new file mode 100644
index 0000000000..3a861331a4
--- /dev/null
+++ b/botorch/models/fidelity/gp_regression_fidelity.py
@@ -0,0 +1,105 @@
+#! /usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+r"""
+Gaussian Process Regression models based on GPyTorch models.
+"""
+
+from typing import Optional
+
+import torch
+from botorch.exceptions import UnsupportedError
+from botorch.models.fidelity_kernels.downsampling_kernel import DownsamplingKernel
+from botorch.models.fidelity_kernels.exponential_decay_kernel import ExpDecayKernel
+from gpytorch.kernels.rbf_kernel import RBFKernel
+from gpytorch.kernels.scale_kernel import ScaleKernel
+from gpytorch.likelihoods.likelihood import Likelihood
+from gpytorch.priors.torch_priors import GammaPrior
+from torch import Tensor
+
+from ..gp_regression import SingleTaskGP
+
+
+class SingleTaskMultiFidelityGP(SingleTaskGP):
+    r"""A single-task multi-fidelity GP model.
+
+    A subclass of the SingleTaskGP model. By default, the last two columns of
+    train_X are treated as fidelity parameters: training iterations and training
+    data points. The kernel is the one proposed in `https://arxiv.org/abs/1903.04703`.
+
+    Args:
+        train_X: A `n x (d + s)` or `batch_shape x n x (d + s)` (batch mode) tensor
+            of training features, where `s` is the number of fidelity parameters.
+        train_Y: A `n x (o)` or `batch_shape x n x (o)` (batch mode) tensor of
+            training observations.
+        train_iteration_fidelity: An indicator of whether the training iteration
+            fidelity parameter is present.
+        train_data_fidelity: An indicator of whether the downsampling (training
+            data points) fidelity parameter is present. If train_iteration_fidelity
+            and train_data_fidelity are both True, the last and second-to-last
+            columns of train_X are treated as the training data points fidelity
+            parameter and the training iteration fidelity parameter, respectively.
+            Otherwise, the last column of train_X is treated as the fidelity
+            parameter whose indicator is True. train_X is assumed to have at least
+            one fidelity parameter.
+        likelihood: A likelihood. If omitted, use a standard
+            GaussianLikelihood with inferred noise level.
+
+    Example:
+        >>> train_X = torch.rand(20, 4)
+        >>> train_Y = train_X.pow(2).sum(dim=-1)
+        >>> model = SingleTaskMultiFidelityGP(train_X, train_Y)
+    """
+
+    def __init__(
+        self,
+        train_X: Tensor,
+        train_Y: Tensor,
+        train_iteration_fidelity: bool = True,
+        train_data_fidelity: bool = True,
+        likelihood: Optional[Likelihood] = None,
+    ) -> None:
+        train_X, train_Y, _ = self._set_dimensions(train_X=train_X, train_Y=train_Y)
+        num_fidelity = train_iteration_fidelity + train_data_fidelity
+        ard_num_dims = train_X.shape[-1] - num_fidelity
+        active_dimsX = list(range(train_X.shape[-1] - num_fidelity))
+        rbf_kernel = RBFKernel(
+            ard_num_dims=ard_num_dims,
+            batch_shape=self._aug_batch_shape,
+            lengthscale_prior=GammaPrior(3.0, 6.0),
+            active_dims=active_dimsX,
+        )
+        exp_kernel = ExpDecayKernel(
+            batch_shape=self._aug_batch_shape,
+            lengthscale_prior=GammaPrior(3.0, 6.0),
+            offset_prior=GammaPrior(3.0, 6.0),
+            power_prior=GammaPrior(3.0, 6.0),
+        )
+        ds_kernel = DownsamplingKernel(
+            batch_shape=self._aug_batch_shape,
+            offset_prior=GammaPrior(3.0, 6.0),
+            power_prior=GammaPrior(3.0, 6.0),
+        )
+        if train_iteration_fidelity and train_data_fidelity:
+            active_dimsS1 = [train_X.shape[-1] - 1]
+            active_dimsS2 = [train_X.shape[-1] - 2]
+            exp_kernel.active_dims = torch.tensor(active_dimsS1)
+            ds_kernel.active_dims = torch.tensor(active_dimsS2)
+            kernel = rbf_kernel * exp_kernel * ds_kernel
+        elif train_iteration_fidelity or train_data_fidelity:
+            active_dimsS = [train_X.shape[-1] - 1]
+            if train_iteration_fidelity:
+                exp_kernel.active_dims = torch.tensor(active_dimsS)
+                kernel = rbf_kernel * exp_kernel
+            else:
+                ds_kernel.active_dims = torch.tensor(active_dimsS)
+                kernel = rbf_kernel * ds_kernel
+        else:
+            raise UnsupportedError("You should have at least one fidelity parameter.")
+        covar_module = ScaleKernel(
+            kernel,
+            batch_shape=self._aug_batch_shape,
+            outputscale_prior=GammaPrior(2.0, 0.15),
+        )
+        super().__init__(train_X=train_X, train_Y=train_Y, covar_module=covar_module)
+        self.to(train_X)
diff --git a/botorch/models/gp_regression.py b/botorch/models/gp_regression.py
index 6b4566d592..1ae42f5ede 100644
--- a/botorch/models/gp_regression.py
+++ b/botorch/models/gp_regression.py
@@ -22,6 +22,7 @@
 from gpytorch.likelihoods.noise_models import HeteroskedasticNoise
 from gpytorch.means.constant_mean import ConstantMean
 from gpytorch.models.exact_gp import ExactGP
+from gpytorch.module import Module
 from gpytorch.priors.smoothed_box_prior import SmoothedBoxPrior
 from gpytorch.priors.torch_priors import GammaPrior
 from torch import Tensor
@@ -52,7 +53,11 @@ class SingleTaskGP(BatchedMultiOutputGPyTorchModel, ExactGP):
     """
 
     def __init__(
-        self, train_X: Tensor, train_Y: Tensor, likelihood: Optional[Likelihood] = None
+        self,
+        train_X: Tensor,
+        train_Y: Tensor,
+        likelihood: Optional[Likelihood] = None,
+        covar_module: Optional[Module] = None,
     ) -> None:
         r"""A single-task exact GP model.
 
@@ -63,13 +68,14 @@ def __init__(
                 training observations.
             likelihood: A likelihood. If omitted, use a standard
                 GaussianLikelihood with inferred noise level.
+            covar_module: The covariance (kernel) module. If omitted, use a
+                default Matern kernel.
 
         Example:
             >>> train_X = torch.rand(20, 2)
             >>> train_Y = torch.sin(train_X[:, 0]) + torch.cos(train_X[:, 1])
             >>> model = SingleTaskGP(train_X, train_Y)
         """
-        ard_num_dims = train_X.shape[-1]
         train_X, train_Y, _ = self._set_dimensions(train_X=train_X, train_Y=train_Y)
         train_X, train_Y, _ = multioutput_to_batch_mode_transform(
             train_X=train_X, train_Y=train_Y, num_outputs=self._num_outputs
@@ -90,16 +96,19 @@ def __init__(
             self._is_custom_likelihood = True
         ExactGP.__init__(self, train_X, train_Y, likelihood)
         self.mean_module = ConstantMean(batch_shape=self._aug_batch_shape)
-        self.covar_module = ScaleKernel(
-            MaternKernel(
-                nu=2.5,
-                ard_num_dims=ard_num_dims,
+        if covar_module is None:
+            self.covar_module = ScaleKernel(
+                MaternKernel(
+                    nu=2.5,
+                    ard_num_dims=train_X.shape[-1],
+                    batch_shape=self._aug_batch_shape,
+                    lengthscale_prior=GammaPrior(3.0, 6.0),
+                ),
                 batch_shape=self._aug_batch_shape,
-                lengthscale_prior=GammaPrior(3.0, 6.0),
-            ),
-            batch_shape=self._aug_batch_shape,
-            outputscale_prior=GammaPrior(2.0, 0.15),
-        )
+                outputscale_prior=GammaPrior(2.0, 0.15),
+            )
+        else:
+            self.covar_module = covar_module
         self.to(train_X)
 
     def forward(self, x: Tensor) -> MultivariateNormal:
@@ -136,7 +145,6 @@ def __init__(self, train_X: Tensor, train_Y: Tensor, train_Yvar: Tensor) -> None
             >>> train_Yvar = torch.full_like(train_Y, 0.2)
             >>> model = FixedNoiseGP(train_X, train_Y, train_Yvar)
         """
-        ard_num_dims = train_X.shape[-1]
         train_X, train_Y, train_Yvar = self._set_dimensions(
             train_X=train_X, train_Y=train_Y, train_Yvar=train_Yvar
         )
@@ -156,7 +164,7 @@ def __init__(self, train_X: Tensor, train_Y: Tensor, train_Yvar: Tensor) -> None
         self.covar_module = ScaleKernel(
             base_kernel=MaternKernel(
                 nu=2.5,
-                ard_num_dims=ard_num_dims,
+                ard_num_dims=train_X.shape[-1],
                 batch_shape=self._aug_batch_shape,
                 lengthscale_prior=GammaPrior(3.0, 6.0),
             ),
diff --git a/test/models/fidelity/__init__.py b/test/models/fidelity/__init__.py
new file mode 100644
index 0000000000..30a40e9e93
--- /dev/null
+++ b/test/models/fidelity/__init__.py
@@ -0,0 +1,3 @@
+#! /usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
diff --git a/test/models/fidelity/test_gp_regression_fidelity.py b/test/models/fidelity/test_gp_regression_fidelity.py
new file mode 100644
index 0000000000..9fa0219ea9
--- /dev/null
+++ b/test/models/fidelity/test_gp_regression_fidelity.py
@@ -0,0 +1,334 @@
+#! /usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import math
+import unittest
+
+import torch
+from botorch import fit_gpytorch_model
+from botorch.exceptions import UnsupportedError
+from botorch.models.fidelity.gp_regression_fidelity import SingleTaskMultiFidelityGP
+from botorch.models.gp_regression import FixedNoiseGP
+from botorch.posteriors import GPyTorchPosterior
+from botorch.sampling import SobolQMCNormalSampler
+from gpytorch.kernels.scale_kernel import ScaleKernel
+from gpytorch.means import ConstantMean
+from gpytorch.mlls.exact_marginal_log_likelihood import ExactMarginalLogLikelihood
+
+
+def _get_random_data_with_fidelity(
+    batch_shape,
+    num_outputs,
+    n=10,
+    train_iteration_fidelity=True,
+    train_data_fidelity=True,
+    **tkwargs,
+):
+    m = train_iteration_fidelity + train_data_fidelity
+    train_x = torch.linspace(0, 0.95, n, **tkwargs).unsqueeze(-1) + 0.05 * torch.rand(
+        n, 1, **tkwargs
+    ).repeat(batch_shape + torch.Size([1, 1]))
+    s = torch.rand(n, m, **tkwargs).repeat(batch_shape + torch.Size([1, 1]))
+    train_X = torch.cat((train_x, s), dim=-1)
+    train_y = (
+        torch.sin(train_x * (2 * math.pi))
+        + 0.2
+        * torch.randn(n, num_outputs, **tkwargs).repeat(
+            batch_shape + torch.Size([1, 1])
+        )
+        + (1 - s).pow(2).sum(dim=-1).unsqueeze(-1)
+    )
+    if num_outputs == 1:
+        train_y = train_y.squeeze(-1)
+    return train_X, train_y
+
+
+class TestSingleTaskGPFidelity(unittest.TestCase):
+    def _get_model_and_data(
+        self,
+        train_iteration_fidelity,
+        train_data_fidelity,
+        batch_shape,
+        num_outputs,
+        **tkwargs,
+    ):
+        train_X, train_Y = _get_random_data_with_fidelity(
+            batch_shape=batch_shape,
+            num_outputs=num_outputs,
+            train_iteration_fidelity=train_iteration_fidelity,
+            train_data_fidelity=train_data_fidelity,
+            **tkwargs,
+        )
+        model_kwargs = {
+            "train_X": train_X,
+            "train_Y": train_Y,
+            "train_iteration_fidelity": train_iteration_fidelity,
+            "train_data_fidelity": train_data_fidelity,
+        }
+        model = SingleTaskMultiFidelityGP(**model_kwargs)
+        return model, model_kwargs
+
+    def test_exception_message(self, cuda=False):
+        device = torch.device("cuda") if cuda else torch.device("cpu")
+        train_X = torch.rand(20, 4, device=device)
+        train_Y = train_X.pow(2).sum(dim=-1)
+        GP = SingleTaskMultiFidelityGP
+        with self.assertRaises(UnsupportedError):
+            GP(
+                train_X,
+                train_Y,
+                train_iteration_fidelity=False,
+                train_data_fidelity=False,
+            )
+
+    def test_exception_message_cuda(self):
+        if torch.cuda.is_available():
+            self.test_exception_message(cuda=True)
+
+    def test_gp(self, cuda=False):
+        for (train_iteration_fidelity, train_data_fidelity) in [
+            (False, True),
+            (True, False),
+            (True, True),
+        ]:
+            for batch_shape in (torch.Size(), torch.Size([2])):
+                for num_outputs in (1, 2):
+                    for double in (False, True):
+                        num_dim = 1 + train_iteration_fidelity + train_data_fidelity
+                        tkwargs = {
+                            "device": torch.device("cuda")
+                            if cuda
+                            else torch.device("cpu"),
+                            "dtype": torch.double if double else torch.float,
+                        }
+                        model, _ = self._get_model_and_data(
+                            batch_shape=batch_shape,
+                            num_outputs=num_outputs,
+                            train_iteration_fidelity=train_iteration_fidelity,
+                            train_data_fidelity=train_data_fidelity,
+                            **tkwargs,
+                        )
+                        mll = ExactMarginalLogLikelihood(model.likelihood, model).to(
+                            **tkwargs
+                        )
+                        fit_gpytorch_model(
+                            mll, sequential=False, options={"maxiter": 1}
+                        )
+
+                        # test init
+                        self.assertIsInstance(model.mean_module, ConstantMean)
+                        self.assertIsInstance(model.covar_module, ScaleKernel)
+
+                        # test param sizes
+                        params = dict(model.named_parameters())
+                        for p in params:
+                            self.assertEqual(
+                                params[p].numel(),
+                                num_outputs * torch.tensor(batch_shape).prod().item(),
+                            )
+
+                        # test posterior
+                        # test non batch evaluation
+                        X = torch.rand(
+                            batch_shape + torch.Size([3, num_dim]), **tkwargs
+                        )
+                        posterior = model.posterior(X)
+                        self.assertIsInstance(posterior, GPyTorchPosterior)
+                        self.assertEqual(
+                            posterior.mean.shape,
+                            batch_shape + torch.Size([3, num_outputs]),
+                        )
+                        # test batch evaluation
+                        X = torch.rand(
+                            torch.Size([2]) + batch_shape + torch.Size([3, num_dim]),
+                            **tkwargs,
+                        )
+                        posterior = model.posterior(X)
+                        self.assertIsInstance(posterior, GPyTorchPosterior)
+                        self.assertEqual(
+                            posterior.mean.shape,
+                            torch.Size([2])
+                            + batch_shape
+                            + torch.Size([3, num_outputs]),
+                        )
+
+    def test_gp_cuda(self):
+        if torch.cuda.is_available():
+            self.test_gp(cuda=True)
+
+    def test_condition_on_observations(self, cuda=False):
+        for (train_iteration_fidelity, train_data_fidelity) in [
+            (False, True),
+            (True, False),
+            (True, True),
+        ]:
+            for batch_shape in (torch.Size(), torch.Size([2])):
+                for num_outputs in (1, 2):
+                    for double in (False, True):
+                        num_dim = 1 + train_iteration_fidelity + train_data_fidelity
+                        tkwargs = {
+                            "device": torch.device("cuda")
+                            if cuda
+                            else torch.device("cpu"),
+                            "dtype": torch.double if double else torch.float,
+                        }
+                        model, model_kwargs = self._get_model_and_data(
+                            batch_shape=batch_shape,
+                            num_outputs=num_outputs,
+                            train_iteration_fidelity=train_iteration_fidelity,
+                            train_data_fidelity=train_data_fidelity,
+                            **tkwargs,
+                        )
+                        # evaluate model
+                        model.posterior(torch.rand(torch.Size([4, num_dim]), **tkwargs))
+                        # test condition_on_observations
+                        fant_shape = torch.Size([2])
+                        # fantasize at different input points
+                        X_fant, Y_fant = _get_random_data_with_fidelity(
+                            fant_shape + batch_shape,
+                            num_outputs,
+                            n=3,
+                            train_iteration_fidelity=train_iteration_fidelity,
+                            train_data_fidelity=train_data_fidelity,
+                            **tkwargs,
+                        )
+                        c_kwargs = (
+                            {"noise": torch.full_like(Y_fant, 0.01)}
+                            if isinstance(model, FixedNoiseGP)
+                            else {}
+                        )
+                        cm = model.condition_on_observations(X_fant, Y_fant, **c_kwargs)
+                        # fantasize at the same input points
+                        c_kwargs_same_inputs = (
+                            {"noise": torch.full_like(Y_fant[0], 0.01)}
+                            if isinstance(model, FixedNoiseGP)
+                            else {}
+                        )
+                        cm_same_inputs = model.condition_on_observations(
+                            X_fant[0], Y_fant, **c_kwargs_same_inputs
+                        )
+
+                        test_Xs = [
+                            # test broadcasting single input across fantasy and
+                            # model batches
+                            torch.rand(4, num_dim, **tkwargs),
+                            # separate input for each model batch and broadcast across
+                            # fantasy batches
+                            torch.rand(
+                                batch_shape + torch.Size([4, num_dim]), **tkwargs
+                            ),
+                            # separate input for each model and fantasy batch
+                            torch.rand(
+                                fant_shape + batch_shape + torch.Size([4, num_dim]),
+                                **tkwargs,
+                            ),
+                        ]
+                        for test_X in test_Xs:
+                            posterior = cm.posterior(test_X)
+                            self.assertEqual(
+                                posterior.mean.shape,
+                                fant_shape + batch_shape + torch.Size([4, num_outputs]),
+                            )
+                            posterior_same_inputs = cm_same_inputs.posterior(test_X)
+                            self.assertEqual(
+                                posterior_same_inputs.mean.shape,
+                                fant_shape + batch_shape + torch.Size([4, num_outputs]),
+                            )
+
+                            # check that fantasies of batched model are correct
+                            if len(batch_shape) > 0 and test_X.dim() == 2:
+                                state_dict_non_batch = {
+                                    key: (val[0] if val.numel() > 1 else val)
+                                    for key, val in model.state_dict().items()
+                                }
+                                model_kwargs_non_batch = {
+                                    "train_X": model_kwargs["train_X"][0],
+                                    "train_Y": model_kwargs["train_Y"][0],
+                                    "train_iteration_fidelity": model_kwargs[
+                                        "train_iteration_fidelity"
+                                    ],
+                                    "train_data_fidelity": model_kwargs[
+                                        "train_data_fidelity"
+                                    ],
+                                }
+                                if "train_Yvar" in model_kwargs:
+                                    model_kwargs_non_batch["train_Yvar"] = model_kwargs[
+                                        "train_Yvar"
+                                    ][0]
+                                model_non_batch = type(model)(**model_kwargs_non_batch)
+                                model_non_batch.load_state_dict(state_dict_non_batch)
+                                model_non_batch.eval()
+                                model_non_batch.likelihood.eval()
+                                model_non_batch.posterior(
+                                    torch.rand(torch.Size([4, num_dim]), **tkwargs)
+                                )
+                                c_kwargs = (
+                                    {"noise": torch.full_like(Y_fant[0, 0, :], 0.01)}
+                                    if isinstance(model, FixedNoiseGP)
+                                    else {}
+                                )
+                                mnb = model_non_batch
+                                cm_non_batch = mnb.condition_on_observations(
+                                    X_fant[0][0], Y_fant[:, 0, :], **c_kwargs
+                                )
+                                non_batch_posterior = cm_non_batch.posterior(test_X)
+                                self.assertTrue(
+                                    torch.allclose(
+                                        posterior_same_inputs.mean[:, 0, ...],
+                                        non_batch_posterior.mean,
+                                        atol=1e-3,
+                                    )
+                                )
+                                self.assertTrue(
+                                    torch.allclose(
+                                        posterior_same_inputs.mvn.covariance_matrix[
+                                            :, 0, :, :
+                                        ],
+                                        non_batch_posterior.mvn.covariance_matrix,
+                                        atol=1e-3,
+                                    )
+                                )
+
+    def test_condition_on_observations_cuda(self):
+        if torch.cuda.is_available():
+            self.test_condition_on_observations(cuda=True)
+
+    def test_fantasize(self, cuda=False):
+        for (train_iteration_fidelity, train_data_fidelity) in [
+            (False, True),
+            (True, False),
+            (True, True),
+        ]:
+            num_dim = 1 + train_iteration_fidelity + train_data_fidelity
+            for batch_shape in (torch.Size(), torch.Size([2])):
+                for num_outputs in (1, 2):
+                    for double in (False, True):
+                        tkwargs = {
+                            "device": torch.device("cuda")
+                            if cuda
+                            else torch.device("cpu"),
+                            "dtype": torch.double if double else torch.float,
+                        }
+                        model, model_kwargs = self._get_model_and_data(
+                            batch_shape=batch_shape,
+                            num_outputs=num_outputs,
+                            train_iteration_fidelity=train_iteration_fidelity,
+                            train_data_fidelity=train_data_fidelity,
+                            **tkwargs,
+                        )
+                        # fantasize
+                        X_f = torch.rand(
+                            torch.Size(batch_shape + torch.Size([4, num_dim])),
+                            **tkwargs,
+                        )
+                        sampler = SobolQMCNormalSampler(num_samples=3)
+                        fm = model.fantasize(X=X_f, sampler=sampler)
+                        self.assertIsInstance(fm, model.__class__)
+                        fm = model.fantasize(
+                            X=X_f, sampler=sampler, observation_noise=False
+                        )
+                        self.assertIsInstance(fm, model.__class__)
+
+    def test_fantasize_cuda(self):
+        if torch.cuda.is_available():
+            self.test_fantasize(cuda=True)
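
A minimal usage sketch of the new model, assembled from the class docstring example and the tests above. The 6-column train_X (4 design dimensions plus the two default fidelity columns) and the 5-point test batch are illustrative assumptions, not part of the diff:

import torch
from botorch import fit_gpytorch_model
from botorch.models.fidelity.gp_regression_fidelity import SingleTaskMultiFidelityGP
from gpytorch.mlls.exact_marginal_log_likelihood import ExactMarginalLogLikelihood

# Hypothetical training data: 4 design dimensions plus two fidelity columns.
# With the constructor defaults (train_iteration_fidelity=True,
# train_data_fidelity=True), the second-to-last column is the training
# iteration fidelity and the last column is the downsampling fidelity.
train_X = torch.rand(20, 6)
train_Y = train_X.pow(2).sum(dim=-1)

model = SingleTaskMultiFidelityGP(train_X, train_Y)
mll = ExactMarginalLogLikelihood(model.likelihood, model)
fit_gpytorch_model(mll)

# Posterior at new points, using the same column layout as train_X.
test_X = torch.rand(5, 6)
posterior = model.posterior(test_X)
print(posterior.mean.shape)  # expected: torch.Size([5, 1]) per the posterior tests above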