#! /usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

r"""
Gaussian Process Regression models based on GPyTorch models.
"""

from typing import Any, List, Optional, Union

import torch
from gpytorch.constraints.constraints import GreaterThan
from gpytorch.distributions.multivariate_normal import MultivariateNormal
from gpytorch.kernels.matern_kernel import MaternKernel
from gpytorch.kernels.scale_kernel import ScaleKernel
from gpytorch.likelihoods.gaussian_likelihood import (
    FixedNoiseGaussianLikelihood,
    GaussianLikelihood,
    _GaussianLikelihoodBase,
)
from gpytorch.likelihoods.likelihood import Likelihood
from gpytorch.likelihoods.noise_models import HeteroskedasticNoise
from gpytorch.means.constant_mean import ConstantMean
from gpytorch.mlls.noise_model_added_loss_term import NoiseModelAddedLossTerm
from gpytorch.models.exact_gp import ExactGP
from gpytorch.module import Module
from gpytorch.priors.smoothed_box_prior import SmoothedBoxPrior
from gpytorch.priors.torch_priors import GammaPrior
from torch import Tensor

from .. import settings
from ..sampling.samplers import MCSampler
from .gpytorch import BatchedMultiOutputGPyTorchModel
from .transforms.outcome import Log, OutcomeTransform
from .utils import validate_input_scaling


MIN_INFERRED_NOISE_LEVEL = 1e-4


class SingleTaskGP(BatchedMultiOutputGPyTorchModel, ExactGP):
    r"""A single-task exact GP model.

    A single-task exact GP using relatively strong priors on the Kernel
    hyperparameters, which work best when covariates are normalized to the
    unit cube and outcomes are standardized (zero mean, unit variance).

    This model works in batch mode (each batch having its own hyperparameters).
    When the training observations include multiple outputs, this model will
    use batching to model outputs independently.

    Use this model when you have independent output(s) and all outputs use the
    same training data. If outputs are independent but use different training
    data, use the ModelListGP. When modeling correlations between outputs, use
    the MultiTaskGP.
    """

    def __init__(
        self,
        train_X: Tensor,
        train_Y: Tensor,
        likelihood: Optional[Likelihood] = None,
        covar_module: Optional[Module] = None,
        outcome_transform: Optional[OutcomeTransform] = None,
    ) -> None:
        r"""A single-task exact GP model.

        Args:
            train_X: A `batch_shape x n x d` tensor of training features.
            train_Y: A `batch_shape x n x m` tensor of training observations.
            likelihood: A likelihood. If omitted, use a standard
                GaussianLikelihood with inferred noise level.
            covar_module: The module computing the covariance (Kernel) matrix.
                If omitted, use a `MaternKernel`.
            outcome_transform: An outcome transform that is applied to the
                training data during instantiation and to the posterior during
                inference (that is, the `Posterior` obtained by calling
                `.posterior` on the model will be on the original scale).

        Example:
            >>> train_X = torch.rand(20, 2)
            >>> train_Y = torch.sin(train_X).sum(dim=1, keepdim=True)
            >>> model = SingleTaskGP(train_X, train_Y)
        """
        if outcome_transform is not None:
            train_Y, _ = outcome_transform(train_Y)
        validate_input_scaling(train_X=train_X, train_Y=train_Y)
        self._validate_tensor_args(X=train_X, Y=train_Y)
        self._set_dimensions(train_X=train_X, train_Y=train_Y)
        train_X, train_Y, _ = self._transform_tensor_args(X=train_X, Y=train_Y)
        if likelihood is None:
            noise_prior = GammaPrior(1.1, 0.05)
            noise_prior_mode = (noise_prior.concentration - 1) / noise_prior.rate
            likelihood = GaussianLikelihood(
                noise_prior=noise_prior,
                batch_shape=self._aug_batch_shape,
                noise_constraint=GreaterThan(
                    MIN_INFERRED_NOISE_LEVEL,
                    transform=None,
                    initial_value=noise_prior_mode,
                ),
            )
        else:
            self._is_custom_likelihood = True
        ExactGP.__init__(self, train_X, train_Y, likelihood)
        self.mean_module = ConstantMean(batch_shape=self._aug_batch_shape)
        if covar_module is None:
            self.covar_module = ScaleKernel(
                MaternKernel(
                    nu=2.5,
                    ard_num_dims=train_X.shape[-1],
                    batch_shape=self._aug_batch_shape,
                    lengthscale_prior=GammaPrior(3.0, 6.0),
                ),
                batch_shape=self._aug_batch_shape,
                outputscale_prior=GammaPrior(2.0, 0.15),
            )
            self._subset_batch_dict = {
                "likelihood.noise_covar.raw_noise": -2,
                "mean_module.constant": -2,
                "covar_module.raw_outputscale": -1,
                "covar_module.base_kernel.raw_lengthscale": -3,
            }
        else:
            self.covar_module = covar_module
            # TODO: Allow subsetting of other covar modules
        if outcome_transform is not None:
            self.outcome_transform = outcome_transform
        self.to(train_X)

    def forward(self, x: Tensor) -> MultivariateNormal:
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return MultivariateNormal(mean_x, covar_x)
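

# Illustrative usage sketch (not part of this module's API): fitting a
# `SingleTaskGP` by maximizing the exact marginal log likelihood and querying
# its posterior. This assumes the standard BoTorch/GPyTorch fitting helpers
# (`fit_gpytorch_model`, `ExactMarginalLogLikelihood`) are available.
#
# >>> from botorch.fit import fit_gpytorch_model
# >>> from gpytorch.mlls import ExactMarginalLogLikelihood
# >>> train_X = torch.rand(20, 2)  # inputs normalized to the unit cube
# >>> train_Y = torch.sin(train_X).sum(dim=1, keepdim=True)
# >>> model = SingleTaskGP(train_X, train_Y)
# >>> mll = ExactMarginalLogLikelihood(model.likelihood, model)
# >>> fit_gpytorch_model(mll)  # optimizes the hyperparameters
# >>> posterior = model.posterior(torch.rand(5, 2))
# >>> mean, variance = posterior.mean, posterior.variance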


class FixedNoiseGP(BatchedMultiOutputGPyTorchModel, ExactGP):
    r"""A single-task exact GP model using fixed noise levels.

    A single-task exact GP that uses fixed observation noise levels. This
    model also uses relatively strong priors on the Kernel hyperparameters,
    which work best when covariates are normalized to the unit cube and
    outcomes are standardized (zero mean, unit variance).

    This model works in batch mode (each batch having its own hyperparameters).
    """

    def __init__(
        self,
        train_X: Tensor,
        train_Y: Tensor,
        train_Yvar: Tensor,
        outcome_transform: Optional[OutcomeTransform] = None,
    ) -> None:
        r"""A single-task exact GP model using fixed noise levels.

        Args:
            train_X: A `batch_shape x n x d` tensor of training features.
            train_Y: A `batch_shape x n x m` tensor of training observations.
            train_Yvar: A `batch_shape x n x m` tensor of observed measurement
                noise.
            outcome_transform: An outcome transform that is applied to the
                training data during instantiation and to the posterior during
                inference (that is, the `Posterior` obtained by calling
                `.posterior` on the model will be on the original scale).

        Example:
            >>> train_X = torch.rand(20, 2)
            >>> train_Y = torch.sin(train_X).sum(dim=1, keepdim=True)
            >>> train_Yvar = torch.full_like(train_Y, 0.2)
            >>> model = FixedNoiseGP(train_X, train_Y, train_Yvar)
        """
        if outcome_transform is not None:
            train_Y, train_Yvar = outcome_transform(train_Y, train_Yvar)
        validate_input_scaling(
            train_X=train_X, train_Y=train_Y, train_Yvar=train_Yvar
        )
        self._validate_tensor_args(X=train_X, Y=train_Y, Yvar=train_Yvar)
        self._set_dimensions(train_X=train_X, train_Y=train_Y)
        train_X, train_Y, train_Yvar = self._transform_tensor_args(
            X=train_X, Y=train_Y, Yvar=train_Yvar
        )
        likelihood = FixedNoiseGaussianLikelihood(
            noise=train_Yvar, batch_shape=self._aug_batch_shape
        )
        ExactGP.__init__(
            self, train_inputs=train_X, train_targets=train_Y, likelihood=likelihood
        )
        self.mean_module = ConstantMean(batch_shape=self._aug_batch_shape)
        self.covar_module = ScaleKernel(
            base_kernel=MaternKernel(
                nu=2.5,
                ard_num_dims=train_X.shape[-1],
                batch_shape=self._aug_batch_shape,
                lengthscale_prior=GammaPrior(3.0, 6.0),
            ),
            batch_shape=self._aug_batch_shape,
            outputscale_prior=GammaPrior(2.0, 0.15),
        )
        if outcome_transform is not None:
            self.outcome_transform = outcome_transform
        self._subset_batch_dict = {
            "mean_module.constant": -2,
            "covar_module.raw_outputscale": -1,
            "covar_module.base_kernel.raw_lengthscale": -3,
        }
        self.to(train_X)

    def fantasize(
        self,
        X: Tensor,
        sampler: MCSampler,
        observation_noise: Union[bool, Tensor] = True,
        **kwargs: Any,
    ) -> "FixedNoiseGP":
        r"""Construct a fantasy model.

        Constructs a fantasy model in the following fashion:
        (1) compute the model posterior at `X` (if `observation_noise=True`,
        this includes observation noise taken as the mean across the
        observation noise in the training data; if `observation_noise` is a
        Tensor, use it directly as the observation noise to add).
        (2) sample from this posterior (using `sampler`) to generate "fake"
        observations.
        (3) condition the model on the new fake observations.

        Args:
            X: A `batch_shape x n' x d`-dim Tensor, where `d` is the dimension
                of the feature space, `n'` is the number of points per batch,
                and `batch_shape` is the batch shape (must be compatible with
                the batch shape of the model).
            sampler: The sampler used for sampling from the posterior at `X`.
            observation_noise: If True, include the mean across the observation
                noise in the training data as observation noise in the
                posterior from which the samples are drawn. If a Tensor, use
                it directly as the specified measurement noise.

        Returns:
            The constructed fantasy model.
        """
        propagate_grads = kwargs.pop("propagate_grads", False)
        with settings.propagate_grads(propagate_grads):
            post_X = self.posterior(X, observation_noise=observation_noise, **kwargs)
            Y_fantasized = sampler(post_X)  # num_fantasies x batch_shape x n' x m
            # Use the mean of the previous noise values (TODO: be smarter here).
            # noise should be batch_shape x q x m when X is batch_shape x q x d,
            # and Y_fantasized is num_fantasies x batch_shape x q x m.
            noise_shape = Y_fantasized.shape[1:]
            noise = self.likelihood.noise.mean().expand(noise_shape)
            return self.condition_on_observations(X=X, Y=Y_fantasized, noise=noise)
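
    # Illustrative sketch of `fantasize` (hypothetical values, assuming the
    # `SobolQMCNormalSampler` from `botorch.sampling.samplers`): drawing 16
    # fantasy samples at 4 new points yields a fantasy model whose batch
    # shape gains a leading `num_fantasies` dimension of size 16.
    #
    # >>> from botorch.sampling.samplers import SobolQMCNormalSampler
    # >>> sampler = SobolQMCNormalSampler(num_samples=16)
    # >>> X_new = torch.rand(4, 2)
    # >>> fantasy_model = model.fantasize(X=X_new, sampler=sampler)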

    def forward(self, x: Tensor) -> MultivariateNormal:
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return MultivariateNormal(mean_x, covar_x)

    def subset_output(self, idcs: List[int]) -> "BatchedMultiOutputGPyTorchModel":
        r"""Subset the model along the output dimension.

        Args:
            idcs: The output indices to subset the model to.

        Returns:
            The current model, subset to the specified output indices.
        """
        new_model = super().subset_output(idcs=idcs)
        full_noise = new_model.likelihood.noise_covar.noise
        new_noise = full_noise[..., idcs if len(idcs) > 1 else idcs[0], :]
        new_model.likelihood.noise_covar.noise = new_noise
        return new_model
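

# Illustrative usage sketch (not part of this module's API): a `FixedNoiseGP`
# with known measurement variances. Since the noise is fixed, fitting only
# optimizes the mean and kernel hyperparameters; same fitting helpers as in
# the `SingleTaskGP` sketch above.
#
# >>> from botorch.fit import fit_gpytorch_model
# >>> from gpytorch.mlls import ExactMarginalLogLikelihood
# >>> train_X = torch.rand(20, 2)
# >>> train_Y = torch.sin(train_X).sum(dim=1, keepdim=True)
# >>> train_Yvar = torch.full_like(train_Y, 0.2)  # known noise variances
# >>> model = FixedNoiseGP(train_X, train_Y, train_Yvar)
# >>> mll = ExactMarginalLogLikelihood(model.likelihood, model)
# >>> fit_gpytorch_model(mll)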


class HeteroskedasticSingleTaskGP(SingleTaskGP):
    r"""A single-task exact GP model using a heteroskedastic noise model.

    This model internally wraps another GP (a SingleTaskGP) to model the
    observation noise. This allows the likelihood to make out-of-sample
    predictions for the observation noise levels.
    """

    def __init__(
        self,
        train_X: Tensor,
        train_Y: Tensor,
        train_Yvar: Tensor,
        outcome_transform: Optional[OutcomeTransform] = None,
    ) -> None:
        r"""A single-task exact GP model using a heteroskedastic noise model.

        Args:
            train_X: A `batch_shape x n x d` tensor of training features.
            train_Y: A `batch_shape x n x m` tensor of training observations.
            train_Yvar: A `batch_shape x n x m` tensor of observed measurement
                noise.
            outcome_transform: An outcome transform that is applied to the
                training data during instantiation and to the posterior during
                inference (that is, the `Posterior` obtained by calling
                `.posterior` on the model will be on the original scale).
                Note that the noise model internally log-transforms the
                variances, which will happen after this transform is applied.

        Example:
            >>> train_X = torch.rand(20, 2)
            >>> train_Y = torch.sin(train_X).sum(dim=1, keepdim=True)
            >>> se = torch.norm(train_X, dim=1, keepdim=True)
            >>> train_Yvar = 0.1 + se * torch.rand_like(train_Y)
            >>> model = HeteroskedasticSingleTaskGP(train_X, train_Y, train_Yvar)
        """
        if outcome_transform is not None:
            train_Y, train_Yvar = outcome_transform(train_Y, train_Yvar)
        validate_input_scaling(
            train_X=train_X, train_Y=train_Y, train_Yvar=train_Yvar
        )
        self._validate_tensor_args(X=train_X, Y=train_Y, Yvar=train_Yvar)
        self._set_dimensions(train_X=train_X, train_Y=train_Y)
        noise_likelihood = GaussianLikelihood(
            noise_prior=SmoothedBoxPrior(-3, 5, 0.5, transform=torch.log),
            batch_shape=self._aug_batch_shape,
            noise_constraint=GreaterThan(
                MIN_INFERRED_NOISE_LEVEL, transform=None, initial_value=1.0
            ),
        )
        noise_model = SingleTaskGP(
            train_X=train_X,
            train_Y=train_Yvar,
            likelihood=noise_likelihood,
            outcome_transform=Log(),
        )
        likelihood = _GaussianLikelihoodBase(HeteroskedasticNoise(noise_model))
        super().__init__(train_X=train_X, train_Y=train_Y, likelihood=likelihood)
        self.register_added_loss_term("noise_added_loss")
        self.update_added_loss_term(
            "noise_added_loss", NoiseModelAddedLossTerm(noise_model)
        )
        if outcome_transform is not None:
            self.outcome_transform = outcome_transform
        self.to(train_X)

    def condition_on_observations(
        self, X: Tensor, Y: Tensor, **kwargs: Any
    ) -> "HeteroskedasticSingleTaskGP":
        raise NotImplementedError

    def subset_output(self, idcs: List[int]) -> "HeteroskedasticSingleTaskGP":
        raise NotImplementedError
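

# Illustrative usage sketch (not part of this module's API): fitting a
# `HeteroskedasticSingleTaskGP`. The internal noise GP contributes an added
# loss term, so the model is fit via the exact MLL as usual; the noise model
# then provides out-of-sample noise predictions through the likelihood, which
# the last line below is assumed to surface via `observation_noise=True`.
#
# >>> from botorch.fit import fit_gpytorch_model
# >>> from gpytorch.mlls import ExactMarginalLogLikelihood
# >>> train_X = torch.rand(20, 2)
# >>> train_Y = torch.sin(train_X).sum(dim=1, keepdim=True)
# >>> train_Yvar = 0.1 + torch.rand_like(train_Y)  # heteroskedastic noise
# >>> model = HeteroskedasticSingleTaskGP(train_X, train_Y, train_Yvar)
# >>> mll = ExactMarginalLogLikelihood(model.likelihood, model)
# >>> fit_gpytorch_model(mll)
# >>> posterior = model.posterior(torch.rand(5, 2), observation_noise=True)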