In [1]:
import torch
import gpytorch

In [2]:
# Global numeric configuration for the whole notebook.
# Double precision is used for every tensor created from here on.
torch.set_default_dtype(torch.float64)
# Fix the global RNG seed so the randomly drawn data below is reproducible.
# The trailing semicolon keeps the returned Generator object from being
# echoed as the cell's output.
torch.manual_seed(0);

<torch._C.Generator at 0x7f66f82a78b0>

In [3]:
# Pick the compute device. The second GPU ("cuda:1") stays the preferred
# target; fall back to the default GPU and then to the CPU so the notebook
# still runs on machines that do not expose two CUDA devices.
if torch.cuda.device_count() > 1:
    device = torch.device("cuda:1")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
n = 10000
d = 50

# Training inputs: n regularly spaced values in [0, 1] (inclusive). The
# column is broadcast to d columns with expand(), so every one of the d
# input dimensions carries the same value for a given row.
train_x = torch.linspace(0, 1, n).unsqueeze(1).expand(-1, d)
# True function is sin(x . freqs), where freqs holds d random frequencies
# drawn as 2*pi * N(0, 1), plus Gaussian noise with variance 0.04.
freqs = 2 * torch.pi * torch.randn(d)
train_y = torch.sin(train_x @ freqs) + \
    torch.randn(train_x.shape[0]) * (0.04 ** 0.5)

In [4]:
class ExactGPModel(gpytorch.models.ExactGP):
    """Exact GP with a constant mean and a scaled covariance kernel.

    The covariance is the supplied ``base_kernel`` wrapped in a
    ``ScaleKernel``.
    """

    def __init__(self, train_x, train_y, likelihood, base_kernel):
        """Store the training data and likelihood and build the mean/kernel.

        Args:
            train_x: training inputs, forwarded to ``ExactGP``.
            train_y: training targets, forwarded to ``ExactGP``.
            likelihood: likelihood object, forwarded to ``ExactGP``.
            base_kernel: kernel that gets wrapped in a ``ScaleKernel``.
        """
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(base_kernel)

    def forward(self, x):
        """Return the multivariate normal built from the mean and kernel at ``x``."""
        return gpytorch.distributions.MultivariateNormal(
            self.mean_module(x),
            self.covar_module(x),
        )

In [5]:
# Move the training tensors onto the selected device.
train_x, train_y = train_x.to(device), train_y.to(device)

# Build the likelihood and the kernel, then assemble the GP model.
likelihood = gpytorch.likelihoods.GaussianLikelihood()
# Matern kernel (nu=2.5) with ard_num_dims=d. Alternative that was tried:
#   base_kernel = gpytorch.kernels.RBFKernel(ard_num_dims=d)
base_kernel = gpytorch.kernels.MaternKernel(nu=2.5, ard_num_dims=d)

# Construct the model and place it, and the likelihood, on the same device
# as the training data.
model = ExactGPModel(train_x, train_y, likelihood, base_kernel).to(device)
likelihood = likelihood.to(device)

In [None]:
# Fit the model hyperparameters by minimizing the negative marginal log
# likelihood with Adam. Both modules are switched to training mode first.
model.train()
likelihood.train()

training_iter = 100

# Objective for GPs: the exact marginal log likelihood (negated in the loop
# so that it can be minimized).
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

# Adam over everything returned by model.parameters()
# (includes GaussianLikelihood parameters).
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)

for i in range(training_iter):
    # Clear gradients left over from the previous step.
    optimizer.zero_grad()
    # Forward pass on the training inputs, then the loss and its gradients.
    output = model(train_x)
    loss = -mll(output, train_y)
    loss.backward()
    # Report the hyperparameter values as they are *before* this
    # iteration's update is applied.
    print(f'Iter {i+1}/{training_iter} - Loss: {loss.item():.3f}   signal: {model.covar_module.outputscale.item():.3f}   '
        f'lengthscale: {model.covar_module.base_kernel.lengthscale.detach().cpu().numpy()}   '
        f'noise: {model.likelihood.noise.item():.3f}')
    optimizer.step()

Iter 1/100 - Loss: 0.771   signal: 0.693   lengthscale: [[0.69314718 0.69314718 0.69314718 0.69314718 0.69314718 0.69314718
  0.69314718 0.69314718 0.69314718 0.69314718 0.69314718 0.69314718
  0.69314718 0.69314718 0.69314718 0.69314718 0.69314718 0.69314718
  0.69314718 0.69314718 0.69314718 0.69314718 0.69314718 0.69314718
  0.69314718 0.69314718 0.69314718 0.69314718 0.69314718 0.69314718
  0.69314718 0.69314718 0.69314718 0.69314718 0.69314718 0.69314718
  0.69314718 0.69314718 0.69314718 0.69314718 0.69314718 0.69314718
  0.69314718 0.69314718 0.69314718 0.69314718 0.69314718 0.69314718
  0.69314718 0.69314718]]   noise: 0.693
Iter 2/100 - Loss: 0.737   signal: 0.644   lengthscale: [[0.74438828 0.74438828 0.74438828 0.74438828 0.74438828 0.74438828
  0.74438828 0.74438828 0.74438828 0.74438828 0.74438828 0.74438828
  0.74438828 0.74438828 0.74438828 0.74438828 0.74438828 0.74438828
  0.74438828 0.74438828 0.74438828 0.74438828 0.74438828 0.74438828
  0.74438828 0.74438828 0.74438