# Understand 2D process with derivatives

https://docs.gpytorch.ai/en/v1.12/examples/08_Advanced_Usage/Simple_GP_Regression_Derivative_Information_2d.html
https://docs.gpytorch.ai/en/v1.12/examples/03_Multitask_Exact_GPs/Multitask_GP_Regression.html

In [17]:
import torch
import gpytorch
import math
from matplotlib import cm
from matplotlib import pyplot as plt
import numpy as np

%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [18]:
def franke(X, Y):
    """Evaluate Franke's function and its analytic partial derivatives.

    The function is a sum of three positive exponential terms minus a
    fourth one; every term depends on the inputs only through ``9*X`` and
    ``9*Y``. All operations are elementwise.

    Args:
        X: Tensor of x-coordinates.
        Y: Tensor of y-coordinates, combined elementwise with ``X``.

    Returns:
        A tuple ``(f, dfx, dfy)`` holding the function value, its partial
        derivative with respect to ``X`` and its partial derivative with
        respect to ``Y``.
    """
    # Scaled coordinates shared by all four terms.
    u = 9*X
    v = 9*Y

    bump_a = .75*torch.exp(-((u - 2) ** 2 + (v - 2) ** 2)/4)
    ridge = .75*torch.exp(-((u + 1) ** 2)/49 - (v + 1)/10)
    bump_b = .5*torch.exp(-((u - 7) ** 2 + (v - 3) ** 2)/4)
    dip = .2*torch.exp(-(u - 4) ** 2 - (v - 7) ** 2)

    f = bump_a + ridge + bump_b - dip

    # Chain rule: each exponential contributes (derivative of its exponent)
    # times itself. The factor 9 is the inner derivative of 9*X (or 9*Y).
    # `dip` enters f with a minus sign, which flips its contribution to +.
    dfx = (
        -2*(u - 2)*9/4 * bump_a
        - 2*(u + 1)*9/49 * ridge
        + -2*(u - 7)*9/4 * bump_b
        + 2*(u - 4)*9 * dip
    )
    # `ridge` is linear in Y inside the exponent, hence the constant -9/10.
    dfy = (
        -2*(v - 2)*9/4 * bump_a
        - 9/10 * ridge
        + -2*(v - 3)*9/4 * bump_b
        + 2*(v - 7)*9 * dip
    )

    return f, dfx, dfy

In [19]:
# Training inputs: a regular 10 x 10 grid on [0, 1]^2, flattened into
# 100 rows of (x, y) coordinates.
xv, yv = torch.meshgrid(
    torch.linspace(0, 1, 10),
    torch.linspace(0, 1, 10),
    indexing="ij",
)
train_x = torch.stack((xv.reshape(-1), yv.reshape(-1)), dim=1)

# Training targets: one row per grid point holding (value, d/dx, d/dy).
f, dfx, dfy = franke(train_x[:, 0], train_x[:, 1])
train_y = torch.stack((f, dfx, dfy), dim=-1)

# Perturb the values and both gradient components with Gaussian noise
# scaled by 0.05.
train_y += 0.05 * torch.randn(train_y.shape)
print(train_y.shape)

torch.Size([100, 3])


In [20]:
class GPModelWithDerivatives(gpytorch.models.ExactGP):
    """Exact GP that models a function of two inputs together with its gradient.

    Uses a constant mean and a scaled RBF kernel in their derivative-aware
    ("Grad") variants, and returns a multitask distribution from ``forward``.
    """

    def __init__(self, train_x, train_y, likelihood):
        """Store the training data and likelihood and build mean and kernel modules."""
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMeanGrad()
        # ard_num_dims=2: one lengthscale per input dimension.
        self.base_kernel = gpytorch.kernels.RBFKernelGrad(ard_num_dims=2)
        self.covar_module = gpytorch.kernels.ScaleKernel(self.base_kernel)

    def forward(self, x):
        """Return the joint distribution over value and derivatives at ``x``.

        For the 100 training points of this notebook the mean has shape
        [100, 3] and the covariance has shape [300, 300].
        """
        mean = self.mean_module(x)
        covar = self.covar_module(x)
        return gpytorch.distributions.MultitaskMultivariateNormal(mean, covar)

# Three tasks per input point: function value, x-derivative and y-derivative.
# The per-task noise parameters can be read from likelihood.raw_task_noises.
likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=3)
model = GPModelWithDerivatives(train_x, train_y, likelihood)

In [13]:
# List every parameter registered on the likelihood together with its shape.
for param_name, param_tensor in likelihood.named_parameters():
    print(param_name, param_tensor.shape)

# Bare expression so the notebook displays the raw per-task noise parameter.
likelihood.raw_task_noises

raw_task_noises torch.Size([3])
raw_noise torch.Size([1])


Parameter containing:
tensor([-2.5971, -2.6760, -2.6152], requires_grad=True)

In [21]:
# Shorten the run when the notebook executes inside the testing framework,
# which is detected through the CI environment variable.
import os
smoke_test = "CI" in os.environ
training_iter = 50
if smoke_test:
    training_iter = 2


# Put the model and the likelihood into training mode before fitting
# hyperparameters.
model.train()
likelihood.train()

# Adam over model.parameters(); per the original notebook this also covers
# the likelihood's parameters (the logged noise changes between iterations).
optimizer = torch.optim.Adam(model.parameters(), lr=0.05)

# Objective for exact GPs: the marginal log likelihood, minimized as its negative.
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

for i in range(training_iter):
    optimizer.zero_grad()
    output = model(train_x)
    loss = -mll(output, train_y)
    loss.backward()

    # Log the hyperparameters the loss was evaluated at, i.e. before this
    # iteration's optimizer step.
    lengthscales = model.covar_module.base_kernel.lengthscale.squeeze()
    print("Iter %d/%d - Loss: %.3f   lengthscales: %.3f, %.3f   noise: %.3f" % (
        i + 1,
        training_iter,
        loss.item(),
        lengthscales[0].item(),
        lengthscales[1].item(),
        model.likelihood.noise.item(),
    ))

    optimizer.step()

Iter 1/50 - Loss: 1.278   lengthscales: 0.693, 0.693   noise: 0.693
Iter 2/50 - Loss: 1.263   lengthscales: 0.668, 0.668   noise: 0.669
Iter 3/50 - Loss: 1.247   lengthscales: 0.645, 0.644   noise: 0.644
Iter 4/50 - Loss: 1.232   lengthscales: 0.621, 0.621   noise: 0.621
Iter 5/50 - Loss: 1.215   lengthscales: 0.599, 0.598   noise: 0.598
Iter 6/50 - Loss: 1.199   lengthscales: 0.576, 0.576   noise: 0.576
Iter 7/50 - Loss: 1.182   lengthscales: 0.555, 0.555   noise: 0.554
Iter 8/50 - Loss: 1.164   lengthscales: 0.534, 0.534   noise: 0.533
Iter 9/50 - Loss: 1.146   lengthscales: 0.513, 0.514   noise: 0.513
Iter 10/50 - Loss: 1.128   lengthscales: 0.493, 0.494   noise: 0.493
Iter 11/50 - Loss: 1.109   lengthscales: 0.473, 0.474   noise: 0.474
Iter 12/50 - Loss: 1.090   lengthscales: 0.453, 0.455   noise: 0.455
Iter 13/50 - Loss: 1.070   lengthscales: 0.435, 0.437   noise: 0.436
Iter 14/50 - Loss: 1.050   lengthscales: 0.416, 0.419   noise: 0.419
Iter 15/50 - Loss: 1.030   lengthscales: 0.

In [23]:
# `output` is the distribution from the last training-loop iteration.
# 100 training points x 3 outputs each (value, d/dx, d/dy) give a 300 x 300 covariance.
output.covariance_matrix.shape

torch.Size([300, 300])