# Vanilla GP Supervised Learning via Precision Matrix on 1D Manifold

## Preamble

This notebook provides an example of how to perform Gaussian Process regression on a 1D manifold. We consider a fully supervised learning scenario: every point sampled from the underlying manifold is labeled, i.e. the number of labeled data points equals the number of sampled points.

In [1]:
import torch
import gpytorch
import numpy as np
from scipy.io import loadmat
import scipy.spatial as ss
import urllib.request
import os

from manifold_gp.models.vanilla_gp import VanillaGP

## Dataset Preprocessing

### Load & Settings

In [2]:
# --- Settings ----------------------------------------------------------------
dataset = 'protein'  # one of: 'protein', 'elevators'
cut = 10000          # keep only the first `cut` rows of the protein table

# Fraction of points (ranked by nearest-neighbour distance) trimmed at each end.
quantile_low, quantile_high = 0.10, 0.90

normalize_features = True
normalize_labels = True

# --- Load --------------------------------------------------------------------
# The data files are read from ../benchmark/ and must already be present.
# NOTE: an earlier download step was dropped here: it indexed an undefined
# `datasets` mapping (NameError on a fresh kernel) and saved to ../outputs/,
# a location this notebook never reads from.
if dataset == 'protein':
    # First column is the regression target, the remaining columns are features.
    data = np.loadtxt('../benchmark/protein.csv', delimiter=",")[:cut]
    sampled_x, sampled_y = data[:, 1:], data[:, 0]
elif dataset == 'elevators':
    # Last column is the regression target, the preceding columns are features.
    data = np.array(loadmat('../benchmark/elevators.mat')['data'])
    sampled_x, sampled_y = data[:, :-1], data[:, -1]
else:
    # Fail early instead of hitting an undefined `sampled_x` further down.
    raise ValueError(
        f"Unknown dataset '{dataset}'; expected 'protein' or 'elevators'."
    )

# --- Remove coincident points --------------------------------------------------
# np.unique returns the distinct rows (sorted) and the index of their first
# occurrence, which is used to pick the matching labels.
sampled_x, id_unique = np.unique(sampled_x, axis=0, return_index=True)
sampled_y = sampled_y[id_unique]

# --- Trim by nearest-neighbour distance ----------------------------------------
# Keep the points whose nearest-neighbour distance lies between the 0.10 and
# 0.90 quantiles. With k=2 the first neighbour is the query point itself, so
# column 1 holds the distance to the closest *other* point.
kd_tree = ss.KDTree(sampled_x)
v = kd_tree.query(sampled_x, k=2)[0][:, 1]
idx = np.argsort(v)
percentile_start = int(np.round(idx.shape[0] * quantile_low))
percentile_end = int(np.round(idx.shape[0] * quantile_high))
# Note: after this selection the samples are ordered by increasing
# nearest-neighbour distance.
sampled_x = sampled_x[idx[percentile_start:percentile_end], :]
sampled_y = sampled_y[idx[percentile_start:percentile_end]]
m = sampled_x.shape[0]

### Trainset & Testset

In [3]:
# First 80% of the samples go to training, the remaining 20% to testing.
# NOTE(review): sampled_x / sampled_y arrive ordered by nearest-neighbour
# distance (argsort in the preprocessing cell), so this sequential split is not
# random -- the test set holds the points with the largest distances. Confirm
# this is intended.
split = int(0.8 * m)

train_x = torch.from_numpy(sampled_x[:split]).float()
train_y = torch.from_numpy(sampled_y[:split]).float()
test_x = torch.from_numpy(sampled_x[split:]).float()
test_y = torch.from_numpy(sampled_y[split:]).float()

if normalize_features:
    # Per-feature statistics come from the training set only and are reused
    # for the test set; the 1e-6 guards against division by a zero std.
    mu_x = train_x.mean(dim=-2, keepdim=True)
    std_x = train_x.std(dim=-2, keepdim=True) + 1e-6
    for features in (train_x, test_x):
        features.sub_(mu_x).div_(std_x)

if normalize_labels:
    # Labels are standardised with the training mean / std as well.
    mu_y = train_y.mean()
    std_y = train_y.std()
    for labels in (train_y, test_y):
        labels.sub_(mu_y).div_(std_y)

### Move Data to Device

In [4]:
# Use the GPU when one is available, otherwise stay on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Make every tensor contiguous in memory, then transfer it to the chosen device.
train_x = train_x.contiguous().to(device)
train_y = train_y.contiguous().to(device)
test_x = test_x.contiguous().to(device)
test_y = test_y.contiguous().to(device)

## Model

In [5]:
%%capture
# Gaussian observation model; the constraint keeps the noise above 1e-8.
# No prior is placed on the noise. A NormalPrior with mean 0 and standard
# deviation sqrt(1/9) was considered as an alternative.
noise_floor = gpytorch.constraints.GreaterThan(1e-8)
likelihood = gpytorch.likelihoods.GaussianLikelihood(
    noise_constraint=noise_floor,
    noise_prior=None,
)

# Scaled Matern kernel with nu=2.5; ScaleKernel(RBFKernel()) is a drop-in
# alternative.
base_kernel = gpytorch.kernels.MaternKernel(nu=2.5)
kernel = gpytorch.kernels.ScaleKernel(base_kernel)

model = VanillaGP(train_x, train_y, likelihood, kernel)
model = model.to(device)

# Initial hyperparameter values, keyed by their module path inside the model.
hypers = {}
hypers['likelihood.noise_covar.noise'] = 1e-2
hypers['covar_module.base_kernel.lengthscale'] = 0.5
hypers['covar_module.outputscale'] = 1.0
model.initialize(**hypers)

## Train

In [6]:
# Training settings gathered in one place; with verbose=True the call prints
# one line per iteration (loss, noise variance, signal variance, lengthscale).
train_settings = {"lr": 1e-1, "iter": 100, "verbose": True}
model.vanilla_train(**train_settings)

Iteration: 0, Loss: 1.167, Noise Variance: 0.100, Signal Variance: 1.000, Lengthscale: 0.500
Iteration: 1, Loss: 1.079, Noise Variance: 0.105, Signal Variance: 1.032, Lengthscale: 0.462
Iteration: 2, Loss: 1.072, Noise Variance: 0.109, Signal Variance: 1.060, Lengthscale: 0.431
Iteration: 3, Loss: 1.076, Noise Variance: 0.111, Signal Variance: 1.084, Lengthscale: 0.405
Iteration: 4, Loss: 1.084, Noise Variance: 0.112, Signal Variance: 1.101, Lengthscale: 0.385
Iteration: 5, Loss: 1.088, Noise Variance: 0.112, Signal Variance: 1.112, Lengthscale: 0.372
Iteration: 6, Loss: 1.092, Noise Variance: 0.110, Signal Variance: 1.117, Lengthscale: 0.363
Iteration: 7, Loss: 1.097, Noise Variance: 0.108, Signal Variance: 1.116, Lengthscale: 0.360
Iteration: 8, Loss: 1.092, Noise Variance: 0.106, Signal Variance: 1.111, Lengthscale: 0.360
Iteration: 9, Loss: 1.090, Noise Variance: 0.102, Signal Variance: 1.102, Lengthscale: 0.363
Iteration: 10, Loss: 1.093, Noise Variance: 0.099, Signal Variance: 1.

Iteration: 90, Loss: 1.078, Noise Variance: 0.039, Signal Variance: 1.021, Lengthscale: 0.383
Iteration: 91, Loss: 1.074, Noise Variance: 0.039, Signal Variance: 1.021, Lengthscale: 0.383
Iteration: 92, Loss: 1.075, Noise Variance: 0.039, Signal Variance: 1.022, Lengthscale: 0.381
Iteration: 93, Loss: 1.079, Noise Variance: 0.039, Signal Variance: 1.023, Lengthscale: 0.380
Iteration: 94, Loss: 1.081, Noise Variance: 0.039, Signal Variance: 1.024, Lengthscale: 0.379
Iteration: 95, Loss: 1.078, Noise Variance: 0.039, Signal Variance: 1.024, Lengthscale: 0.378
Iteration: 96, Loss: 1.073, Noise Variance: 0.039, Signal Variance: 1.024, Lengthscale: 0.377
Iteration: 97, Loss: 1.075, Noise Variance: 0.039, Signal Variance: 1.023, Lengthscale: 0.378
Iteration: 98, Loss: 1.084, Noise Variance: 0.038, Signal Variance: 1.021, Lengthscale: 0.378
Iteration: 99, Loss: 1.085, Noise Variance: 0.038, Signal Variance: 1.020, Lengthscale: 0.378


## Evaluation

In [7]:
%%capture
# Put both the likelihood and the GP model in evaluation mode before predicting.
for module in (likelihood, model):
    module.eval()

## Metrics

In [9]:
# Test-set metrics: root-mean-square error and the joint Gaussian negative
# log-likelihood of the test labels, averaged over the number of test points.
n_test = test_y.shape[0]

with torch.no_grad():
    # NOTE(review): cg_tolerance(10000) is a very loose setting for the
    # conjugate-gradient solver -- confirm it is intentional, since it may
    # affect the accuracy of the inv_quad term below.
    with gpytorch.settings.fast_pred_var(), gpytorch.settings.cg_tolerance(10000):
        # Predictive distribution of the noisy observations at the test inputs.
        preds_test = likelihood(model(test_x))

        error = test_y - preds_test.mean
        covar = preds_test.lazy_covariance_matrix.evaluate_kernel()

        # inv_quad = error^T K^{-1} error and logdet = log|K| in a single call.
        inv_quad, logdet = covar.inv_quad_logdet(
            inv_quad_rhs=error.unsqueeze(-1),
            logdet=True,
        )

        rmse = (error.square().sum() / n_test).sqrt()

        # 0.5 * (quadratic term + log-determinant + n * log(2*pi)), per point.
        log_two_pi_term = error.size(-1) * np.log(2 * np.pi)
        nll = 0.5 * (inv_quad + logdet + log_two_pi_term) / n_test

print("RMSE: ", rmse)
print("NLL: ", nll)

RMSE:  tensor(0.8567, device='cuda:0')
NLL:  tensor(1.1789, device='cuda:0')
