# Manifold GP Supervised Learning via Precision Matrix on 1D Manifold

## Preamble

This notebook provides an example of how to perform Gaussian Process Regression on a 1D manifold. In this example we consider a supervised learning scenario, namely the number of labeled data points is equivalent to the number of the sampled points from the underlying manifold.

In [1]:
import torch
import gpytorch
import numpy as np
import os
import scipy.spatial as ss

from manifold_gp.kernels.riemann_matern_kernel import RiemannMaternKernel
from manifold_gp.models.riemann_gp import RiemannGP
from gpytorch.priors import NormalPrior, GammaPrior

## Dataset Preprocessing

### Load & Settings

In [2]:
dataset = 'protein' # ['protein','elevators']
cut = 10000

if dataset == 'protein':
    data = np.loadtxt('../benchmark/protein.csv', delimiter=",")[:cut]
    sampled_x, sampled_y = data[:, 1:], data[:, 0]
elif dataset == 'elevators':
    data = np.array(loadmat('../benchmark/elevators.mat')['data'])
    sampled_x, sampled_y = data[:, :-1], data[:, -1]
    
    
if not os.path.isfile(f'../outputs/{dataset}.mat'):
    print(f'Downloading \'{dataset}\' UCI dataset...')
    urllib.request.urlretrieve(datasets[dataset], f'../outputs/{dataset}.mat')

# remove coincident points
sampled_x, id_unique = np.unique(sampled_x, axis=0, return_index=True)
sampled_y = sampled_y[id_unique]

# cut between 0.01 and 0.99 quantile of distances
kd_tree = ss.KDTree(sampled_x)
v = kd_tree.query(sampled_x, k=2)[0][:, 1]
idx = np.argsort(v)
percentile_start = int(np.round(idx.shape[0]*0.10))
percentile_end = int(np.round(idx.shape[0]*0.90))
sampled_x = sampled_x[idx[percentile_start:percentile_end], :]
sampled_y = sampled_y[idx[percentile_start:percentile_end]]
m = sampled_x.shape[0]

normalize_features = True
normalize_labels = True

### Trainset & Testset

In [3]:
split = int(0.8 * m)

train_x, train_y = sampled_x[:split], sampled_y[:split]
test_x, test_y = sampled_x[split:], sampled_y[split:]

train_x = torch.from_numpy(train_x).float()
train_y = torch.from_numpy(train_y).float()
test_x = torch.from_numpy(test_x).float()
test_y = torch.from_numpy(test_y).float()

if normalize_features:
    mu_x, std_x = train_x.mean(dim=-2, keepdim=True), train_x.std(dim=-2, keepdim=True) + 1e-6
    train_x.sub_(mu_x).div_(std_x)
    test_x.sub_(mu_x).div_(std_x)
    
if normalize_labels:
    mu_y, std_y = train_y.mean(), train_y.std()
    train_y.sub_(mu_y).div_(std_y)
    test_y.sub_(mu_y).div_(std_y)

### Hyperparameters Priors

In [4]:
import scipy.spatial as ss
neighbors = 10
kd_tree = ss.KDTree(train_x)
v = np.sort(kd_tree.query(train_x, k=neighbors+1)[0][:, 1:].ravel())
percentile_99 = int(np.round(v.shape[0]*0.99))
gamma_rate = 100.0/np.std(v)
gamma_concentration = gamma_rate * v[percentile_99] + 1

### Move Data to Device

In [5]:
train_x, train_y = train_x.contiguous(), train_y.contiguous()
test_x, test_y = test_x.contiguous(), test_y.contiguous()

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
train_x, train_y = train_x.to(device), train_y.to(device)
test_x, test_y = test_x.to(device), test_y.to(device)

## Model

In [6]:
%%capture
likelihood = gpytorch.likelihoods.GaussianLikelihood(
    noise_constraint=gpytorch.constraints.GreaterThan(1e-8),
    noise_prior=None  # NormalPrior(torch.tensor([0.0]).to(device),  torch.tensor([1/9]).sqrt().to(device))
)

kernel = gpytorch.kernels.ScaleKernel(
    RiemannMaternKernel(
        nu=1,
        nodes=train_x,
        neighbors=10,
        operator="symmetric",
        modes=10,
        ball_scale=1.0,
        support_kernel=gpytorch.kernels.RBFKernel(),
        epsilon_prior=GammaPrior(gamma_concentration, gamma_rate),
        lengthscale_prior=None  # InverseGammaPrior(igamma_concentration, igamma_rate)
    ),
    outputscale_prior=None  # NormalPrior(torch.tensor([1.0]).to(device),  torch.tensor([1/9]).sqrt().to(device))
)

model = RiemannGP(train_x, train_y, likelihood, kernel).to(device)

## Train

In [7]:
%%capture
hypers = {
    'likelihood.noise_covar.noise': 1e-2,
    'covar_module.base_kernel.epsilon': 0.5,
    'covar_module.base_kernel.lengthscale': 0.5,
    'covar_module.outputscale': 1.0,
    'covar_module.base_kernel.support_kernel.lengthscale': 1.0,
}
model.initialize(**hypers)

In [8]:
model.manifold_informed_train(lr=1e-1, iter=100, verbose=True)

Iter: 0, LR: 0.100, Loss: 2945.053, NoiseVar: 0.010, SignalVar: 11.858, Lengthscale: 0.500, Epsilon: 0.500
Iter: 1, LR: 0.100, Loss: 2933.545, NoiseVar: 0.009, SignalVar: 11.758, Lengthscale: 0.462, Epsilon: 0.462
Iter: 2, LR: 0.100, Loss: 2861.540, NoiseVar: 0.008, SignalVar: 11.694, Lengthscale: 0.489, Epsilon: 0.427
Iter: 3, LR: 0.100, Loss: 2782.364, NoiseVar: 0.008, SignalVar: 11.631, Lengthscale: 0.522, Epsilon: 0.394
Iter: 4, LR: 0.100, Loss: 2693.258, NoiseVar: 0.007, SignalVar: 11.560, Lengthscale: 0.558, Epsilon: 0.363
Iter: 5, LR: 0.100, Loss: 2591.013, NoiseVar: 0.006, SignalVar: 11.481, Lengthscale: 0.598, Epsilon: 0.334
Iter: 6, LR: 0.100, Loss: 2488.245, NoiseVar: 0.006, SignalVar: 11.397, Lengthscale: 0.640, Epsilon: 0.306
Iter: 7, LR: 0.100, Loss: 2403.438, NoiseVar: 0.005, SignalVar: 11.310, Lengthscale: 0.685, Epsilon: 0.280
Iter: 8, LR: 0.100, Loss: 2327.448, NoiseVar: 0.005, SignalVar: 11.229, Lengthscale: 0.733, Epsilon: 0.257
Iter: 9, LR: 0.100, Loss: 2306.316, N

Iter: 89, LR: 0.100, Loss: 1970.244, NoiseVar: 0.000, SignalVar: 6.286, Lengthscale: 2.491, Epsilon: 0.307
Iter: 90, LR: 0.100, Loss: 1981.894, NoiseVar: 0.000, SignalVar: 6.196, Lengthscale: 2.504, Epsilon: 0.309
Iter: 91, LR: 0.100, Loss: 1972.198, NoiseVar: 0.000, SignalVar: 6.106, Lengthscale: 2.517, Epsilon: 0.310
Iter: 92, LR: 0.100, Loss: 1978.374, NoiseVar: 0.000, SignalVar: 6.017, Lengthscale: 2.530, Epsilon: 0.311
Iter: 93, LR: 0.100, Loss: 1973.474, NoiseVar: 0.000, SignalVar: 5.931, Lengthscale: 2.543, Epsilon: 0.314
Iter: 94, LR: 0.100, Loss: 1957.636, NoiseVar: 0.000, SignalVar: 5.848, Lengthscale: 2.557, Epsilon: 0.317
Iter: 95, LR: 0.100, Loss: 1944.637, NoiseVar: 0.000, SignalVar: 5.767, Lengthscale: 2.571, Epsilon: 0.321
Iter: 96, LR: 0.100, Loss: 1975.840, NoiseVar: 0.000, SignalVar: 5.686, Lengthscale: 2.585, Epsilon: 0.324
Iter: 97, LR: 0.100, Loss: 1954.791, NoiseVar: 0.000, SignalVar: 5.602, Lengthscale: 2.599, Epsilon: 0.327
Iter: 98, LR: 0.100, Loss: 1935.855, 

In [9]:
model.manifold_informed_train(lr=1e-1, iter=100, verbose=True)

Iter: 0, LR: 0.100, Loss: 2584.067, NoiseVar: 0.000, SignalVar: 2.894, Lengthscale: 2.643, Epsilon: 0.333
Iter: 1, LR: 0.100, Loss: 2209.219, NoiseVar: 0.000, SignalVar: 2.989, Lengthscale: 2.736, Epsilon: 0.363
Iter: 2, LR: 0.100, Loss: 1984.861, NoiseVar: 0.000, SignalVar: 3.080, Lengthscale: 2.828, Epsilon: 0.393
Iter: 3, LR: 0.100, Loss: 1906.180, NoiseVar: 0.000, SignalVar: 3.163, Lengthscale: 2.918, Epsilon: 0.422
Iter: 4, LR: 0.100, Loss: 1896.228, NoiseVar: 0.001, SignalVar: 3.234, Lengthscale: 3.003, Epsilon: 0.449
Iter: 5, LR: 0.100, Loss: 1945.835, NoiseVar: 0.001, SignalVar: 3.289, Lengthscale: 3.083, Epsilon: 0.471
Iter: 6, LR: 0.100, Loss: 1991.363, NoiseVar: 0.001, SignalVar: 3.328, Lengthscale: 3.155, Epsilon: 0.488
Iter: 7, LR: 0.100, Loss: 2018.860, NoiseVar: 0.001, SignalVar: 3.350, Lengthscale: 3.220, Epsilon: 0.499
Iter: 8, LR: 0.100, Loss: 2028.665, NoiseVar: 0.001, SignalVar: 3.357, Lengthscale: 3.277, Epsilon: 0.504
Iter: 9, LR: 0.100, Loss: 2019.189, NoiseVar: 

Iter: 85, LR: 0.100, Loss: 1794.599, NoiseVar: 0.000, SignalVar: 2.101, Lengthscale: 6.016, Epsilon: 0.530
Iter: 86, LR: 0.100, Loss: 1799.770, NoiseVar: 0.000, SignalVar: 2.082, Lengthscale: 6.044, Epsilon: 0.533
Iter: 87, LR: 0.100, Loss: 1795.171, NoiseVar: 0.000, SignalVar: 2.062, Lengthscale: 6.072, Epsilon: 0.536
Iter: 88, LR: 0.100, Loss: 1802.415, NoiseVar: 0.000, SignalVar: 2.041, Lengthscale: 6.100, Epsilon: 0.539
Iter: 89, LR: 0.100, Loss: 1793.591, NoiseVar: 0.000, SignalVar: 2.020, Lengthscale: 6.127, Epsilon: 0.541
Iter: 90, LR: 0.100, Loss: 1794.927, NoiseVar: 0.000, SignalVar: 1.998, Lengthscale: 6.155, Epsilon: 0.544
Iter: 91, LR: 0.100, Loss: 1783.475, NoiseVar: 0.000, SignalVar: 1.976, Lengthscale: 6.183, Epsilon: 0.546
Iter: 92, LR: 0.100, Loss: 1796.052, NoiseVar: 0.000, SignalVar: 1.953, Lengthscale: 6.211, Epsilon: 0.548
Iter: 93, LR: 0.100, Loss: 1791.066, NoiseVar: 0.000, SignalVar: 1.931, Lengthscale: 6.239, Epsilon: 0.551
Iter: 94, LR: 0.100, Loss: 1793.546, 

## Evaluation

In [10]:
%%capture
likelihood.eval()
model.eval()

## Metrics

In [11]:
with torch.no_grad(), gpytorch.settings.fast_pred_var(), gpytorch.settings.cg_tolerance(10000):
    preds_test = likelihood(model(test_x))
        
    error = test_y - preds_test.mean
    covar = preds_test.lazy_covariance_matrix.evaluate_kernel()
    inv_quad, logdet = covar.inv_quad_logdet(inv_quad_rhs=error.unsqueeze(-1), logdet=True)
    
    rmse = (error.square().sum()/test_y.shape[0]).sqrt()
    nll = 0.5 * sum([inv_quad, logdet, error.size(-1)* np.log(2 * np.pi)])/test_y.shape[0]
    
print("RMSE: ", rmse)
print("NLL: ", nll)

RMSE:  tensor(1.0177, device='cuda:0')
NLL:  tensor(5411.5659, device='cuda:0')
