# Manifold GP Semi-Supervised Learning via Precision Matrix on 1D Manifold

## Preamble

This notebook provides an example of how to perform Gaussian Process Regression on a 1D manifold. In this example we consider a semi-supervised learning scenario: only a fraction of the points sampled from the underlying manifold is labeled (the first 20% of the samples form the training set), while all sampled points are passed to the kernel as nodes.

In [1]:
import os
from time import perf_counter, time

import gpytorch
import numpy as np
import scipy.spatial as ss
import torch
from gpytorch.priors import NormalPrior, GammaPrior
from scipy.io import loadmat

from manifold_gp.kernels.riemann_matern_kernel import RiemannMaternKernel
from manifold_gp.models.riemann_gp import RiemannGP

## Dataset Preprocessing

### Load & Settings

In [2]:
# Benchmark to load and the maximum number of rows read from the
# `protein` and `ctslice` CSV files (the other branches do not apply `cut`).
dataset = 'mnist' # ['protein','elevators', 'ctslice', 'mnist']
cut = 10000

if dataset == 'protein':
    # label in column 0, features in the remaining columns
    data = np.loadtxt('datasets/protein.csv', delimiter=",")[:cut]
    sampled_x, sampled_y = data[:, 1:], data[:, 0]
elif dataset == 'elevators':
    # label in the last column, features in all the others
    data = np.array(loadmat('datasets/elevators.mat')['data'])
    sampled_x, sampled_y = data[:, :-1], data[:, -1]
elif dataset == 'ctslice':
    # label in the last column, features in all the others
    data = np.loadtxt('datasets/ctslice.csv', delimiter=",")[:cut]
    sampled_x, sampled_y = data[:, :-1], data[:, -1]
elif dataset == 'mnist':
    # label in column 1, features from column 2 onwards (column 0 is not used)
    data = np.loadtxt('datasets/mnist.csv')
    sampled_x, sampled_y = data[:, 2:], data[:, 1]
    # shuffle the rows with a fixed seed so the permutation is reproducible
    torch.manual_seed(1337)
    rand_idx = torch.randperm(sampled_x.shape[0])
    sampled_x = sampled_x[rand_idx]
    sampled_y = sampled_y[rand_idx]
else:
    # Fail here with a clear message instead of a NameError on `sampled_x`
    # in a later cell.
    raise ValueError(
        f"Unknown dataset '{dataset}'; expected one of "
        "['protein', 'elevators', 'ctslice', 'mnist']"
    )

# Switches read by the following cells.
preprocess = False
normalize_features = False
normalize_labels = True

In [3]:
if preprocess:
    # Drop duplicated rows; `id_unique` holds the index of the first
    # occurrence of every unique row, so the labels can be selected to match.
    sampled_x, id_unique = np.unique(sampled_x, axis=0, return_index=True)
    sampled_y = sampled_y[id_unique]

    # Rank the points by the distance in column 1 of a k=2 query (the closest
    # neighbour other than the point itself) and keep those ranked between
    # the 10% and 90% positions.
    # Note: the retained samples end up ordered by increasing distance.
    kd_tree = ss.KDTree(sampled_x)
    nn_dists = kd_tree.query(sampled_x, k=2)[0]
    v = nn_dists[:, 1]
    idx = np.argsort(v)
    percentile_start = int(np.round(idx.shape[0]*0.10))
    percentile_end = int(np.round(idx.shape[0]*0.90))
    kept = idx[percentile_start:percentile_end]
    sampled_x = sampled_x[kept, :]
    sampled_y = sampled_y[kept]

# Number of samples available after the (optional) filtering.
m = sampled_x.shape[0]

### Trainset & Testset

In [4]:
# The first 20% of the samples are the labelled training set; the rest is
# held out for testing.
split = int(0.2 * m)
train_idx = torch.arange(0, split)

# Slice the NumPy arrays first, then turn every piece into a float tensor.
train_x = torch.from_numpy(sampled_x[:split]).float()
train_y = torch.from_numpy(sampled_y[:split]).float()
test_x = torch.from_numpy(sampled_x[split:]).float()
test_y = torch.from_numpy(sampled_y[split:]).float()
sampled_x = torch.from_numpy(sampled_x).float()

if normalize_features:
    # Per-feature statistics computed over all sampled points; the small
    # constant keeps the division finite for constant features.
    mu_x = sampled_x.mean(dim=-2, keepdim=True)
    std_x = sampled_x.std(dim=-2, keepdim=True) + 1e-6
    for _t in (sampled_x, train_x, test_x):
        _t.sub_(mu_x).div_(std_x)

if normalize_labels:
    # Label statistics come from the training labels only and are applied
    # in place to both the training and the test labels.
    mu_y = train_y.mean()
    std_y = train_y.std()
    for _t in (train_y, test_y):
        _t.sub_(mu_y).div_(std_y)

### Hyperparameter Priors

In [5]:
# Statistics of the nearest-neighbour distances, used to parameterise the
# Gamma prior on the kernel's epsilon.
# (`scipy.spatial` is already imported as `ss` in the import cell.)
neighbors = 10
kd_tree = ss.KDTree(sampled_x)
# Distances from every point to its `neighbors` nearest points (column 0 of
# the query result is dropped), flattened and sorted in increasing order.
v = np.sort(kd_tree.query(sampled_x, k=neighbors+1)[0][:, 1:].ravel())
# Position of the 99% entry in the sorted distances, clamped to the last
# valid index: for 50 or fewer distances the rounded position equals
# v.shape[0] and would raise an IndexError.
percentile_99 = min(int(np.round(v.shape[0]*0.99)), v.shape[0] - 1)
gamma_rate = 100.0/np.std(v)
# Chosen so that (concentration - 1) / rate, the mode of a Gamma
# distribution, equals v[percentile_99].
gamma_concentration = gamma_rate * v[percentile_99] + 1

### Move Data to Device

In [6]:
# Use the GPU when one is available, otherwise stay on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Make every tensor contiguous and transfer it to the selected device.
sampled_x = sampled_x.contiguous().to(device)
# train_idx = train_idx.contiguous().to(device)
train_x = train_x.contiguous().to(device)
train_y = train_y.contiguous().to(device)
test_x = test_x.contiguous().to(device)
test_y = test_y.contiguous().to(device)

## Model

In [7]:
%%capture
likelihood = gpytorch.likelihoods.GaussianLikelihood(
    noise_constraint=gpytorch.constraints.GreaterThan(1e-8),
    noise_prior=None  # NormalPrior(torch.tensor([0.0]).to(device),  torch.tensor([1/9]).sqrt().to(device))
)

kernel = gpytorch.kernels.ScaleKernel(
    RiemannMaternKernel(
        nu=3,
        nodes=sampled_x,
        neighbors=50,
        operator="randomwalk",
        modes=100,
        ball_scale=3.0,
        support_kernel=gpytorch.kernels.RBFKernel(),
        epsilon_prior=GammaPrior(gamma_concentration, gamma_rate),
        lengthscale_prior=None, # InverseGammaPrior(igamma_concentration, igamma_rate)
    ),
    outputscale_prior=None  # NormalPrior(torch.tensor([1.0]).to(device),  torch.tensor([1/9]).sqrt().to(device))
)

model = RiemannGP(train_x, train_y, likelihood, kernel, train_idx).to(device)

## Train

In [8]:
%%capture
hypers = {
    'likelihood.noise_covar.noise': 1e-2, # 0.037,
    'covar_module.base_kernel.epsilon': 0.5, # 0.029,
    'covar_module.base_kernel.lengthscale': 0.5, # 5.130,
    'covar_module.outputscale': 1.0, # 0.0656,
    'covar_module.base_kernel.support_kernel.lengthscale': 1.0,
}
model.initialize(**hypers)

In [9]:
# Time the training run with a monotonic, high-resolution clock
# (`perf_counter` is imported in the import cell).
t0 = perf_counter()
model.manifold_informed_train(lr=1e-2, iter=100, norm_step_size=100, verbose=True)
t1 = perf_counter()
# Fixed-point format: "%.2g" would print runs of 100 s or more in
# scientific notation (e.g. "1.2e+02").
print("Time: %.2f sec" % (t1 - t0))

Iter: 0, LR: 0.010, Loss: 900.358, NoiseVar: 0.010, SignalVar: 624056.500, Lengthscale: 0.500, Epsilon: 0.500
Iter: 1, LR: 0.010, Loss: 897.037, NoiseVar: 0.010, SignalVar: 624056.500, Lengthscale: 0.496, Epsilon: 0.496
Iter: 2, LR: 0.010, Loss: 895.467, NoiseVar: 0.010, SignalVar: 624056.500, Lengthscale: 0.493, Epsilon: 0.492
Iter: 3, LR: 0.010, Loss: 895.665, NoiseVar: 0.010, SignalVar: 624056.500, Lengthscale: 0.491, Epsilon: 0.488
Iter: 4, LR: 0.010, Loss: 890.094, NoiseVar: 0.010, SignalVar: 624056.500, Lengthscale: 0.491, Epsilon: 0.484
Iter: 5, LR: 0.010, Loss: 892.024, NoiseVar: 0.010, SignalVar: 624056.500, Lengthscale: 0.492, Epsilon: 0.481
Iter: 6, LR: 0.010, Loss: 887.028, NoiseVar: 0.009, SignalVar: 624056.500, Lengthscale: 0.494, Epsilon: 0.477
Iter: 7, LR: 0.010, Loss: 891.085, NoiseVar: 0.009, SignalVar: 624056.500, Lengthscale: 0.495, Epsilon: 0.473
Iter: 8, LR: 0.010, Loss: 884.693, NoiseVar: 0.009, SignalVar: 624056.500, Lengthscale: 0.497, Epsilon: 0.469
Iter: 9, L

Iter: 74, LR: 0.010, Loss: 538.160, NoiseVar: 0.004, SignalVar: 624056.500, Lengthscale: 0.520, Epsilon: 0.235
Iter: 75, LR: 0.010, Loss: 524.656, NoiseVar: 0.004, SignalVar: 624056.500, Lengthscale: 0.521, Epsilon: 0.232
Iter: 76, LR: 0.010, Loss: 514.554, NoiseVar: 0.004, SignalVar: 624056.500, Lengthscale: 0.523, Epsilon: 0.228
Iter: 77, LR: 0.010, Loss: 500.448, NoiseVar: 0.004, SignalVar: 624056.500, Lengthscale: 0.525, Epsilon: 0.225
Iter: 78, LR: 0.010, Loss: 467.050, NoiseVar: 0.004, SignalVar: 624056.500, Lengthscale: 0.527, Epsilon: 0.222
Iter: 79, LR: 0.010, Loss: 456.198, NoiseVar: 0.004, SignalVar: 624056.500, Lengthscale: 0.529, Epsilon: 0.218
Iter: 80, LR: 0.010, Loss: 438.170, NoiseVar: 0.004, SignalVar: 624056.500, Lengthscale: 0.530, Epsilon: 0.215
Iter: 81, LR: 0.010, Loss: 415.882, NoiseVar: 0.004, SignalVar: 624056.500, Lengthscale: 0.531, Epsilon: 0.212
Iter: 82, LR: 0.010, Loss: 396.611, NoiseVar: 0.004, SignalVar: 624056.500, Lengthscale: 0.533, Epsilon: 0.208
I

## Evaluation

In [36]:
%%capture
likelihood.eval()
model.eval()


## Metrics

In [37]:
# Evaluate the predictive distribution on the test set and derive RMSE and
# the per-point negative log-likelihood, without tracking gradients.
with torch.no_grad(), gpytorch.settings.fast_pred_var():
    preds_test = likelihood(model(test_x))
    mean_test = preds_test.mean

    # Residuals between the test labels and the predictive mean.
    error = test_y - mean_test

    # Quadratic form error^T K^{-1} error and log|K| of the predictive covariance.
    covar = preds_test.lazy_covariance_matrix.evaluate_kernel()
    inv_quad, logdet = covar.inv_quad_logdet(
        inv_quad_rhs=error.unsqueeze(-1),
        logdet=True,
    )

    n_test = test_y.shape[0]
    gauss_const = error.size(-1) * np.log(2 * np.pi)

    rmse = (error.square().sum() / n_test).sqrt()
    # Joint Gaussian negative log-likelihood divided by the number of test points.
    nll = 0.5 * (inv_quad + logdet + gauss_const) / n_test
    model._clear_cache()

print("RMSE: ", rmse)
print("NLL: ", nll)

RMSE:  tensor(0.0079, device='cuda:0')
NLL:  tensor(-1.9166, device='cuda:0')
