# Benchmark Manifold GP Semi-Supervised Learning

## Preamble

This notebook provides an example of how to perform Gaussian Process Regression on a 1D manifold. In this example we consider a supervised learning scenario, namely the number of labeled data points is equivalent to the number of the sampled points from the underlying manifold.

In [1]:
import torch
import gpytorch
import numpy as np
import os
import scipy.spatial as ss
from scipy.io import loadmat
from time import time
from manifold_gp.kernels.riemann_matern_kernel import RiemannMaternKernel
from manifold_gp.models.riemann_gp import RiemannGP
from gpytorch.priors import NormalPrior, GammaPrior
from manifold_gp.utils.torch_helper import memory_allocation

## Dataset Preprocessing

### Load & Settings

In [2]:
# bike, buzz_tomshardware, buzz_twitter, ctslices, elevators, protein, song, mnist, mnist_single
dataset = 'ctslices'
samples_split = 0.9
preprocess = False
normalize_features = False
normalize_labels = True

data = np.load('../datasets/'+dataset+'.npy')

x = data[:, :-1]
y = data[:, -1]

idx = np.random.permutation(x.shape[0])
x = x[idx, :]
y = y[idx]

# x = data[:, :-1]
# x = x - x.min(0)[0]
# x = 2 * (x/ x.max(0)[0]) - 1
# mu, std = x.mean(axis=0), x.std(axis=0)
# x = (x - mu)/std

num_samples = int(samples_split * len(data))
sampled_x, sampled_y = x[:num_samples, :], data[:num_samples, -1]
test_x, test_y = x[num_samples:, :], data[num_samples:, -1]

In [3]:
sampled_x.shape

(48150, 385)

In [3]:
if preprocess:
    # # remove coincident points
    # sampled_x, id_unique = np.unique(sampled_x, axis=0, return_index=True)
    # sampled_y = sampled_y[id_unique]

    # cut between x% and y% percentile of distances
    num_avg = 1
    p_start, p_end = 0.1, 0.9
    num_samples = sampled_x.shape[0]
    
    import faiss
    res = faiss.StandardGpuResources()
    knn = faiss.GpuIndexIVFFlat(res, sampled_x.shape[1], 1, faiss.METRIC_L2)
    knn.train(sampled_x)
    knn.add(sampled_x)
    v = np.sqrt(knn.search(sampled_x, num_avg+1)[0][:,1:])
    idx = np.argsort(v.mean(axis=1))
    idx = np.delete(idx, np.arange(int(num_samples*p_start),int(num_samples*p_end)))
    sampled_x = np.delete(sampled_x, idx, axis=0)
    sampled_y = np.delete(sampled_y, idx)
m = sampled_x.shape[0]

### Trainset & Testset

In [4]:
train_split = int(0.05 * m)
train_idx = torch.arange(0, train_split)
train_x, train_y = sampled_x[:train_split], sampled_y[:train_split]

sampled_x = torch.from_numpy(sampled_x).float()
train_x, train_y = torch.from_numpy(train_x).float(), torch.from_numpy(train_y).float()
test_x, test_y = torch.from_numpy(test_x).float(), torch.from_numpy(test_y).float()

if normalize_features:
    mu_x, std_x = sampled_x.mean(dim=-2, keepdim=True), sampled_x.std(dim=-2, keepdim=True) + 1e-6
    sampled_x.sub_(mu_x).div_(std_x)
    train_x.sub_(mu_x).div_(std_x)
    test_x.sub_(mu_x).div_(std_x)
    
if normalize_labels:
    mu_y, std_y = train_y.mean(), train_y.std()
    train_y.sub_(mu_y).div_(std_y)
    test_y.sub_(mu_y).div_(std_y)

### Move Data to Device

In [5]:
sampled_x = sampled_x.contiguous()
train_idx = train_idx.contiguous()
train_x, train_y = train_x.contiguous(), train_y.contiguous()
test_x, test_y = test_x.contiguous(), test_y.contiguous()

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

sampled_x = sampled_x.to(device)
train_idx = train_idx.to(device)
train_x, train_y = train_x.to(device), train_y.to(device)
test_x, test_y = test_x.to(device), test_y.to(device)

## Model

In [6]:
%%capture
likelihood = gpytorch.likelihoods.GaussianLikelihood(
    noise_constraint=gpytorch.constraints.GreaterThan(1e-8),
    noise_prior=None  # NormalPrior(torch.tensor([0.0]).to(device),  torch.tensor([1/9]).sqrt().to(device))
)

kernel = gpytorch.kernels.ScaleKernel(
    RiemannMaternKernel(
        nu=4,
        nodes=sampled_x,
        neighbors=100,
        operator="randomwalk",
        method="exact",
        modes=500,
        ball_scale=100.0,
        prior_bandwidth=True,
    ),
    outputscale_prior=None # NormalPrior(torch.tensor([1.0]).to(device),  torch.tensor([1/9]).sqrt().to(device))
)

model = RiemannGP(train_x, train_y, likelihood, kernel, train_idx).to(device)

## Train

In [7]:
%%capture
hypers = {
    'likelihood.noise_covar.noise': 1e-2,
    'covar_module.base_kernel.epsilon': 1.5,
    'covar_module.base_kernel.lengthscale': 10.0,
    'covar_module.outputscale': 1.0,
}
model.initialize(**hypers)

In [8]:
t0 = time()
model.manifold_informed_train(lr=1e-2, iter=100,
                              decay_step_size=100, decay_magnitude=1.0, 
                              norm_step_size=10, norm_rand_vec=100, 
                              verbose=True, save=False)
t1 = time()
print("Time: %.2g sec" % (t1 - t0))

Iter: 0, Loss: 4285.349, NoiseVar: 0.010, SignalVar: 0.00085, Lengthscale: 10.000, Epsilon: 1.500
Iter: 1, Loss: 4198.303, NoiseVar: 0.010, SignalVar: 0.00086, Lengthscale: 10.010, Epsilon: 1.508
Iter: 2, Loss: 4135.141, NoiseVar: 0.010, SignalVar: 0.00086, Lengthscale: 10.020, Epsilon: 1.516
Iter: 3, Loss: 4073.111, NoiseVar: 0.010, SignalVar: 0.00087, Lengthscale: 10.030, Epsilon: 1.523
Iter: 4, Loss: 3992.686, NoiseVar: 0.010, SignalVar: 0.00088, Lengthscale: 10.040, Epsilon: 1.531
Iter: 5, Loss: 3972.336, NoiseVar: 0.011, SignalVar: 0.00089, Lengthscale: 10.049, Epsilon: 1.539
Iter: 6, Loss: 3936.304, NoiseVar: 0.011, SignalVar: 0.00090, Lengthscale: 10.059, Epsilon: 1.546
Iter: 7, Loss: 3899.121, NoiseVar: 0.011, SignalVar: 0.00091, Lengthscale: 10.068, Epsilon: 1.554
Iter: 8, Loss: 3838.942, NoiseVar: 0.011, SignalVar: 0.00092, Lengthscale: 10.077, Epsilon: 1.561
Iter: 9, Loss: 3809.182, NoiseVar: 0.011, SignalVar: 0.00092, Lengthscale: 10.085, Epsilon: 1.568
Iter: 10, Loss: 3796

In [22]:
torch.save(model.state_dict(), '../models/model_state.pth')

In [7]:
model.load_state_dict(torch.load('../models/model_state.pth'))

<All keys matched successfully>

## Evaluation

In [8]:
kernel.base_kernel.method = "lanczos"
kernel.base_kernel.modes = 4000

In [9]:
%%capture
likelihood.eval()
model.eval()
torch.cuda.empty_cache()


## Metrics

In [10]:
with torch.no_grad(), gpytorch.settings.fast_pred_var():
    preds_test = likelihood(model(test_x))

    mean_test = preds_test.mean
        
    error = test_y - preds_test.mean
    covar = preds_test.lazy_covariance_matrix.evaluate_kernel()
    inv_quad, logdet = covar.inv_quad_logdet(inv_quad_rhs=error.unsqueeze(-1), logdet=True)
    
    rmse = (error.square().sum()/test_y.shape[0]).sqrt()
    nll = 0.5 * sum([inv_quad, logdet, error.size(-1)* np.log(2 * np.pi)])/test_y.shape[0]
    model._clear_cache()
    
print("RMSE: ", rmse)
print("NLL: ", nll)

RMSE:  tensor(3.8087, device='cuda:0')
NLL:  tensor(113.0177, device='cuda:0')
