# Manifold GP Semi-Supervised Learning via Precision Matrix on 2D Manifold

## Preamble

In [1]:
import numpy as np
import torch
import gpytorch

from mayavi import mlab
from importlib.resources import files

from manifold_gp.kernels.riemann_matern_kernel import RiemannMaternKernel
from manifold_gp.models.riemann_gp import RiemannGP
from manifold_gp.utils.generate_truth import groundtruth_from_mesh

## Dataset Preprocessing

### Load

In [2]:
data_path = files('manifold_gp.data').joinpath('dragon10k.stl')
nodes, faces, truth = groundtruth_from_mesh(data_path)

sampled_x = torch.from_numpy(nodes).float()
sampled_y = torch.from_numpy(truth).float()
(m, n) = sampled_x.shape

num_train = 100
num_test = 1000
normalize_features = False
normalize_labels = True

### Trainset & Testset

In [3]:
torch.manual_seed(1337)
rand_idx = torch.randperm(m)
train_idx = rand_idx[:num_train]
train_x, train_y = sampled_x[train_idx, :], sampled_y[train_idx]

noise_train_x = 0.0
train_x += noise_train_x * torch.randn(num_train, n)
noise_train_y = 0.01
train_y += noise_train_y * torch.randn(num_train)

test_idx = rand_idx[num_train:num_train+num_test]
test_x, test_y = sampled_x[test_idx, :], sampled_y[test_idx]

noise_test_x = 0.0
test_x += noise_test_x * torch.randn(num_test, n)
noise_test_y = 0.0
test_y += noise_test_y * torch.randn(num_test)

if normalize_features:
    mu_x, std_x = train_x.mean(dim=-2, keepdim=True), train_x.std(dim=-2, keepdim=True) + 1e-6
    sampled_x.sub_(mu_x).div_(std_x)
    train_x.sub_(mu_x).div_(std_x)
    test_x.sub_(mu_x).div_(std_x)
    
if normalize_labels:
    mu_y, std_y = train_y.mean(), train_y.std()
    train_y.sub_(mu_y).div_(std_y)
    test_y.sub_(mu_y).div_(std_y)
    sampled_y.sub_(mu_y).div_(std_y)

### Move Data to Device

In [4]:
sampled_x, sampled_y = sampled_x.contiguous(), sampled_y.contiguous()
train_x, train_y = train_x.contiguous(), train_y.contiguous()
test_x, test_y = test_x.contiguous(), test_y.contiguous()

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
sampled_x, sampled_y = sampled_x.to(device), sampled_y.to(device)
train_x, train_y = train_x.to(device), train_y.to(device)
test_x, test_y = test_x.to(device), test_y.to(device)

if normalize_features:
    mu_x, std_x = mu_x.to(device), std_x.to(device)

## Model

In [5]:
%%capture
likelihood = gpytorch.likelihoods.GaussianLikelihood(
    noise_constraint=gpytorch.constraints.GreaterThan(1e-8),
    noise_prior=None  # NormalPrior(torch.tensor([0.0]).to(device),  torch.tensor([1/9]).sqrt().to(device))
)

kernel = gpytorch.kernels.ScaleKernel(
    RiemannMaternKernel(
        nu=1,
        nodes=train_x,
        neighbors=10,
        operator="randomwalk",
        method="exact",
        modes=50,
        ball_scale=3.0,
        prior_bandwidth=False,
    ),
    outputscale_prior=None  # NormalPrior(torch.tensor([1.0]).to(device),  torch.tensor([1/9]).sqrt().to(device))
)

model = RiemannGP(train_x, train_y, likelihood, kernel).to(device)

## Train

In [6]:
%%capture
hypers = {
    'likelihood.noise_covar.noise': 1e-5,
    'covar_module.base_kernel.epsilon': 0.5027,
    'covar_module.base_kernel.lengthscale': 0.5054,
    'covar_module.outputscale': 1.0,
}
model.initialize(**hypers)

In [7]:
model.manifold_informed_train(lr=1e-1, iter=100, norm_step_size=100, verbose=True)

Iter: 0, Loss: 127.627, NoiseVar: 0.000, SignalVar: 0.97288, Lengthscale: 0.505, Epsilon: 0.503
Iter: 1, Loss: 124.118, NoiseVar: 0.000, SignalVar: 0.91186, Lengthscale: 0.546, Epsilon: 0.464
Iter: 2, Loss: 119.782, NoiseVar: 0.000, SignalVar: 0.85324, Lengthscale: 0.587, Epsilon: 0.428
Iter: 3, Loss: 114.595, NoiseVar: 0.000, SignalVar: 0.79695, Lengthscale: 0.625, Epsilon: 0.395
Iter: 4, Loss: 108.555, NoiseVar: 0.000, SignalVar: 0.74296, Lengthscale: 0.665, Epsilon: 0.363
Iter: 5, Loss: 101.686, NoiseVar: 0.000, SignalVar: 0.69128, Lengthscale: 0.708, Epsilon: 0.333
Iter: 6, Loss: 94.070, NoiseVar: 0.000, SignalVar: 0.64194, Lengthscale: 0.754, Epsilon: 0.306
Iter: 7, Loss: 85.844, NoiseVar: 0.000, SignalVar: 0.59496, Lengthscale: 0.804, Epsilon: 0.280
Iter: 8, Loss: 77.207, NoiseVar: 0.000, SignalVar: 0.55040, Lengthscale: 0.857, Epsilon: 0.256
Iter: 9, Loss: 68.420, NoiseVar: 0.000, SignalVar: 0.50831, Lengthscale: 0.914, Epsilon: 0.233
Iter: 10, Loss: 59.813, NoiseVar: 0.000, Sig

## Model Evaluation

In [8]:
%%capture
likelihood.eval()
model.eval()

Getting faster predictive distributions using [LOVE](https://arxiv.org/abs/1803.06058). We compute the **mean**, the **standard deviation** and one **posterior sample**. In addition we evaluate the kernel at the first point of our dataset.

In [9]:
%%capture
with torch.no_grad(), gpytorch.settings.fast_pred_var():
    preds = likelihood(model(noisy_x))
    mean = preds.mean
    std = preds.stddev
    posterior_sample = preds.sample()
    # kernel_eval = kernel(sampled_x[0, :].unsqueeze(0), sampled_x).evaluate().squeeze()
    
    # Bring data to cpu
    sampled_x = sampled_x.cpu().numpy()
    sampled_y = sampled_y.cpu().numpy()
    train_x = train_x.cpu().numpy()
    test_x = test_x.cpu().numpy()
    # kernel_eval = kernel_eval.cpu().numpy()
    posterior_sample = posterior_sample.cpu().numpy()
    mean = mean.cpu().numpy()
    std = std.cpu().numpy()

NameError: name 'noisy_x' is not defined

## Plot Results

In [13]:
%%capture
%%bash
jupyter nbextension install --py mayavi --user

CalledProcessError: Command 'b'jupyter nbextension install --py mayavi --user\n'' returned non-zero exit status 1.

In [14]:
%%capture
mlab.init_notebook()
v_options = {'mode': 'sphere','scale_factor': 3e-3, 'color': (0, 0, 0)}

ModuleNotFoundError: No module named 'ipyevents'

In [15]:
v_options = {'mode': 'sphere','scale_factor': 3e-3, 'color': (0, 0, 0)}

### Ground Truth with Sample Training Points

In [16]:
mlab.figure(size=(1920, 1360), fgcolor=(0, 0, 0), bgcolor = (1,1,1))
mlab.triangular_mesh(sampled_x[:, 0], sampled_x[:, 1], sampled_x[:, 2], faces, scalars=sampled_y)
mlab.colorbar(orientation='vertical')
mlab.points3d(train_x[:,0], train_x[:,1], train_x[:,2], **v_options)
# mlab.points3d(test_x[:,0], test_x[:,1], test_x[:,2], **v_options)
mlab.view(0.0,180.0,0.5139171204775793)
mlab.savefig('dragon_truth_semisupervised.png')

### Mean

In [17]:
mlab.figure(size=(1920, 1360), fgcolor=(0, 0, 0), bgcolor = (1,1,1))
mlab.triangular_mesh(sampled_x[:, 0], sampled_x[:, 1], sampled_x[:, 2], faces, scalars=mean)
mlab.colorbar(orientation='vertical')
mlab.points3d(train_x[:,0], train_x[:,1], train_x[:,2], **v_options)
mlab.view(0.0,180.0,0.5139171204775793)
mlab.savefig('dragon_mean_semisupervised.png')

### Standard Deviation

In [None]:
mlab.figure(size=(1920, 1360), fgcolor=(0, 0, 0), bgcolor = (1,1,1))
mlab.triangular_mesh(sampled_x[:, 0], sampled_x[:, 1], sampled_x[:, 2], faces, scalars=std)
mlab.colorbar(orientation='vertical')
mlab.points3d(train_x[:,0], train_x[:,1], train_x[:,2], **v_options)
# mlab.view(0.0,180.0,0.5139171204775793)
# mlab.savefig('dragon_std_semisupervised.png')

### One Posterior Sample

In [None]:
mlab.figure(size=(1920, 1360), fgcolor=(0, 0, 0), bgcolor = (1,1,1))
mlab.triangular_mesh(sampled_x[:, 0], sampled_x[:, 1], sampled_x[:, 2], faces, scalars=posterior_sample)
mlab.colorbar(orientation='vertical')
mlab.points3d(train_x[:,0], train_x[:,1], train_x[:,2], **v_options)
# mlab.view(0.0,180.0,0.5139171204775793)
# mlab.savefig('dragon_posterior_semisupervised.png')

### Kernel Evaluation

In [None]:
mlab.figure(size=(1920, 1360), fgcolor=(0, 0, 0), bgcolor = (1,1,1))
mlab.triangular_mesh(sampled_x[:, 0], sampled_x[:, 1], sampled_x[:, 2], faces, scalars=kernel_eval)
mlab.colorbar(orientation='vertical')
mlab.points3d(sampled_x[0,0], sampled_x[0,1], sampled_x[0,2], **v_options)