This notebook is dedicated to sparse Gaussian processes (GPs) and their training / inference (as a prerequisite to the paper on deep GPs).

Specifically, I am following these papers:

Titsias - Variational Learning of Inducing Variables in Sparse Gaussian
Processes: http://proceedings.mlr.press/v5/titsias09a/titsias09a.pdf

Titsias - Variational Model Selection for Sparse Gaussian
Process Regression (more details compared to the previous one): https://pdfs.semanticscholar.org/db7b/e492a629a98db7f9d77d552fd3568ff42189.pdf

Note that, for now, I am mostly interested in inference, so I am not attempting an optimal selection of the latent (inducing) points.

In [None]:
import numpy as np
import scipy as sp
import math
import matplotlib.pyplot as plt
%matplotlib inline

## Sparse GP setting

The code below samples a random function, draws noisy observations from it, and then picks a smaller random subset of input locations at which the latent signal of the sparse GP will live.

In [None]:
# Fix the global NumPy seed so that "Restart & Run All" reproduces the same
# random function (and, downstream, the same observations and latent points).
# Change or remove the seed to draw a different function.
np.random.seed(0)

# Dense evaluation grid on [0, 1].
x = np.linspace(0, 1, 1000)

# Random function = random constant offset + randomly weighted sin/cos terms
# with frequencies i * pi for i = 1..5.
# The offset is sized from `x` (instead of repeating the literal 1000) so the
# two arrays cannot get out of sync if the grid resolution is changed.
true_y = np.random.randn(1) * np.ones(len(x))
for i in range(1, 6):
    true_y += np.random.randn(1) * np.sin(i * math.pi * x)
    true_y += np.random.randn(1) * np.cos(i * math.pi * x)

In [None]:
# ---- Experiment settings ----
SIGMA_OBS = 0.5     # standard deviation of the Gaussian noise added to observations
N_SAMPLES = 100     # number of noisy observations drawn from the true signal
N_SPARSE = 10       # number of latent ("sparse") input locations
KERNEL_SCALE = 0.1  # width of the Gaussian kernel (read by k_gaussian)

# Noisy observations: N_SAMPLES distinct grid indices, true value plus noise.
pick_x = np.random.choice(len(x), N_SAMPLES, replace=False)
sample_x = x[pick_x]
sample_y = SIGMA_OBS * np.random.randn(N_SAMPLES) + true_y[pick_x]

# Latent ("sparse") signal: only the N_SPARSE locations are chosen here;
# no values are attached to them yet.
pick_x = np.random.choice(len(x), N_SPARSE, replace=False)
sparse_x = x[pick_x]

## Sampling latent signal

In [None]:
def k_gaussian(x1, x2, scale=None):
    """Gaussian (squared-exponential) kernel matrix between two sets of 1-D points.

    Parameters
    ----------
    x1, x2 : 1-D sequences of numbers
        Input locations; they are flattened to 1-D before use.
    scale : float, optional
        Kernel width. When omitted, the notebook-level ``KERNEL_SCALE`` is
        looked up at call time, which is the original behaviour.

    Returns
    -------
    numpy.ndarray of shape ``(len(x2), len(x1))``
        Entry ``[i, j]`` equals ``exp(-(x1[j] - x2[i]) ** 2 / (2 * scale ** 2))``.
        Note the orientation: rows follow ``x2`` and columns follow ``x1``.
    """
    if scale is None:
        scale = KERNEL_SCALE

    x1 = np.asarray(x1).reshape(-1)
    x2 = np.asarray(x2).reshape(-1)

    # Broadcasting builds the (len(x2), len(x1)) grid of pairwise differences
    # without materialising two tiled copies of the inputs.
    diff = x1[np.newaxis, :] - x2[:, np.newaxis]

    return np.exp(-diff ** 2 / (2 * scale * scale))

In [None]:
# Gaussian distribution over the latent values at sparse_x given the noisy
# observations (mean `sparse_mu`, covariance `sparse_cov`), followed by one
# random draw `sparse_y` from it.
#
# Kernel blocks are computed once and reused. Reminder: k_gaussian(a, b)
# returns an array of shape (len(b), len(a)).
K_uu = k_gaussian(sparse_x, sparse_x)  # (len(sparse_x), len(sparse_x))
K_uf = k_gaussian(sample_x, sparse_x)  # (len(sparse_x), len(sample_x))
K_fu = k_gaussian(sparse_x, sample_x)  # (len(sample_x), len(sparse_x))

sigma_matrix = np.linalg.inv(K_uu + np.dot(K_uf, K_fu) / SIGMA_OBS ** 2)

# Explicit left-to-right np.dot chain instead of `reduce`, which is not a
# builtin on Python 3. The evaluation order is the same as before.
K_uu_sigma = np.dot(K_uu, sigma_matrix)
sparse_mu = np.dot(np.dot(K_uu_sigma, K_uf), sample_y) / SIGMA_OBS ** 2
sparse_cov = np.dot(K_uu_sigma, K_uu)

# Round-off can leave sparse_cov slightly non positive-definite, so add small
# diagonal jitter until the Cholesky factorisation succeeds. Only LinAlgError
# is caught (a bare `except` would also swallow KeyboardInterrupt), and the
# number of attempts is bounded so a hopeless matrix (e.g. one containing NaN)
# fails loudly instead of looping forever.
MAX_JITTER_STEPS = 100
for attempt in range(MAX_JITTER_STEPS):
    try:
        sparse_chol = np.linalg.cholesky(sparse_cov)
        break
    except np.linalg.LinAlgError:
        sparse_cov += 0.0001 * np.eye(len(sparse_x))
else:
    raise np.linalg.LinAlgError(
        "sparse_cov is not positive definite even after %d jitter steps" % MAX_JITTER_STEPS
    )

# One sample from N(sparse_mu, sparse_cov).
sparse_y = sparse_mu + np.dot(sparse_chol, np.random.randn(len(sparse_x)))

Variance of the conditional prior (~noise added due to sparsity)

In [None]:
# Covariance of the function values at sample_x given the latent points at
# sparse_x; its trace is printed as a single summary number.
# Reminder: k_gaussian(a, b) has shape (len(b), len(a)), so K_sparse has shape
# (len(sample_x), len(sparse_x)).
# NOTE(review): SIGMA_OBS ** 2 is added to the diagonal before inverting, so
# this is not the noise-free conditional prior; confirm whether the noise term
# is intended here or only serves as numerical regularisation.
K_sparse = np.dot(k_gaussian(sparse_x, sample_x),
                  np.linalg.inv(k_gaussian(sparse_x, sparse_x) + np.eye(len(sparse_x)) * SIGMA_OBS ** 2)
                 )

posterior_sigma = (k_gaussian(sample_x, sample_x) -
                   np.dot(K_sparse, k_gaussian(sample_x, sparse_x)))

# print() with a single argument produces the same output on Python 2 and 3,
# whereas the `print expr` statement is a SyntaxError on Python 3.
print(np.trace(posterior_sigma))

## Plotting the posteriors

In [None]:
def gp_posterior(sample_x, sample_y, x, sigma_obs):
    """Posterior mean and standard deviation of a GP at the points ``x``.

    The GP uses the ``k_gaussian`` kernel and is conditioned on observations
    ``sample_y`` at ``sample_x`` that are assumed to carry additional Gaussian
    noise with standard deviation ``sigma_obs``.

    Parameters
    ----------
    sample_x : 1-D array of observed input locations.
    sample_y : array of observed values, one per entry of ``sample_x``.
    x : 1-D array of locations at which the posterior is evaluated.
    sigma_obs : float, standard deviation of the observation noise.

    Returns
    -------
    (mu, std) : two 1-D arrays of length ``len(x)`` holding the posterior mean
        and the pointwise posterior standard deviation.

    Notes
    -----
    The method cannot reliably handle ``sigma_obs == 0`` when predicting at
    ``sample_x`` itself: without the noise term the matrix to be solved may be
    numerically singular.
    """
    # Reminder: k_gaussian(a, b) has shape (len(b), len(a)).
    K_xs = k_gaussian(sample_x, x)  # (len(x), len(sample_x))
    K_sx = k_gaussian(x, sample_x)  # (len(sample_x), len(x))
    noisy_gram = k_gaussian(sample_x, sample_x) + np.eye(len(sample_x)) * sigma_obs ** 2

    # K = K_xs . noisy_gram^{-1}, shared by the mean and the variance.
    # noisy_gram is symmetric and K_sx is the transpose of K_xs, so K can be
    # obtained from a linear solve instead of forming an explicit inverse.
    K = np.linalg.solve(noisy_gram, K_sx).T

    mu = np.dot(K, sample_y)

    # Only the diagonal of (prior covariance - K . K_sx) is needed, so compute
    # diag(K . K_sx) directly rather than the full len(x) x len(x) product.
    variance = np.diag(k_gaussian(x, x)) - np.einsum("ij,ji->i", K, K_sx)

    # Round-off can push near-zero variances slightly below zero; clamp them so
    # the square root yields 0 instead of NaN.
    std_1d = np.sqrt(np.maximum(variance, 0.0))

    return mu.reshape(-1), std_1d.reshape(-1)

In [None]:
# Posterior of a GP fitted to all noisy observations versus a GP fitted only to
# the sampled latent values at sparse_x (both with noise level SIGMA_OBS).
true_posterior_mu, true_posterior_std = gp_posterior(sample_x, sample_y, x, SIGMA_OBS)
sparse_posterior_mu, sparse_posterior_std = gp_posterior(sparse_x, sparse_y, x, SIGMA_OBS)

# Underlying function as a line, then the two point clouds.
plt.plot(x, true_y, color="b", label="true signal")
for points_x, points_y, points_color, points_label in (
    (sample_x, sample_y, "b", "sampled_signal"),
    (sparse_x, sparse_y, "r", "latent_signal"),
):
    plt.plot(points_x, points_y, ".", color=points_color, label=points_label)

# Shaded bands covering mean +/- 2 standard deviations for each posterior.
for band_mu, band_std, band_color, band_label in (
    (true_posterior_mu, true_posterior_std, "b", "true_posterior"),
    (sparse_posterior_mu, sparse_posterior_std, "r", "sparse_posterior"),
):
    plt.fill_between(x, band_mu - 2 * band_std, band_mu + 2 * band_std,
                     color=band_color, alpha=0.25, label=band_label)

plt.title("Sparse GP inference")
plt.legend()
plt.show()