# GPs with boundary conditions

In the paper 'Linearly Constrained Gaussian Processes with Boundary Conditions' (https://export.arxiv.org/pdf/2002.00818), the author claims that a GP can be constrained to match boundary conditions. Consider a GP prior with covariance kernel
$$k_F(x,y) = \exp\left(-\frac{1}{2}(x-y)^2\right)$$

Try and match the boundary conditions:
$$f(0) = f'(0) = f(1) = f'(1) = 0$$

The posterior will be a GP with covariance equal to:
$$\exp\left(-\frac{1}{2}(x-y)^2\right) - \frac{\exp\left(-\frac{1}{2}(x^2+y^2)\right)}{e^{-2} - 3e^{-1} + 1} \cdot \left( (xy+1) + (xy-x-y+2)e^{x+y-1} + (-2xy + x+y-1)(e^{x+y-2}+e^{-1}) + (xy-y+1)e^{y-2} + (xy-x+1)e^{x-2} + (y-x-2)e^{y-1} + (x-y-2)e^{x-1}\right)$$

This notebook compares the constrained and unconstrained kernels.

In [None]:
import GPy
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Toy data: a handful of noisy observations of sin(x) at random inputs in
# [0, 1), stored as column vectors.
sample_size = 5
X = np.random.uniform(low=0, high=1., size=(sample_size, 1))
Y = np.sin(X) + 0.1 * np.random.randn(sample_size, 1)
# Evenly spaced evaluation grid over [0, 1], also a column vector.
testX = np.linspace(0, 1, num=100)[:, np.newaxis]

In [None]:
def plotSamples(testY, simY, simMse, ax):
    """Draw sampled curves, the training points and an uncertainty band on ``ax``.

    Reads the notebook-level globals ``testX`` (plotting grid) and ``X``, ``Y``
    (training data).

    Args:
        testY: Sampled function values on ``testX``; drawn as thin black lines.
        simY: Centre of the shaded band on ``testX``.
        simMse: Its square root, times three, is the half-width of the band
            (i.e. it is treated as a variance).
        ax: Matplotlib axes to draw on.
    """
    samples = testY.squeeze()
    centre = simY.squeeze()
    half_width = 3 * simMse.squeeze()**0.5
    grid = testX.squeeze()

    # Sample paths first, then the observed data on top of them.
    ax.plot(grid, samples, lw=0.2, c='k')
    ax.plot(X, Y, 'ok', markersize=5)

    ax.fill_between(grid, centre - half_width, centre + half_width, alpha=0.1)
    ax.set_xlabel('Input')
    ax.set_ylabel('Output')


## Unconstrained case

In [None]:
# RBF kernel on a 1-D input with unit variance and unit lengthscale.
kU = GPy.kern.RBF(input_dim=1, variance=1, lengthscale=1.)
mU = GPy.models.GPRegression(X, Y, kernel=kU, noise_var=0.1)
# NOTE: the "prior*" names mean "before hyperparameter optimisation"; the
# model has already been given the data (X, Y) at this point.
priorTestY = mU.posterior_samples_f(testX, full_cov=True, size=10)
priorSimY, priorSimMse = mU.predict(testX)

Plot the kernel function

In [None]:
# Grid of n points on [0, 1], as a column vector.
n = 101
xs = np.linspace(0, 1, n)[:, np.newaxis]
# Evaluate the whole Gram matrix with a single kernel call instead of one
# call per grid point; KU[i, j] is the kernel between xs[i] and xs[j].
KU = kU.K(xs, xs)

# Heat map of the unconstrained kernel over the grid.
ph0 = plt.pcolormesh(xs.T, xs, KU)
plt.title('Unconstrained RBF')
plt.colorbar(ph0)

## Constrained case

In [None]:
def K(x, y=None):
    """Closed-form covariance of the RBF GP constrained at both ends of [0, 1].

    Computes the unit-lengthscale RBF kernel minus a correction term; the
    result vanishes whenever either argument is 0 or 1, matching the boundary
    conditions f(0) = f'(0) = f(1) = f'(1) = 0.

    Args:
        x: First input, scalar or NumPy array. Every operation is
            elementwise, so NumPy broadcasting applies.
        y: Second input; when omitted, ``x`` is used for both arguments.

    Returns:
        The constrained covariance, with the broadcast shape of ``x`` and ``y``.
    """
    y = x if y is None else y
    xy = x*y

    # Terms of the bracketed correction factor, summed in a fixed order.
    constant_part = xy + 1
    joint_part = (xy - x - y + 2)*np.exp(x + y - 1)
    cross_part = (x + y - 1 - 2*xy)*(np.exp(x + y - 2) + np.exp(-1))
    y_far_part = (xy - y + 1)*np.exp(y - 2)
    x_far_part = (xy - x + 1)*np.exp(x - 2)
    y_near_part = (y - x - 2)*np.exp(y - 1)
    x_near_part = (x - y - 2)*np.exp(x - 1)
    correction = (constant_part + joint_part + cross_part + y_far_part
                  + x_far_part + y_near_part + x_near_part)

    # The normaliser is a small positive constant (about 0.0317).
    normaliser = np.exp(-2) - 3*np.exp(-1) + 1
    unconstrained = np.exp(-0.5*(x - y)**2.0)
    envelope = np.exp(-0.5*(x**2.0 + y**2.0))
    return unconstrained - envelope / normaliser * correction

# K is purely elementwise, so broadcasting a row of x-values against a column
# of y-values evaluates it on the whole grid in one call:
# KC[i, j] = K(xs[j], xs[i]).
KC = K(xs.T, xs)

In [None]:
# Heat map of the constrained kernel over the xs grid.
ph1 = plt.pcolormesh(xs.T, xs, KC)
plt.title('Constrained RBF')
plt.colorbar(ph1)

## Train the unconstrained model

In [None]:
# Optimise the model, then draw samples and predictions again for comparison.
mU.optimize()
posteriorTestY = mU.posterior_samples_f(testX, full_cov=True, size=10)
postSimY, postSimMse = mU.predict(testX)

# The figure handle is called `fig` rather than `f`, so that (re-)running this
# cell cannot overwrite the test function `f` used by later cells.
fig, axs = plt.subplots(1, 2, sharey=True, figsize=(10, 5))
# Left: before optimisation. Right: after optimisation.
plotSamples(priorTestY, priorSimY, priorSimMse, axs[0])
plotSamples(posteriorTestY, postSimY, postSimMse, axs[1])
sns.despine()

# GPy examples

## Combine normal and derivative observations

In [None]:
def plot_gp_vs_real(m, x, yreal, size_inputs, title, fixed_input=1, xlim=None, ylim=None):
    """Plot one output of a two-output model against a reference curve.

    A new figure is created on every call.

    Args:
        m: Model whose ``plot`` method accepts ``fixed_inputs``,
            ``which_data_rows``, ``xlim``, ``ylim`` and ``ax``.
        x: x-values of the reference curve.
        yreal: y-values of the reference curve (drawn in red).
        size_inputs: Two-element sequence with the number of training rows of
            the first and of the second output, in that order.
        title: Title of the axes.
        fixed_input: Value that input column 1 is fixed to in ``m.plot``.
            ``0`` selects the first ``size_inputs[0]`` data rows; any other
            value selects the ``size_inputs[1]`` rows that follow them.
        xlim: x-axis limits forwarded to ``m.plot``; ``[0, 11]`` when omitted.
        ylim: y-axis limits forwarded to ``m.plot``; ``[-1.5, 3]`` when omitted.
    """
    # Build the default limits per call instead of sharing mutable defaults.
    xlim = [0, 11] if xlim is None else xlim
    ylim = [-1.5, 3] if ylim is None else ylim

    fig, ax = plt.subplots()
    ax.set_title(title)
    # Draw on the axes created above rather than on pyplot's "current" axes.
    ax.plot(x, yreal, "r", label='Real function')

    if fixed_input == 0:
        rows = slice(0, size_inputs[0])
    else:
        rows = slice(size_inputs[0], size_inputs[0] + size_inputs[1])
    m.plot(fixed_inputs=[(1, fixed_input)], which_data_rows=rows, xlim=xlim, ylim=ylim, ax=ax)

In [None]:
def f(x):
    """Test function sin(x) + 0.1 (x - 2)^2 - 0.005 x^3, evaluated elementwise."""
    return np.sin(x) + 0.1*(x - 2.)**2 - 0.005*x**3


def fd(x):
    """First derivative of ``f``: cos(x) + 0.2 (x - 2) - 0.015 x^2."""
    return np.cos(x) + 0.2*(x - 2.) - 0.015*x**2

# Experiment sizes and noise levels
N = 10            # Number of observations
Npred = 100       # Number of prediction points
sigma = 0.2       # Noise of observations
sigma_der = 1e-3  # Noise of derivative observations

# Noisy samples of f on an even grid over [1, 10], as a column vector
x = np.linspace(1, 10, N).reshape(-1, 1)
y = f(x) + sigma*np.random.normal(0, 1, (N, 1))

# Alternative set-up (disabled): noisy derivative observations inside the domain
#     M = 10 # Number of derivative observations
#     xd = np.array([np.linspace(2,8,M)]).T
#     yd = fd(xd) + np.array(sigma_der*np.random.normal(0,1,(M,1)))

# Active set-up: specify zero derivatives at the two end-points 0 and 11
M = 2
xd = np.array([[0], [11]])
yd = np.array([[0], [0]])

# Prediction grid over [0, 11] with the true function and derivative values
xpred = np.linspace(0, 11, Npred).reshape(-1, 1)
ypred_true = f(xpred)
ydpred_true = fd(xpred)

# Squared exponential kernel for the regular observations.
# The constructors are called directly: retrying the very same statements in a
# bare `except:` branch could never succeed where the first attempt failed.
se = GPy.kern.RBF(input_dim=1, lengthscale=1.5, variance=0.2)
# The derivative observations need a separate kernel, which is generated from
# the base kernel given as its input.
se_der = GPy.kern.DiffKern(se, 0)

# One Gaussian likelihood per output, each with its own noise variance
gauss = GPy.likelihoods.Gaussian(variance=sigma**2)
gauss_der = GPy.likelihoods.Gaussian(variance=sigma_der**2)

# Create the model. Everything is given in lists, and the order of the inputs
# defines the order of the outputs: regular observations first, derivative
# observations second, so the kernels and the likelihoods must follow the same
# order. Cross-covariances are automatically taken care of.
m = GPy.models.MultioutputGP(
    X_list=[x, xd],
    Y_list=[y, yd],
    kernel_list=[se, se_der],
    likelihood_list=[gauss, gauss_der],
)
m.optimize(messages=0, ipython_notebook=False)

# Plot the model; the syntax is the same as for multioutput models
plot_gp_vs_real(
    m, xpred, ydpred_true, [x.shape[0], xd.shape[0]],
    title='Latent function derivatives', fixed_input=1, xlim=[0, 11], ylim=[-1.5, 3],
)
plot_gp_vs_real(
    m, xpred, ypred_true, [x.shape[0], xd.shape[0]],
    title='Latent function', fixed_input=0, xlim=[0, 11], ylim=[-1.5, 3],
)

# Predictions for the function values only: the second (derivative) input
# list entry is an empty array
mu, var = m.predict_noiseless(Xnew=[xpred, np.empty((0, 1))])

## Fixed end-points using a Multitask GP with different likelihood functions

In [None]:
# Experiment sizes and noise levels
N = 10          # Number of observations
Npred = 100     # Number of prediction points
sigma = 0.25    # Noise of observations
sigma_0 = 1e-3  # Noise of zero observations

# Noisy samples of f on an even grid over [xlow, xhigh], as a column vector
xlow, xhigh = 0, 10
x = np.linspace(xlow, xhigh, N).reshape(-1, 1)
y = f(x) + sigma*np.random.normal(0, 1, (N, 1))

# Two zero-valued observations placed a distance dx beyond each end of the data
M = 2
dx = 5
x0 = np.array([[xlow - dx], [xhigh + dx]])
y0 = np.array([[0], [0]])

# Prediction grid spanning the data range plus the dx margins
xpred = np.linspace(xlow - dx, xhigh + dx, Npred).reshape(-1, 1)
ypred_true = f(xpred)

# Squared exponential kernel.
# Constructed directly: a bare `except:` that repeats the identical call cannot
# recover from a failure of the first attempt.
se = GPy.kern.RBF(input_dim=1, lengthscale=1.5, variance=0.2)

# One Gaussian likelihood per task: the noisy data and the zero observations
gauss = GPy.likelihoods.Gaussian(variance=sigma**2)
gauss_0 = GPy.likelihoods.Gaussian(variance=sigma_0**2)

# Create the model. Everything is given in lists, and the order of the inputs
# defines the order of the outputs: regular observations first, zero-valued
# end-point observations second, so the kernels and the likelihoods must
# follow the same order. Both outputs use the same kernel `se`.
m = GPy.models.MultioutputGP(
    X_list=[x, x0],
    Y_list=[y, y0],
    kernel_list=[se, se],
    likelihood_list=[gauss, gauss_0],
)
m.optimize(messages=0, ipython_notebook=False)

In [None]:
# Plot the true function, the predicted mean with a +/- 1.96 standard
# deviation band, the data and the fixed end-points
ylims = [-1.5, 3]
fig, ax = plt.subplots(figsize=(8, 5))
ax.set_title('Latent function with fixed end-points')
ax.plot(xpred, ypred_true, 'k', label='Real function')

# A one-element list is passed, so only xpred is given as prediction input
ypred_mean, ypred_var = m.predict([xpred])
ypred_std = np.sqrt(ypred_var)
ax.fill_between(
    xpred.squeeze(),
    (ypred_mean - 1.96*ypred_std).squeeze(),
    (ypred_mean + 1.96*ypred_std).squeeze(),
    color='r', alpha=0.1, label='Confidence',
)
ax.plot(xpred, ypred_mean, 'r', label='Mean')
ax.plot(x, y, 'kx', label='Data')
ax.set_ylim(ylims)

ax.plot(x0, y0, 'ro', label='Fixed end-points')
ax.legend()
sns.despine()

## Fixed end-points using MixedNoise likelihood

In [None]:
# Squared exponential kernel.
# Constructed directly: a bare `except:` that repeats the identical call cannot
# recover from a failure of the first attempt.
se = GPy.kern.RBF(input_dim=1, lengthscale=1.5, variance=0.2)

# MixedNoise likelihood built from two Gaussians: one for the noisy data and
# one for the zero-valued end-point observations
gauss = GPy.likelihoods.Gaussian(variance=sigma**2)
gauss_0 = GPy.likelihoods.Gaussian(variance=sigma_0**2)
mixed = GPy.likelihoods.MixedNoise([gauss, gauss_0])

# Create the model. The regular observations and the end-points are stacked
# into single arrays (regular observations first), and 'output_index' tags
# every row: 0 for the N regular observations, 1 for the M end-points,
# i.e. the position of the corresponding Gaussian in the MixedNoise list.
xc = np.concatenate((x, x0), axis=0)
yc = np.concatenate((y, y0), axis=0)
ids = np.concatenate(
    (np.zeros((N, 1), dtype=int), np.ones((M, 1), dtype=int)),
    axis=0,
)
Y_metadata = {'output_index': ids}
m = GPy.core.GP(xc, yc, se, likelihood=mixed, Y_metadata=Y_metadata)
m.optimize(messages=0, ipython_notebook=False)

# Plot the true function, the predicted mean with a +/- 1.96 standard
# deviation band, the data and the fixed end-points
fig, ax = plt.subplots(figsize=(8, 5))
ax.set_title('Latent function with fixed end-points')
ax.plot(xpred, ypred_true, 'k', label='Real function')
# Alternative (disabled): let the model draw itself
#m.plot(fixed_inputs=[(1, 0)], which_data_rows=slice(0, x.shape[0]), xlim=[-dx,10+dx], ylim=[-1.5,3], ax=ax)

# Every prediction point is tagged with output index 0
ypred_mean, ypred_var = m.predict(
    xpred,
    Y_metadata={'output_index': np.zeros_like(xpred, dtype=int)},
)
ypred_std = np.sqrt(ypred_var)
ax.fill_between(
    xpred.squeeze(),
    (ypred_mean - 1.96*ypred_std).squeeze(),
    (ypred_mean + 1.96*ypred_std).squeeze(),
    color='r', alpha=0.1, label='Confidence',
)
ax.plot(xpred, ypred_mean, 'r-', label='Mean')
ax.plot(x, y, 'kx', label='Data')
ax.set_ylim(ylims)
ax.plot(x0, y0, 'ro', label='Fixed end-points')
ax.legend()

sns.despine()

# Display the fitted model as the cell output
m