# Gaussian Processes

## Introduction


...

In this article, we'll first review the fundamentals of Gaussian processes.

## Getting Started
The examples in this notebook make use of additional libraries. Make sure you have installed the requirements, and then run the cell below once to import modules before continuing.

In [357]:
import numpy as np
import matplotlib.pyplot as plt
import GPy
# Ask GPy to use plotly as its plotting library.
GPy.plotting.change_plotting_library('plotly')

# Define function to plot GPs, taken from http://krasserm.github.io/2018/03/19/gaussian-processes/
def plot_gp(mu, cov, X, X_train=None, Y_train=None, samples=None):
    '''
    Plot a Gaussian process using matplotlib.

    Args:
        mu -- the mean vector
        cov -- the covariance matrix; only its diagonal is used, for the shaded band
        X -- the grid of X values
        X_train, Y_train -- optional training points, drawn as red crosses
            only when both are given
        samples -- optional sequence of sampled functions evaluated on X
    '''
    # A None default avoids sharing one mutable list between calls.
    if samples is None:
        samples = []

    X = X.ravel()
    mu = mu.ravel()
    # Band of 1.96 standard deviations on either side of the mean.
    uncertainty = 1.96 * np.sqrt(np.diag(cov))

    plt.fill_between(X, mu + uncertainty, mu - uncertainty, alpha=0.1)
    plt.plot(X, mu, label='Mean')
    for i, sample in enumerate(samples):
        plt.plot(X, sample, lw=1, ls='--', label=f'Sample {i+1}')
    # Require both halves of the training data, as plotly_gp does.
    if X_train is not None and Y_train is not None:
        plt.plot(X_train, Y_train, 'rx')
    plt.legend()

# Re-implement in plotly for more aesthetically pleasing plots.
# GPy's built-in plotly implementation is broken due to deprecated functions. :(
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Named hex colours used by the plotly helpers in this notebook.
colorsHex = {
    "black": "#000000",
    "lightBlue": "#729fcf",
    "mediumBlue": "#2077b4",
    "darkBlue": "#204a87",
    "orange": "#fe9130",
    "green": "#43a944",
    "red": "#d83939",
    "purple": "#a985ca",
    "brown": "#a2776f",
    "pink": "#e892cd",
}

# Colours for the first 6 sample lines, chosen to match the matplotlib version.
colorWheel = [
    "orange",
    "green",
    "red",
    "purple",
    "brown",
    "pink",
]

def hex2rgb(hexcolor):
    '''
    Convert a '#rrggbb' colour string into an (r, g, b) tuple of integers.

    The first character is skipped and the following six are read as three
    two-digit hexadecimal numbers.
    '''
    red, green, blue = (int(hexcolor[start:start + 2], 16) for start in (1, 3, 5))
    return (red, green, blue)

def plotly_fill_between(ax, X, lower, upper, color=colorsHex['mediumBlue'], label=None, hide_legend=False, line_kwargs=None, **kwargs):
    '''
    Build the two plotly traces that shade the band between two curves.

    Args:
        ax -- a pair; its two items are formatted into the legend group name
        X -- shared x values of both curves
        lower, upper -- y values of the bottom and top edges of the band
        color -- fill colour; a '#rrggbb' string is turned into an rgba string
            with alpha 0.1, any other string is used unchanged
        label -- legend name; with None no legend entry is shown
        hide_legend -- when True, suppress the legend entry
        line_kwargs -- optional dict of line settings applied to both edges
        **kwargs -- forwarded to both go.Scatter traces

    Returns:
        (lower_trace, upper_trace); the upper trace carries fill='tonextx'.
        The callers in this notebook add the lower trace to the figure first.
    '''
    line_settings = line_kwargs or {}
    if 'line' in kwargs:
        kwargs['line'].update(line_settings)
    else:
        kwargs['line'] = go.scatter.Line(**line_settings)

    fcolor = color
    if color.startswith('#'):
        fcolor = 'rgba({c[0]}, {c[1]}, {c[2]}, {alpha})'.format(c=hex2rgb(color), alpha=0.1)

    # Both traces share one legend group so they toggle together.
    group = '{}_fill_({},{})'.format(label, ax[0], ax[1])
    show_entry = (not hide_legend) and label is not None

    upper_trace = go.Scatter(x=X, y=upper, fill='tonextx', fillcolor=fcolor, showlegend=show_entry, name=label, legendgroup=group, **kwargs)
    lower_trace = go.Scatter(x=X, y=lower, fillcolor=fcolor, showlegend=False, name=label, legendgroup=group, **kwargs)
    return lower_trace, upper_trace

def plotly_gp(mu, cov, X, X_train=None, Y_train=None, samples=None, title=""):
    '''
    Plot a Gaussian process using plotly and show the figure.

    Args:
        mu -- the mean vector
        cov -- the covariance matrix; only its diagonal is used, for the shaded band
        X -- the grid of X values
        X_train, Y_train -- the training data i.e. any points with known values,
            plotted as scatter points only when both are given
        samples -- optional samples (functions) drawn from the Gaussian process
        title -- the title of the figure
    '''
    # A None default avoids sharing one mutable list between calls.
    if samples is None:
        samples = []

    fig = go.Figure(layout=go.Layout(hovermode='x'))
    fig.update_layout(title_text=title)

    X = np.squeeze(X)
    mu = np.squeeze(mu)

    # Plot confidence interval as shaded area (1.96 standard deviations each side)
    uncertainty = 1.96 * np.sqrt(np.diag(cov))
    confidence_line = {'color': colorsHex['darkBlue'], 'width': 0.5}
    l, u = plotly_fill_between((fig.layout.xaxis, fig.layout.yaxis), X, mu - uncertainty, mu + uncertainty, label="Confidence", line_kwargs=confidence_line)
    # The lower edge goes in first; the upper trace is the one that carries the fill.
    fig.add_trace(l)
    fig.add_trace(u)

    # Plot mean
    fig.add_trace(go.Scatter(x=X, y=mu, name='Mean', line=go.scatter.Line(color=colorsHex['mediumBlue'])))

    # Plot samples, cycling through colorWheel when there are more than six
    for i, sample in enumerate(samples):
        line_color = colorsHex[colorWheel[i % len(colorWheel)]]
        fig.add_trace(go.Scatter(x=X, y=sample, name=f'Sample {i+1}', line=go.scatter.Line(color=line_color)))

    # Plot training data, if any
    if X_train is not None and Y_train is not None:
        fig.add_trace(go.Scatter(x=X_train.ravel(), y=Y_train.ravel(), mode='markers', marker=dict(color=colorsHex['black'], symbol='x'), name='Data'))

    fig.show()
    
def plotly_gps(data=None, nrows=1, ncols=1, title=""):
    '''
    Plot several Gaussian processes together as subplots and show the figure.
    Plots are similar in style to plotly_gp but share a single legend.

    Args:
        data -- a list of dicts, one per subplot, holding the args to plotly_gp
            i.e. 'mu', 'cov', 'X', and optionally 'X_train', 'Y_train',
            'samples' and 'title'
        nrows, ncols -- the dimensions of the subplot grid, filled row by row.
            Cells beyond len(data) are left empty; entries of data beyond
            nrows * ncols are not drawn.
        title -- the main title of the figure
    '''
    # A None default avoids sharing one mutable list between calls.
    if data is None:
        data = []

    n_cells = nrows * ncols
    visible = data[:n_cells]

    titles = [subplot_data.get('title') for subplot_data in visible]
    fig = make_subplots(rows=nrows, cols=ncols, subplot_titles=titles)
    fig.update_layout(title_text=title, hovermode='x')

    confidence_line = {'color': colorsHex['darkBlue'], 'width': 0.5}

    for index, subplot_data in enumerate(visible):
        # Row-major position of this subplot; plotly rows/cols are 1-based.
        r, c = divmod(index, ncols)
        row, col = r + 1, c + 1

        mu = np.squeeze(subplot_data['mu'])
        cov = subplot_data['cov']
        X = np.squeeze(subplot_data['X'])
        X_train = subplot_data.get('X_train')
        Y_train = subplot_data.get('Y_train')
        samples = subplot_data.get('samples', [])

        # Only the first subplot contributes legend entries; the shared
        # legend groups tie the matching traces of the other subplots to them.
        showlegend = index == 0

        # Plot confidence
        uncertainty = 1.96 * np.sqrt(np.diag(cov))
        # Pass (0,0) as axes for shared legend group across all subplots
        l, u = plotly_fill_between((0, 0), X, mu - uncertainty, mu + uncertainty, label="Confidence", hide_legend=not showlegend, line_kwargs=confidence_line)
        fig.add_trace(l, row=row, col=col)
        fig.add_trace(u, row=row, col=col)

        # Plot mean
        fig.add_trace(go.Scatter(x=X, y=mu, name='Mean', legendgroup='Mean', showlegend=showlegend, line=go.scatter.Line(color=colorsHex['mediumBlue'])), row=row, col=col)

        # Plot samples
        for i, sample in enumerate(samples):
            line_color = colorsHex[colorWheel[i % len(colorWheel)]]
            fig.add_trace(go.Scatter(x=X, y=sample, name=f'Sample {i+1}', legendgroup=f'Sample {i+1}', showlegend=showlegend, line=go.scatter.Line(color=line_color)), row=row, col=col)

        # Plot training data, if any, with the same markers as plotly_gp
        if X_train is not None and Y_train is not None:
            fig.add_trace(go.Scatter(x=np.ravel(X_train), y=np.ravel(Y_train), mode='markers', marker=dict(color=colorsHex['black'], symbol='x'), name='Data', legendgroup='Data', showlegend=showlegend), row=row, col=col)

    fig.show()


## Multivariate Normal (MVN)


## Gaussian Processes

### Covariance Functions

Polynomial
$$ k({\bf x}_1, {\bf x}_2) = (x + {\bf x}_1^\top {\bf x}_2)^k $$
where the scalar $x$ is a constant offset and the exponent $k$ is the degree of the polynomial.

Exponential
$$ k({\bf x}_1, {\bf x}_2) = \exp \Big( -\frac{|{\bf x}_1 - {\bf x}_2|}{l}\Big)$$

Squared exponential (RBF)
$$ k({\bf x}_1, {\bf x}_2) = \sigma_f^2 \exp \Big( - \frac{|{\bf x}_1 - {\bf x}_2|^2}{2l^2} \Big)$$

Gamma exponential
$$ k({\bf x}_1, {\bf x}_2) = \exp \Big( - \Big( \frac{|{\bf x}_1 - {\bf x}_2 |}{l} \Big)^\gamma \Big) $$

Rational quadratic
$$ k({\bf x}_1, {\bf x}_2) = \Big( 1 + \frac{|{\bf x}_1 - {\bf x}_2|^2}{2 \alpha l^2} \Big)^{-\alpha} $$

Neural network (arc sine kernel)
$$ k({\bf x}_1, {\bf x}_2) = \sigma^2 \frac{2}{\pi} {\rm asin}\Big( \frac{ \sigma_w^2 {\bf x}_1^\top {\bf x}_2 + \sigma_b^2 }{\sqrt{\sigma_w^2 {\bf x}_1^\top {\bf x}_1 + \sigma_b^2 + 1} \sqrt{\sigma_w^2 {\bf x}_2^\top  {\bf x}_2 + \sigma_b^2 + 1 }} \Big) $$

In [381]:
# Define kernels

def squared_dist(x1, x2):
    '''
    Pairwise squared Euclidean distances between two sets of points.

    Args:
        x1 -- array of m points (m x d)
        x2 -- array of n points (n x d)

    Returns:
        (m x n) matrix whose (i, j) entry is |x1[i] - x2[j]|^2
    '''
    # Expansion |a|^2 + |b|^2 - 2 a.b, evaluated for all pairs at once.
    sq = np.sum(x1**2, 1).reshape(-1, 1) + np.sum(x2**2, 1) - 2 * np.dot(x1, x2.T)
    # Round-off in the expansion can leave tiny negative entries for nearly
    # coincident points; clamp them so sqrt / fractional powers stay finite.
    return np.maximum(sq, 0)

def poly(x1, x2, x, k):
    '''
    Polynomial kernel, as defined above.

    Args:
        x1 -- array of m points (m x d)
        x2 -- array of n points (n x d)
        x -- scalar offset added to every inner product
        k -- exponent (degree) of the polynomial

    Returns:
        (m x n) matrix
    '''
    # Transpose x2 so the product holds the inner product of every pair of rows.
    return (x + np.dot(x1, x2.T))**k
    
def exp(x1, x2, l=1.):
    '''
    Exponential kernel, as defined above.

    Args:
        x1 -- array of m points (m x d)
        x2 -- array of n points (n x d)
        l -- length or smoothness parameter

    Returns:
        (m x n) matrix
    '''
    # Clamp at zero before the square root: a squared distance that has
    # rounded to a tiny negative number would otherwise become NaN.
    dist = np.sqrt(np.maximum(squared_dist(x1, x2), 0))
    return np.exp(-dist / l)
    
def rbf(x1, x2, l=1.0, sigma_f=1.0):
    '''
    Squared exponential (RBF) kernel, as defined above.

    Args:
        x1 -- array of m points (m x d)
        x2 -- array of n points (n x d)
        l -- length or smoothness parameter
        sigma_f -- scale factor; the kernel is multiplied by sigma_f**2

    Returns:
        (m x n) matrix
    '''
    exponent_scale = -0.5 / l**2
    amplitude = sigma_f**2
    return amplitude * np.exp(exponent_scale * squared_dist(x1, x2))

def gamma_exp(x1, x2, l=.5, gamma=.5):
    '''
    Gamma exponential kernel, as defined above.

    Args:
        x1 -- array of m points (m x d)
        x2 -- array of n points (n x d)
        l -- length or smoothness parameter
        gamma -- exponent applied to the scaled distance

    Returns:
        (m x n) matrix
    '''
    # The formula uses the distance |x1 - x2|, not its square, so take the
    # root first (clamped at zero to absorb round-off negatives).
    dist = np.sqrt(np.maximum(squared_dist(x1, x2), 0))
    return np.exp(-(dist / l)**gamma)

def rational_quadratic(x1, x2, l=3., alpha=3.):
    '''
    Rational quadratic kernel, as defined above.

    Args:
        x1 -- array of m points (m x d)
        x2 -- array of n points (n x d)
        l -- length or smoothness parameter
        alpha -- scales the squared distance and sets the (negative) exponent

    Returns:
        (m x n) matrix
    '''
    scale = 2 * alpha * l**2
    base = 1 + squared_dist(x1, x2) / scale
    return base**(-alpha)

def neural_network(x1, x2, sigma=1., sigma_w=1., sigma_b=1.):
    '''
    Neural network kernel, as defined above. Also known as the arc sine or MLP kernel.

    Args:
        x1 -- array of m points (m x d)
        x2 -- array of n points (n x d)
        sigma -- defines the variance sigma**2
        sigma_w -- defines variance sigma_w**2 of prior over input weights
        sigma_b -- define variance sigma_b**2 of prior over bias parameters

    Returns:
        (m x n) matrix
    '''
    # Inner product of every row of x1 with every row of x2 -> (m x n).
    numerator = sigma_w**2 * np.dot(x1, x2.T) + sigma_b**2
    # Each point's inner product with itself is its squared norm, one per row.
    sqrt1 = np.sqrt(sigma_w**2 * np.sum(x1**2, 1) + sigma_b**2 + 1)
    sqrt2 = np.sqrt(sigma_w**2 * np.sum(x2**2, 1) + sigma_b**2 + 1)
    # The outer product pairs every x1 normaliser with every x2 normaliser.
    denominator = np.outer(sqrt1, sqrt2)
    asin = np.arcsin(numerator / denominator)
    return sigma**2 * (2 / np.pi) * asin
    
# GPy's own RBF kernel for 1-D inputs, constructed with variance=1 and lengthscale=1.
gpy_rbf = GPy.kern.RBF(input_dim=1, variance=1., lengthscale=1.)

In [388]:
# Plot priors
# Each section below builds a covariance matrix over the grid X, draws
# NSAMPLES functions from a zero-mean Gaussian with that covariance and
# prepares a plot title.
NSAMPLES = 3
X = np.linspace(-5., 5., 500)[:, None]
mu = np.zeros((500))

# 1. RBF or Squared Exponential
l1 = .5
sigma_f = 1.
rbf_cov = rbf(X,X,l=l1, sigma_f=sigma_f)
# Draw from rbf_cov itself so the samples match the covariance plotted with them.
rbf_samples = np.random.multivariate_normal(mu, rbf_cov, NSAMPLES)
rbf_title = "Squared Exponential (RBF) (l={l}, \u03C3_f={sigma_f})".format(l=l1, sigma_f=sigma_f)

# 2. Gamma Exponential
l2 = .5
gamma = .5
gamma_cov = gamma_exp(X, X, l = l2, gamma=gamma)
gamma_samples = np.random.multivariate_normal(mu, gamma_cov, NSAMPLES)
gamma_title = "Gamma Exponential (l={l}, \u03B3={gamma})".format(l=l2, gamma=gamma)

# 3. Rational Quadratic
l3 = 3.
alpha = 3.
rq_cov = rational_quadratic(X, X, l=l3, alpha=alpha)
rq_samples = np.random.multivariate_normal(mu, rq_cov, NSAMPLES)
rq_title = "Rational Quadratic (l={l}, \u03B1={alpha})".format(l=l3, alpha=alpha)

# 4. Neural Network (covariance taken from GPy's MLP kernel)
sigma = 1.
sigma_w = 3.5
sigma_b = 3
gpy_nn = GPy.kern.MLP(input_dim=1, variance=sigma**2, weight_variance=sigma_w**2, bias_variance=sigma_b**2)
nn_cov = gpy_nn.K(X, X)
nn_samples = np.random.multivariate_normal(mu, nn_cov, NSAMPLES)
nn_title = "Neural Network (\u03C3={sigma}, \u03C3_w={sigma_w}, \u03C3_b={sigma_b})".format(sigma=sigma, sigma_w=sigma_w, sigma_b=sigma_b)

# 5. Exponential
l5 = 1.
exp_cov = exp(X, X, l=l5)
exp_samples = np.random.multivariate_normal(mu, exp_cov, NSAMPLES)
exp_title = "Exponential (l={l})".format(l=l5)

# 6. Polynomial (covariance taken from GPy's Poly kernel)
k = 3.
gpy_poly = GPy.kern.Poly(input_dim=1, order=k)
poly_cov = gpy_poly.K(X, X)
poly_samples = np.random.multivariate_normal(mu, poly_cov, NSAMPLES)
poly_title = "Polynomial (k={order})".format(order=k)

# plot_gp(mu, rbf_cov, X, samples=rbf_samples)

plotly_gp(mu, rbf_cov, X, samples=rbf_samples, title=rbf_title)
plotly_gp(mu, gamma_cov, X, samples=gamma_samples, title=gamma_title)
plotly_gp(mu, rq_cov, X, samples=rq_samples, title=rq_title)
plotly_gp(mu, nn_cov, X, samples=nn_samples, title=nn_title)
# plotly_gp(mu, exp_cov, X, samples=exp_samples, title=exp_title)


In [389]:
# Gather one spec per prior and draw them on a 3 x 2 grid for comparison.
subplot_data = [
    {'mu': mu, 'cov': rbf_cov, 'X': X, 'samples': rbf_samples, 'title': rbf_title},
    {'mu': mu, 'cov': gamma_cov, 'X': X, 'samples': gamma_samples, 'title': gamma_title},
    {'mu': mu, 'cov': rq_cov, 'X': X, 'samples': rq_samples, 'title': rq_title},
    {'mu': mu, 'cov': nn_cov, 'X': X, 'samples': nn_samples, 'title': nn_title},
    {'mu': mu, 'cov': exp_cov, 'X': X, 'samples': exp_samples, 'title': exp_title},
    {'mu': mu, 'cov': poly_cov, 'X': X, 'samples': poly_samples, 'title': poly_title},
]
plotly_gps(subplot_data, nrows=3, ncols=2, title="Gaussian Process Priors")

## Gaussian Process Regression

In [376]:
# Fit a GP regression model to a few noisy samples of a sine wave and plot the posterior.
TRAIN_SIZE = 5
NOISE = 0.05

# Training inputs are drawn uniformly between -5 and 5; targets are
# sin(pi * x / 2) plus standard-normal noise scaled by NOISE.
# NOTE: this rebinds X, which the prior cells use for the evaluation grid.
X = np.random.uniform(-5., 5, (TRAIN_SIZE, 1))
Y = np.sin(np.pi*X/2) + np.random.randn(TRAIN_SIZE, 1)*NOISE

# RBF kernel constructed with variance=1 and lengthscale=1.
kernel = GPy.kern.RBF(input_dim=1, variance=1., lengthscale=1.)
model = GPy.models.GPRegression(X, Y, kernel)

# GPy will estimate noise by default
# model.Gaussian_noise.variance = NOISE**2
# model.Gaussian_noise.variance.fix()

# NOTE(review): presumably constrains every model parameter to be positive
# before optimising -- confirm against the installed GPy version.
model.constrain_positive('')
model.optimize(messages=True)

# Grid of 500 test inputs, as a column vector.
Xp = np.linspace(-5., 5., 500)[:, None]

# The 'prediction' is the mean of the GP
mup, covp = model.predict(Xp, full_cov=True)

# We can then draw samples from the posterior GP
# samples = np.random.multivariate_normal(mup[:,0], covp)
posteriorYp = model.posterior_samples_f(Xp, full_cov=True, size=3)

# Reshape samples to form expected by plotting function
# (assumes squeezing leaves a (500, 3) array, so the transpose has one sample per row -- TODO confirm)
samples = np.transpose(np.squeeze(posteriorYp))
plotly_gp(mup, covp, Xp, X_train=X, Y_train=Y, samples=samples, title="Posterior")


HBox(children=(VBox(children=(IntProgress(value=0, max=1000), HTML(value=''))), Box(children=(HTML(value=''),)…