# Gaussian Process API Tutorial 2

Thank you for checking out our package!

By the end of this tutorial, you should be able to:
- Save and load interpolated fits
- Check if two fits are equal

Let's import what we need for this tutorial!

In [None]:
import numpy as np
rs = np.random.RandomState(1)
from matplotlib import pyplot as plt

from gp_api.gaussian_process import GaussianProcess
from gp_api.kernels import CompactKernel, WhiteNoiseKernel

We want our plots to be displayed inline in the notebook!

In [None]:
%matplotlib inline

In [None]:
# Create a function to plot the output of a gp
def plot_linear(x_train, y_train, x_test, gp_fit, nerr=100):
    """Plot training points, the fit mean, and a 90% credible band.

    Parameters
    ----------
    x_train : array
        Training inputs; only column 0 is plotted.
    y_train : array
        Training targets, plotted against ``x_train[:, 0]``.
    x_test : array
        Evaluation points; column 0 is used as the horizontal axis.
    gp_fit : object
        Fit exposing ``mean(x)`` and ``rvs(n, x, random_state=...)``.
    nerr : int, optional
        Number of random draws used to estimate the credible band.

    Notes
    -----
    Random draws use the module-level ``rs`` random state.
    """
    # Evaluate the mean of the fit at the test points
    y_mean_test = gp_fit.mean(x_test)

    # Compute 90% credible intervals from random samples.
    # Use the ``nerr`` argument rather than a notebook-level global so the
    # function honors what the caller asked for.
    # NOTE(review): percentiles are taken along axis 1, which assumes the
    # draws are laid out along axis 1 of the rvs output -- confirm.
    y_samples_test = gp_fit.rvs(nerr, x_test, random_state=rs)
    y_90_lo_test, y_90_hi_test = np.percentile(y_samples_test, [5, 95], axis=1)

    # Initialize axis
    fig, ax = plt.subplots()

    # Plot training data on top (highest zorder)
    ax.scatter(x_train[:, 0], y_train, color='C3', zorder=2)

    # Plot fit mean
    ax.plot(x_test[:, 0], y_mean_test, color='C0', zorder=1)

    # Plot the 90 percent credible region underneath everything else
    ax.fill_between(
        x_test[:, 0], y_90_lo_test, y_90_hi_test,
        color="C0", alpha=0.4, zorder=0,
    )


## Saving and loading fits
We will take our example from the basic linear fit, and save it.

We will then load the fit, and check if it is the same fit.

First, let's reproduce the final example from the previous notebook:

In [None]:
# Define a function to train some data
def fit_compact_nd(x_train, y_train, whitenoise=0.0, sparse=True, use_cython=False,xpy=np):
    """Fit a Gaussian process to training data with a compact kernel.

    A ``CompactKernel`` is fitted to ``x_train`` with ``method="scott"``.
    When ``whitenoise`` is non-zero, a ``WhiteNoiseKernel`` fitted with
    ``method="simple"`` and ``scale=whitenoise`` is added to it.

    Parameters
    ----------
    x_train : array
        Training inputs; must have exactly two axes.
    y_train : array
        Training targets.
    whitenoise : float, optional
        Scale of the white-noise kernel; ``0.0`` disables it.
    sparse : bool, optional
        Forwarded to the kernel ``fit`` calls.
    use_cython : bool, optional
        Forwarded to the kernel ``fit`` calls.
    xpy : module, optional
        Accepted for interface compatibility; not used here.

    Returns
    -------
    The object returned by ``GaussianProcess.fit``.
    """
    # Unpacking the shape fails early unless x_train has exactly two axes
    n_points, n_dims = x_train.shape

    # The compact kernel is always part of the model
    kernel = CompactKernel.fit(
        x_train,
        method="scott",
        sparse=sparse,
        use_cython=use_cython,
    )

    # Optionally add a white-noise kernel on top; kernels can be summed
    if whitenoise != 0.0:
        noise_kernel = WhiteNoiseKernel.fit(
            x_train,
            method="simple",
            sparse=sparse,
            scale=whitenoise,
            use_cython=use_cython,
        )
        kernel = kernel + noise_kernel

    # Fit the training data using a gaussian process
    return GaussianProcess.fit(x_train, y_train, kernel=kernel)

In [None]:
# Build a noisy 1-D linear data set, fit it, and plot the result.
# NOTE: the training data depend on the order of draws from the seeded
# random state ``rs`` (uniform x, then x noise, then y noise); reordering
# these statements changes the generated data.

# Number of training points for the linear model
n_train = 20
# Number of points at which the fit is evaluated
n_test = 1000
# Number of random draws for the credible-interval estimate
n_err = 100
# Domain of the inputs
xmin, xmax = 0.0, 5.0
# Slope and intercept of the underlying linear model y = m*x + b
m, b = 4, -3
# Standard-deviation scale of the Gaussian noise added to x and to y
x_noise = 0.1
y_noise = 0.1
# Evenly spaced evaluation points as a column vector of shape (n_test, 1)
x_test = np.linspace(xmin, xmax, n_test)[:,None]

# Create training data: uniform inputs of shape (n_train, 1) and the exact
# linear response at those inputs
x_train = rs.uniform(xmin, xmax, (n_train,1))
y_train = m*x_train[:,0] + b

# Add noise. y_train was computed from the noise-free inputs, so perturbing
# x_train afterwards makes the recorded inputs noisy as well as the outputs.
x_train[:,0] += x_noise*rs.randn(n_train)
y_train += y_noise*rs.randn(n_train)

# Small white-noise scale; being non-zero, it makes fit_compact_nd add a
# white-noise kernel to the compact kernel
eps = 0.01

# Fit the training data using our gaussian process
gp_fit = fit_compact_nd(x_train,y_train,whitenoise=eps)

# Plot the results using the function we defined earlier
plot_linear(x_train, y_train, x_test, gp_fit)

In [None]:
# Save a fit to disk, load it back, and check that the round trip is exact.

# Import up front so that a failed import is not silently swallowed
import os

# What file would we like to use?
gp_filename = "test_serialization_fit.hdf5"

# Start from a clean slate: remove any file left over from a previous run.
# A missing file is the only expected failure; anything else (for example a
# permissions problem) should surface rather than be hidden.
try:
    os.remove(gp_filename)
except FileNotFoundError:
    pass

# Save the fit!
gp_fit.save(gp_filename, label="mylabel")

# Load the fit!
loaded_fit = GaussianProcess.load(gp_filename, label="mylabel")

# Assert original and loaded fits are identical
assert gp_fit.equiv(loaded_fit)

# Assert that evaluations are identical (same shape and same values)
assert np.array_equal(gp_fit.mean(x_test), loaded_fit.mean(x_test))

# Plot the loaded fit
plot_linear(x_train, y_train, x_test, loaded_fit)

# Clean up. This is best-effort, so file-system errors are tolerated, but
# unrelated exceptions (e.g. KeyboardInterrupt) are no longer swallowed.
try:
    os.remove(gp_filename)
except OSError:
    pass