# Gaussian Process regression

A notebook to generate synthetic data and fit a GP

In [1]:
import numpy as np
import pandas as pd
import GPy

### Generate synthetic data

In [2]:
def synthetic_function(x1, x2, x3, x4):
    """
    Synthetic function combining periodic, polynomial, and interaction terms
    """
    alpha = np.sin(2 * np.pi * x1)
    beta = (x2 - 0.5)**2
    gamma = np.exp(-5 * x3)
    delta = np.sin(2 * np.pi * x4) * x4
    epsilon = x1 * x3
    return alpha + beta + gamma + delta + (0.5 * epsilon)

In [3]:
n_samples = 200

X = np.random.rand(n_samples, 4)
y = synthetic_function(X[:, 0], X[:, 1], X[:, 2], X[:, 3]).reshape(-1, 1)

df = pd.DataFrame(X, columns=["x1", "x2", "x3", "x4"])
df["y"] = y
print(df.head())

         x1        x2        x3        x4         y
0  0.833761  0.226864  0.441414  0.549044 -0.662559
1  0.096403  0.127560  0.935823  0.986611  0.679558
2  0.093961  0.388977  0.311074  0.428190  0.981430
3  0.191785  0.678379  0.876511  0.477221  1.130281
4  0.697150  0.136160  0.157741  0.716799 -1.004834


### Fit a GP

In [4]:
kernel = GPy.kern.RBF(input_dim=4, ARD=True)
model = GPy.models.GPRegression(X, y, kernel)
model.optimize()

<paramz.optimization.optimization.opt_lbfgsb at 0x17e95e2d0>

In [5]:
print(model)


Name : GP regression
Objective : -387.1077895692607
Number of Parameters : 6
Number of Optimization Parameters : 6
Updates : True
Parameters:
  [1mGP_regression.         [0;0m  |                    value  |  constraints  |  priors
  [1mrbf.variance           [0;0m  |        94.92146119326638  |      +ve      |        
  [1mrbf.lengthscale        [0;0m  |                     (4,)  |      +ve      |        
  [1mGaussian_noise.variance[0;0m  |  2.4613019603082085e-120  |      +ve      |        


### Test

In [6]:
X_test = np.array([[0.51, 0.76, 0.94, 0.72]])  # y = -0.48
mean, variance = model.predict(X_test)
print(mean, variance)

[[-0.44271125]] [[1.29607268e-05]]


### Save fitted parameters

In [7]:
import json

params = {
    "lengthscale": model.kern.lengthscale.values.tolist(),
    "kernel-variance": float(model.kern.variance.values),
    "noise-variance": float(model.likelihood.variance.values)
}

with open("model-params.json", "w") as f:
    json.dump(params, f, indent=2)