In [1]:
import sklearn.gaussian_process as gp

In [2]:
# Inputs expected by the cells below (load or define them in this cell before running):
# X_tr <-- training observations [# points, # features]
# y_tr <-- training labels [# points]
# X_te <-- test observations [# points, # features]
# y_te <-- test labels [# points]

In [3]:
# Model selection for a GP means choosing (and tuning) the prior's mean function
# and covariance kernel. The mean is usually a constant: zero, or the mean of the
# training targets.
# The covariance used here is a scaled RBF with two hyperparameters:
#   - signal variance σ²  -> ConstantKernel (initial value 1.0, bounds [1e-1, 1e3])
#   - lengthscale l       -> RBF            (initial value 10.0, bounds [1e-3, 1e3])
kernel = (
    gp.kernels.ConstantKernel(constant_value=1.0, constant_value_bounds=(1e-1, 1e3))
    * gp.kernels.RBF(length_scale=10.0, length_scale_bounds=(1e-3, 1e3))
)

In [None]:
# GP regressor built on the kernel defined above.
#   - n_restarts_optimizer=10: the hyperparameter optimizer is re-run from 10 extra,
#     randomly drawn starting points in addition to the kernel's initial values.
#   - alpha=0.1: value added to the diagonal of the training kernel matrix
#     (acts as an observation-noise / regularisation term).
#   - normalize_y=True: the targets are normalised before fitting (see the
#     scikit-learn docs for the exact scheme in the installed version).
#   - random_state=0: seeds the RNG that draws the restart initialisations, so that
#     Restart & Run All reproduces the same fitted hyperparameters.
model = gp.GaussianProcessRegressor(
    kernel=kernel,
    n_restarts_optimizer=10,
    alpha=0.1,
    normalize_y=True,
    random_state=0,
)

In [None]:
# The kernel hyperparameters are commonly tuned by maximising the log marginal
# likelihood of the training data.
# A gradient-based optimizer is typically used for efficiency; if none is specified
# when the model is constructed, the default optimizer is 'fmin_l_bfgs_b'.
# Because the log marginal likelihood is not necessarily convex, the optimizer is
# restarted several times from different initialisations (n_restarts_optimizer).
model.fit(X=X_tr, y=y_tr)

# Hyperparameters of the fitted kernel (model.kernel_), as a dict.
params = model.kernel_.get_params(deep=True)

In [None]:
# Predict at the test inputs; return_std=True additionally returns the standard
# deviation of the prediction at each test point.
y_pred, std = model.predict(X=X_te, return_std=True)

In [None]:
# Mean squared error of the predictions on the test set.
# NOTE(review): assumes y_pred and y_te have the same shape; a (n,) vs (n, 1)
# mismatch would broadcast to an (n, n) array instead of raising — verify.
residuals = y_pred - y_te
MSE = (residuals ** 2).mean()