In [1]:
import numpy as np

X = np.linspace(start=0, stop=10, num=1_000).reshape(-1, 1)
y = np.squeeze(X * np.sin(X))

In [None]:
import matplotlib.pyplot as plt

plt.plot(X, y, label=r"$f(x) = x \sin(x)$", linestyle="dotted")
plt.legend()
plt.xlabel("$x$")
plt.ylabel("$f(x)$")
_ = plt.title("True generative process")

In [None]:
rng = np.random.RandomState(2137)
training_indices = rng.choice(np.arange(y.size), size=20, replace=False)
X_train, y_train = X[training_indices], y[training_indices]
training_indices

In [None]:
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF

kernel = 1 * RBF()
gaussian_process = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)
gaussian_process.fit(X_train, y_train)
gaussian_process.kernel_

In [None]:
y_pred, std_prediction = gaussian_process.predict(X, return_std=True)

plt.plot(X, y, label=r"$f(x) = x \sin(x)$", linestyle="dotted")
plt.scatter(X_train, y_train, label="Observations")
plt.plot(X, y_pred, label="Mean prediction")
plt.fill_between(
    X.ravel(),
    y_pred - 1.96 * std_prediction,
    y_pred + 1.96 * std_prediction,
    alpha=0.5,
    label=r"95% confidence interval",
)
plt.legend()
plt.xlabel("$x$")
plt.ylabel("$f(x)$")
_ = plt.title("Gaussian process regression on noise-free dataset")

In [16]:
import seaborn as sns

In [None]:
sns.lineplot(std_prediction)

In [95]:
from sklearn.model_selection import LeaveOneOut

training_indices = rng.choice(np.arange(y.size), size=10, replace=False)
X_train, y_train = X[training_indices], y[training_indices]
loo = LeaveOneOut()
loo.get_n_splits(X_train)

10

In [96]:
y_preds = []

for i, (train_index, test_index) in enumerate(loo.split(X_train)):

    kernel = 1 * RBF()
    gaussian_process = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)
    gaussian_process.fit(X_train[train_index], y_train[train_index])
    y_pred, std_prediction = gaussian_process.predict(
        X_train[test_index], return_std=True
    )
    true = y_train[test_index]

    print(f"For {X_train[test_index]} we predicted {y_pred} but True value is: {true}")
    y_preds.append(y_pred)

For [[5.34534535]] we predicted [-1.73785795e-09] but True value is: [-4.30985709]
For [[2.37237237]] we predicted [7.7304295e-08] but True value is: [1.65016353]
For [[6.45645646]] we predicted [1.104587] but True value is: [1.11312817]


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


For [[9.24924925]] we predicted [1.29553064] but True value is: [1.61518483]
For [[3.55355355]] we predicted [-8.72924083e-10] but True value is: [-1.42286751]
For [[2.52252252]] we predicted [5.72585205e-08] but True value is: [1.4637646]
For [[3.1031031]] we predicted [0.11942288] but True value is: [0.11940756]
For [[8.53853854]] we predicted [6.73948477] but True value is: [6.61479988]
For [[5.18518519]] we predicted [-4.61655431] but True value is: [-4.61636229]
For [[3.71371371]] we predicted [-2.01064781] but True value is: [-2.01066609]


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


In [97]:
y_preds = np.array(y_preds).flatten()

In [98]:
y_preds, y_train

(array([-1.73785795e-09,  7.73042950e-08,  1.10458700e+00,  1.29553064e+00,
        -8.72924083e-10,  5.72585205e-08,  1.19422875e-01,  6.73948477e+00,
        -4.61655431e+00, -2.01064781e+00]),
 array([-4.30985709,  1.65016353,  1.11312817,  1.61518483, -1.42286751,
         1.4637646 ,  0.11940756,  6.61479988, -4.61636229, -2.01066609]))

In [99]:
from sklearn.metrics import r2_score


r2_score(y_train, y_preds)

0.7400951708499011