Skip to content

Commit

Permalink
FIX compute y_std and y_cov properly with multi-target in GPR (#20761)
Browse files Browse the repository at this point in the history
  • Loading branch information
patrickctrf authored and glemaitre committed Oct 25, 2021
1 parent 1ed41f5 commit d4a48aa
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 4 deletions.
8 changes: 8 additions & 0 deletions doc/whats_new/v1.0.rst
Expand Up @@ -43,6 +43,14 @@ Fixed models
between sparse and dense input. :pr:`21195`
by :user:`Jérémie du Boisberranger <jeremiedbb>`.

:mod:`sklearn.gaussian_process`
...............................

- |Fix| Compute `y_std` properly with multi-target in
:class:`sklearn.gaussian_process.GaussianProcessRegressor` allowing
proper normalization in multi-target scene.
:pr:`20761` by :user:`Patrick de C. T. R. Ferreira <patrickctrf>`.

:mod:`sklearn.feature_extraction`
.................................

Expand Down
22 changes: 18 additions & 4 deletions sklearn/gaussian_process/_gpr.py
Expand Up @@ -349,11 +349,12 @@ def predict(self, X, return_std=False, return_cov=False):
y_mean : ndarray of shape (n_samples,) or (n_samples, n_targets)
Mean of predictive distribution a query points.
y_std : ndarray of shape (n_samples,), optional
y_std : ndarray of shape (n_samples,) or (n_samples, n_targets), optional
Standard deviation of predictive distribution at query points.
Only returned when `return_std` is True.
y_cov : ndarray of shape (n_samples, n_samples), optional
y_cov : ndarray of shape (n_samples, n_samples) or \
(n_samples, n_samples, n_targets), optional
Covariance of joint predictive distribution a query points.
Only returned when `return_cov` is True.
"""
Expand Down Expand Up @@ -403,7 +404,14 @@ def predict(self, X, return_std=False, return_cov=False):
y_cov = self.kernel_(X) - V.T @ V

# undo normalisation
y_cov = y_cov * self._y_train_std ** 2
y_cov = np.outer(y_cov, self._y_train_std ** 2).reshape(
*y_cov.shape, -1
)

# if y_cov has shape (n_samples, n_samples, 1), reshape to
# (n_samples, n_samples)
if y_cov.shape[2] == 1:
y_cov = np.squeeze(y_cov, axis=2)

return y_mean, y_cov
elif return_std:
Expand All @@ -424,7 +432,13 @@ def predict(self, X, return_std=False, return_cov=False):
y_var[y_var_negative] = 0.0

# undo normalisation
y_var = y_var * self._y_train_std ** 2
y_var = np.outer(y_var, self._y_train_std ** 2).reshape(
*y_var.shape, -1
)

# if y_var has shape (n_samples, 1), reshape to (n_samples,)
if y_var.shape[1] == 1:
y_var = np.squeeze(y_var, axis=1)

return y_mean, np.sqrt(y_var)
else:
Expand Down
30 changes: 30 additions & 0 deletions sklearn/gaussian_process/tests/test_gpr.py
Expand Up @@ -652,3 +652,33 @@ def test_gpr_predict_error():
err_msg = "At most one of return_std or return_cov can be requested."
with pytest.raises(RuntimeError, match=err_msg):
gpr.predict(X, return_cov=True, return_std=True)


def test_y_std_with_multitarget_normalized():
"""Check the proper normalization of `y_std` and `y_cov` in multi-target scene.
Non-regression test for:
https://github.com/scikit-learn/scikit-learn/issues/17394
https://github.com/scikit-learn/scikit-learn/issues/18065
"""
rng = np.random.RandomState(1234)

n_samples, n_features, n_targets = 12, 10, 6

X_train = rng.randn(n_samples, n_features)
y_train = rng.randn(n_samples, n_targets)
X_test = rng.randn(n_samples, n_features)

# Generic kernel
kernel = WhiteKernel(1.0, (1e-1, 1e3)) * C(10.0, (1e-3, 1e3))

model = GaussianProcessRegressor(
kernel=kernel, n_restarts_optimizer=10, alpha=0.1, normalize_y=True
)
model.fit(X_train, y_train)
y_pred, y_std = model.predict(X_test, return_std=True)
_, y_cov = model.predict(X_test, return_cov=True)

assert y_pred.shape == (n_samples, n_targets)
assert y_std.shape == (n_samples, n_targets)
assert y_cov.shape == (n_samples, n_samples, n_targets)

0 comments on commit d4a48aa

Please sign in to comment.