Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MRG+1] API Change default multioutput in RegressorMixin.score to keep consistent with metrics.r2_score #13157

Merged
merged 11 commits into from Mar 15, 2019
@@ -50,6 +50,16 @@ Support for Python 3.4 and below has been officially dropped.
section should be ordered according to the label ordering above. Entries
should end with: :issue:`123456` by :user:`Joe Bloggs <joeongithub>`.
:mod:`sklearn.base`
...................

- |API| The R2 score used when calling ``score`` on a regressor will use
``multioutput='uniform_average'`` from version 0.23 to keep consistent with
:func:`metrics.r2_score`. This will influence the ``score`` method of all
the multioutput regressors (except for
:class:`multioutput.MultiOutputRegressor`).
:issue:`13157` by :user:`Hanmin Qin <qinhanmin2014>`.

:mod:`sklearn.calibration`
..........................

@@ -359,10 +359,32 @@ def score(self, X, y, sample_weight=None):
-------
score : float
R^2 of self.predict(X) wrt. y.
Notes
-----
The default value of ``multioutput`` in the ``score`` method will
This conversation was marked as resolved by qinhanmin2014

This comment has been minimized.

Copy link
@jnothman

jnothman Mar 12, 2019

Member

rm "of"

This comment has been minimized.

Copy link
@jnothman

jnothman Mar 12, 2019

Member

I think this wording is still a bit circuitous.

change from ``variance_weighted`` to ``uniform_average`` in 0.23 to
This conversation was marked as resolved by qinhanmin2014

This comment has been minimized.

Copy link
@jnothman

jnothman Mar 12, 2019

Member

my preference is for quotes rather than code when we are talking about string constants

keep consistent with ``metrics.r2_score``. This will influence the
This conversation was marked as resolved by qinhanmin2014

This comment has been minimized.

Copy link
@jnothman

jnothman Mar 12, 2019

Member

use single backticks

``score`` method of all the multioutput regressors (except for
``multioutput.MultiOutputRegressor``). To use the new default, please
either call ``metrics.r2_score`` directly or make a custom scorer with
``metrics.make_scorer``.
"""

from .metrics import r2_score
return r2_score(y, self.predict(X), sample_weight=sample_weight,
from .metrics.regression import _check_reg_targets
y_pred = self.predict(X)
# XXX: Remove the check in 0.23
y_type, _, _, _ = _check_reg_targets(y, y_pred, None)
if y_type == 'continuous-multioutput':
warnings.warn("The default value of multioutput (not exposed in "
"score method) will change from 'variance_weighted' "
"to 'uniform_average' in 0.23 to keep consistent "
"with 'metrics.r2_score'. To use the new default, "
"please either call 'metrics.r2_score' directly or "
"make a custom scorer with 'metric.make_scorer'.",
FutureWarning)
return r2_score(y, y_pred, sample_weight=sample_weight,
multioutput='variance_weighted')


@@ -1,3 +1,4 @@
import pytest
import numpy as np
from numpy.testing import assert_approx_equal

@@ -377,6 +378,7 @@ def test_pls_errors():
clf.fit, X, Y)


@pytest.mark.filterwarnings('ignore: The default value of multioutput') # 0.23
def test_pls_scaling():
# sanity check for scale=True
n_samples = 1000
@@ -2247,9 +2247,10 @@ class MultiTaskLassoCV(LinearModelCV, RegressorMixin):
--------
>>> from sklearn.linear_model import MultiTaskLassoCV
>>> from sklearn.datasets import make_regression
>>> from sklearn.metrics import r2_score
>>> X, y = make_regression(n_targets=2, noise=4, random_state=0)
>>> reg = MultiTaskLassoCV(cv=5, random_state=0).fit(X, y)
>>> reg.score(X, y) # doctest: +ELLIPSIS
>>> r2_score(y, reg.predict(X)) # doctest: +ELLIPSIS
0.9994...
>>> reg.alpha_
0.5713...
@@ -232,6 +232,7 @@ def test_lasso_path_return_models_vs_new_return_gives_same_coefficients():


@pytest.mark.filterwarnings('ignore: The default value of cv') # 0.22
@pytest.mark.filterwarnings('ignore: The default value of multioutput') # 0.23
def test_enet_path():
# We use a large number of samples and of informative features so that
# the l1_ratio selected is more toward ridge than lasso
@@ -1,3 +1,4 @@
import pytest
import numpy as np
from scipy import sparse

@@ -333,6 +334,7 @@ def test_ransac_min_n_samples():
assert_raises(ValueError, ransac_estimator7.fit, X, y)


@pytest.mark.filterwarnings('ignore: The default value of multioutput') # 0.23
def test_ransac_multi_dimensional_targets():

base_estimator = LinearRegression()
@@ -353,6 +355,7 @@ def test_ransac_multi_dimensional_targets():
assert_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)


@pytest.mark.filterwarnings('ignore: The default value of multioutput') # 0.23
def test_ransac_residual_loss():
loss_multi1 = lambda y_true, y_pred: np.sum(np.abs(y_true - y_pred), axis=1)
loss_multi2 = lambda y_true, y_pred: np.sum((y_true - y_pred) ** 2, axis=1)
@@ -490,6 +490,7 @@ def check_dense_sparse(test_func):

@pytest.mark.filterwarnings('ignore: The default of the `iid`') # 0.22
@pytest.mark.filterwarnings('ignore: The default value of cv') # 0.22
@pytest.mark.filterwarnings('ignore: The default value of multioutput') # 0.23
@pytest.mark.parametrize(
'test_func',
(_test_ridge_loo, _test_ridge_cv, _test_ridge_cv_normalize,
@@ -1313,6 +1313,7 @@ def test_pickle():

@pytest.mark.filterwarnings('ignore: The default of the `iid`') # 0.22
@pytest.mark.filterwarnings('ignore: The default value of n_split') # 0.22
@pytest.mark.filterwarnings('ignore: The default value of multioutput') # 0.23
def test_grid_search_with_multioutput_data():
# Test search with multi-output estimator

@@ -256,6 +256,7 @@ def partial_fit(self, X, y, sample_weight=None):
super().partial_fit(
X, y, sample_weight=sample_weight)

# XXX Remove this method in 0.23
def score(self, X, y, sample_weight=None):
"""Returns the coefficient of determination R^2 of the prediction.
@@ -5,6 +5,7 @@
# Author: Issam H. Laradji
# License: BSD 3 clause

import pytest
import sys
import warnings

@@ -308,6 +309,7 @@ def test_multilabel_classification():
assert_greater(mlp.score(X, y), 0.9)


@pytest.mark.filterwarnings('ignore: The default value of multioutput') # 0.23
def test_multioutput_regression():
# Test that multi-output regression works as expected
X, y = make_regression(n_samples=200, n_targets=5)
@@ -486,3 +486,23 @@ def test_tag_inheritance():
diamond_tag_est = DiamondOverwriteTag()
with pytest.raises(TypeError, match="Inconsistent values for tag"):
diamond_tag_est._get_tags()


# XXX: Remove in 0.23
def test_regressormixin_score_multioutput():
    """Check the FutureWarning emitted by RegressorMixin.score.

    A plain continuous target must score silently, while a
    continuous-multioutput target must warn about the upcoming change of
    the implicit ``multioutput`` default in 0.23.
    """
    from sklearn.linear_model import LinearRegression
    features = [[1], [2], [3]]
    # Single continuous target: no deprecation warning expected.
    single_target = [1, 2, 3]
    estimator = LinearRegression().fit(features, single_target)
    assert_no_warnings(estimator.score, features, single_target)
    # Multioutput continuous target: the FutureWarning with the exact
    # message below must be raised by ``score``.
    multi_target = [[1, 2], [2, 3], [3, 4]]
    estimator = LinearRegression().fit(features, multi_target)
    expected_msg = ("The default value of multioutput (not exposed in "
                    "score method) will change from 'variance_weighted' "
                    "to 'uniform_average' in 0.23 to keep consistent "
                    "with 'metrics.r2_score'. To use the new default, "
                    "please either call 'metrics.r2_score' directly or "
                    "make a custom scorer with 'metric.make_scorer'.")
    assert_warns_message(FutureWarning, expected_msg, estimator.score,
                         features, multi_target)
@@ -675,6 +675,7 @@ def test_dummy_regressor_return_std():
assert_array_equal(y_pred_list[1], y_std_expected)


@pytest.mark.filterwarnings('ignore: The default value of multioutput') # 0.23
@pytest.mark.parametrize("y,y_test", [
([1, 1, 1, 2], [1.25] * 4),
(np.array([[2, 2],
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.