Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MRG+1] API Change default multioutput in RegressorMixin.score to keep consistent with metrics.r2_score #13157

Merged
merged 11 commits into from Mar 15, 2019
Copy path View file
@@ -43,6 +43,15 @@ Support for Python 3.4 and below has been officially dropped.
section should be ordered according to the label ordering above. Entries
should end with: :issue:`123456` by :user:`Joe Bloggs <joeongithub>`.
:mod:`sklearn.base`
...................

- |API| The default value of ``multioutput`` in the ``score`` method of
This conversation was marked as resolved by qinhanmin2014

This comment has been minimized.

Copy link
@jnothman

jnothman Mar 12, 2019

Member

Reword this "the R2 score used when calling score on a regressor will use multioutput='uniform_average' from version ..."

:class:`base.RegressorMixin` will change from ``variance_weighted`` to
``uniform_average`` in 0.23 to keep consistent with :func:`metrics.r2_score`.
This will influence the ``score`` method of all the regressors.
This conversation was marked as resolved by qinhanmin2014

This comment has been minimized.

Copy link
@jnothman

jnothman Mar 12, 2019

Member

"all multioutput regressors"

:issue:`13157` by :user:`Hanmin Qin <qinhanmin2014>`.

:mod:`sklearn.calibration`
..........................

Copy path View file
@@ -13,7 +13,6 @@
from . import __version__



def clone(estimator, safe=True):
"""Constructs a new estimator with the same parameters.
@@ -323,10 +322,31 @@ def score(self, X, y, sample_weight=None):
-------
score : float
R^2 of self.predict(X) wrt. y.
Notes
-----
The default value of ``multioutput`` in the ``score`` method will
This conversation was marked as resolved by qinhanmin2014

This comment has been minimized.

Copy link
@jnothman

jnothman Mar 12, 2019

Member

rm "of"

This comment has been minimized.

Copy link
@jnothman

jnothman Mar 12, 2019

Member

I think this wording is still a bit circuitous.

change from ``variance_weighted`` to ``uniform_average`` in 0.23 to
This conversation was marked as resolved by qinhanmin2014

This comment has been minimized.

Copy link
@jnothman

jnothman Mar 12, 2019

Member

my preference is for quotes rather than code when we are talking about string constants

keep consistent with ``metrics.r2_score``. This will influence the
This conversation was marked as resolved by qinhanmin2014

This comment has been minimized.

Copy link
@jnothman

jnothman Mar 12, 2019

Member

use single backticks

``score`` method of all the regressors. To use the new default, please
either call ``metrics.r2_score`` directly or make a custom scorer with
``metrics.make_scorer``.
"""

from .metrics import r2_score
return r2_score(y, self.predict(X), sample_weight=sample_weight,
from .metrics.regression import _check_reg_targets
y_pred = self.predict(X)
# XXX: Remove the check in 0.23
y_type, _, _, _ = _check_reg_targets(y, y_pred, None)
if y_type == 'continuous-multioutput':
warnings.warn("The default value of multioutput (not exposed in "
"score method) will change from 'variance_weighted' "
"to 'uniform_average' in 0.23 to keep consistent "
"with 'metrics.r2_score'. To use the new default, "
"please either call 'metrics.r2_score' directly or "
"make a custom scorer with 'metric.make_scorer'.",
FutureWarning)
return r2_score(y, y_pred, sample_weight=sample_weight,
multioutput='variance_weighted')


@@ -1,3 +1,4 @@
import pytest
import numpy as np
from numpy.testing import assert_approx_equal

@@ -377,6 +378,7 @@ def test_pls_errors():
clf.fit, X, Y)


@pytest.mark.filterwarnings('ignore: The default value of multioutput') # 0.23
def test_pls_scaling():
# sanity check for scale=True
n_samples = 1000
@@ -2254,9 +2254,10 @@ class MultiTaskLassoCV(LinearModelCV, RegressorMixin):
--------
>>> from sklearn.linear_model import MultiTaskLassoCV
>>> from sklearn.datasets import make_regression
>>> from sklearn.metrics import r2_score
>>> X, y = make_regression(n_targets=2, noise=4, random_state=0)
>>> reg = MultiTaskLassoCV(cv=5, random_state=0).fit(X, y)
>>> reg.score(X, y) # doctest: +ELLIPSIS
>>> r2_score(y, reg.predict(X)) # doctest: +ELLIPSIS
0.9994...
>>> reg.alpha_
0.5713...
@@ -232,6 +232,7 @@ def test_lasso_path_return_models_vs_new_return_gives_same_coefficients():


@pytest.mark.filterwarnings('ignore: You should specify a value') # 0.22
@pytest.mark.filterwarnings('ignore: The default value of multioutput') # 0.23
def test_enet_path():
# We use a large number of samples and of informative features so that
# the l1_ratio selected is more toward ridge than lasso
@@ -1,3 +1,4 @@
import pytest
import numpy as np
from scipy import sparse

@@ -333,6 +334,7 @@ def test_ransac_min_n_samples():
assert_raises(ValueError, ransac_estimator7.fit, X, y)


@pytest.mark.filterwarnings('ignore: The default value of multioutput') # 0.23
def test_ransac_multi_dimensional_targets():

base_estimator = LinearRegression()
@@ -353,6 +355,7 @@ def test_ransac_multi_dimensional_targets():
assert_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)


@pytest.mark.filterwarnings('ignore: The default value of multioutput') # 0.23
def test_ransac_residual_loss():
loss_multi1 = lambda y_true, y_pred: np.sum(np.abs(y_true - y_pred), axis=1)
loss_multi2 = lambda y_true, y_pred: np.sum((y_true - y_pred) ** 2, axis=1)
@@ -490,6 +490,7 @@ def check_dense_sparse(test_func):

@pytest.mark.filterwarnings('ignore: The default of the `iid`') # 0.22
@pytest.mark.filterwarnings('ignore: You should specify a value') # 0.22
@pytest.mark.filterwarnings('ignore: The default value of multioutput') # 0.23
@pytest.mark.parametrize(
'test_func',
(_test_ridge_loo, _test_ridge_cv, _test_ridge_cv_normalize,
@@ -1312,6 +1312,7 @@ def test_pickle():

@pytest.mark.filterwarnings('ignore: The default of the `iid`') # 0.22
@pytest.mark.filterwarnings('ignore: You should specify a value') # 0.22
@pytest.mark.filterwarnings('ignore: The default value of multioutput') # 0.23
def test_grid_search_with_multioutput_data():
# Test search with multi-output estimator

@@ -5,6 +5,7 @@
# Author: Issam H. Laradji
# License: BSD 3 clause

import pytest
import sys
import warnings

@@ -308,6 +309,7 @@ def test_multilabel_classification():
assert_greater(mlp.score(X, y), 0.9)


@pytest.mark.filterwarnings('ignore: The default value of multioutput') # 0.23
def test_multioutput_regression():
# Test that multi-output regression works as expected
X, y = make_regression(n_samples=200, n_targets=5)
Copy path View file
@@ -449,3 +449,23 @@ def test_pickling_works_when_getstate_is_overwritten_in_the_child_class():
estimator_restored = pickle.loads(serialized)
assert_equal(estimator_restored.attribute_pickled, 5)
assert_equal(estimator_restored._attribute_not_pickled, None)


# XXX: Remove in 0.23
def test_regressormixin_score_multioutput():
    from sklearn.linear_model import LinearRegression

    X = [[1], [2], [3]]
    single_target = [1, 2, 3]
    multi_target = [[1, 2], [2, 3], [3, 4]]

    # continuous (single-output) y: score() must stay silent
    est = LinearRegression().fit(X, single_target)
    assert_no_warnings(est.score, X, single_target)

    # continuous-multioutput y: score() must emit the FutureWarning
    # announcing the 0.23 change of the (hidden) multioutput default
    est = LinearRegression().fit(X, multi_target)
    expected_msg = ("The default value of multioutput (not exposed in "
                    "score method) will change from 'variance_weighted' "
                    "to 'uniform_average' in 0.23 to keep consistent "
                    "with 'metrics.r2_score'. To use the new default, "
                    "please either call 'metrics.r2_score' directly or "
                    "make a custom scorer with 'metric.make_scorer'.")
    assert_warns_message(FutureWarning, expected_msg, est.score, X, multi_target)
Copy path View file
@@ -675,6 +675,7 @@ def test_dummy_regressor_return_std():
assert_array_equal(y_pred_list[1], y_std_expected)


@pytest.mark.filterwarnings('ignore: The default value of multioutput') # 0.23
@pytest.mark.parametrize("y,y_test", [
([1, 1, 1, 2], [1.25] * 4),
(np.array([[2, 2],
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.