diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 38bd521412926..e857c7811c845 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -129,3 +129,10 @@ Linear, kernelized and related models - Deprecate ``random_state`` parameter in :class:`svm.OneClassSVM` as the underlying implementation is not random. :issue:`9497` by :user:`Albert Thomas `. + +Metrics + +- Deprecate ``reorder`` parameter in :func:`metrics.auc` as it's no longer required + for :func:`metrics.roc_auc_score`. Moreover using ``reorder=True`` can hide bugs + due to floating point error in the input. + :issue:`9851` by :user:`Hanmin Qin `. diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index 01e71a8514ce5..bc18b248cadfe 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -36,7 +36,7 @@ from .base import _average_binary_score -def auc(x, y, reorder=False): +def auc(x, y, reorder='deprecated'): """Compute Area Under the Curve (AUC) using the trapezoidal rule This is a general function, given points on a curve. For computing the @@ -47,12 +47,23 @@ def auc(x, y, reorder=False): Parameters ---------- x : array, shape = [n] - x coordinates. + x coordinates. These must be either monotonic increasing or monotonic + decreasing. y : array, shape = [n] y coordinates. - reorder : boolean, optional (default=False) - If True, assume that the curve is ascending in the case of ties, as for - an ROC curve. If the curve is non-ascending, the result will be wrong. + reorder : boolean, optional (default='deprecated') + Whether to sort x before computing. If False, assume that x must be + either monotonic increasing or monotonic decreasing. If True, y is + used to break ties when sorting x. Make sure that y has a monotonic + relation to x when setting reorder to True. + + .. deprecated:: 0.20 + Parameter ``reorder`` has been deprecated in version 0.20 and will + be removed in 0.22. It's introduced for roc_auc_score (not for + general use) and is no longer used there. What's more, the result + from auc will be significantly influenced if x is sorted + unexpectedly due to slight floating point error (See issue #9786). + Future (and default) behavior is equivalent to ``reorder=False``. Returns ------- @@ -83,8 +94,15 @@ def auc(x, y, reorder=False): raise ValueError('At least 2 points are needed to compute' ' area under curve, but x.shape = %s' % x.shape) + if reorder != 'deprecated': + warnings.warn("The 'reorder' parameter has been deprecated in " + "version 0.20 and will be removed in 0.22. It is " + "recommended not to set 'reorder' and ensure that x " + "is monotonic increasing or monotonic decreasing.", + DeprecationWarning) + direction = 1 - if reorder: + if reorder is True: # reorder the data points according to the x axis and using y to # break ties order = np.lexsort((y, x)) @@ -95,8 +113,8 @@ def auc(x, y, reorder=False): if np.all(dx <= 0): direction = -1 else: - raise ValueError("Reordering is not turned on, and " - "the x array is not increasing: %s" % x) + raise ValueError("x is neither increasing nor decreasing " + ": {}.".format(x)) area = direction * np.trapz(y, x) if isinstance(area, np.memmap): diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index 34973f138514e..acdec8932211c 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -20,6 +20,7 @@ from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_warns +from sklearn.utils.testing import assert_warns_message from sklearn.metrics import auc from sklearn.metrics import average_precision_score @@ -425,7 +426,20 @@ def test_auc_errors(): assert_raises(ValueError, auc, [0.0], [0.1]) # x is not in order - assert_raises(ValueError, auc, [1.0, 0.0, 0.5], [0.0, 0.0, 0.0]) + x = [2, 1, 3, 4] + y = [5, 6, 7, 8] + error_message = ("x is neither increasing nor decreasing : " + "{}".format(np.array(x))) + assert_raise_message(ValueError, error_message, auc, x, y) + + +def test_deprecated_auc_reorder(): + depr_message = ("The 'reorder' parameter has been deprecated in version " + "0.20 and will be removed in 0.22. It is recommended not " + "to set 'reorder' and ensure that x is monotonic " + "increasing or monotonic decreasing.") + assert_warns_message(DeprecationWarning, depr_message, auc, + [1, 2], [2, 3], reorder=True) def test_auc_score_non_binary_class():