From 9c04a417fdfa7418a92456af3fbde29fc16886db Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Fri, 29 Sep 2017 11:47:51 +0800 Subject: [PATCH 01/21] add tol parameter --- sklearn/metrics/ranking.py | 9 ++++++--- sklearn/metrics/tests/test_ranking.py | 8 ++++++++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index 31c8b6c73ce7d..e532c9442d571 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -36,7 +36,7 @@ from .base import _average_binary_score -def auc(x, y, reorder=False): +def auc(x, y, reorder=False, tol=0): """Compute Area Under the Curve (AUC) using the trapezoidal rule This is a general function, given points on a curve. For computing the @@ -53,6 +53,9 @@ def auc(x, y, reorder=False): reorder : boolean, optional (default=False) If True, assume that the curve is ascending in the case of ties, as for an ROC curve. If the curve is non-ascending, the result will be wrong. + tol : float, optional (default=0) + Tolerance allowed when checking whether x is increasing. + Ignored when ``reorder=True``. Returns ------- @@ -91,8 +94,8 @@ def auc(x, y, reorder=False): x, y = x[order], y[order] else: dx = np.diff(x) - if np.any(dx < 0): - if np.all(dx <= 0): + if np.any(dx < -abs(tol)): + if np.all(dx <= abs(tol)): direction = -1 else: raise ValueError("Reordering is not turned on, and " diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index ab8a4684c0c65..24af1cd8c8386 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -429,6 +429,14 @@ def test_auc_errors(): assert_raises(ValueError, auc, [1.0, 0.0, 0.5], [0.0, 0.0, 0.0]) +def test_auc_tol(): + x1 = [1.0, 1.0, 3.0, 4.0] + x2 = [1.0 + 1e-10, 1.0 - 1e-10, 3.0, 4.0] + y = [1.0, 2.0, 3.0, 3.0] + assert_raises(ValueError, auc, x2, y, tol=0) + assert_almost_equal(auc(x1, y, tol=0), auc(x2, y, tol=1e-8)) + + def test_auc_score_non_binary_class(): # Test that roc_auc_score function returns an error when trying # to compute AUC for non-binary class values. From bf94eac0356e9a19e51d9cb595aa1f6812f32e7d Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Fri, 29 Sep 2017 22:11:06 +0800 Subject: [PATCH 02/21] address comment --- sklearn/metrics/ranking.py | 22 ++++++++++++++++++---- sklearn/metrics/tests/test_ranking.py | 8 ++++++++ 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index e532c9442d571..a21a5c5c08e70 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -36,7 +36,7 @@ from .base import _average_binary_score -def auc(x, y, reorder=False, tol=0): +def auc(x, y, reorder=None, tol=0): """Compute Area Under the Curve (AUC) using the trapezoidal rule This is a general function, given points on a curve. For computing the @@ -47,12 +47,18 @@ def auc(x, y, reorder=False, tol=0): Parameters ---------- x : array, shape = [n] - x coordinates. + Increasing or decreasing x coordinates. y : array, shape = [n] y coordinates. reorder : boolean, optional (default=False) If True, assume that the curve is ascending in the case of ties, as for an ROC curve. If the curve is non-ascending, the result will be wrong. + + ..deprecated:: 0.20 + Parameter ``reorder`` has been deprecated in version 0.20 and will + be removed in 0.22. Future (and default) behavior is equivalent to + `reorder=False`. + tol : float, optional (default=0) Tolerance allowed when checking whether x is increasing. Ignored when ``reorder=True``. @@ -86,6 +92,11 @@ def auc(x, y, reorder=False, tol=0): raise ValueError('At least 2 points are needed to compute' ' area under curve, but x.shape = %s' % x.shape) + if reorder is not None: + warnings.warn("The `reorder` parameter has been deprecated " + "in version 0.20 and will be removed in 0.22.", + DeprecationWarning) + direction = 1 if reorder: # reorder the data points according to the x axis and using y to @@ -98,8 +109,11 @@ def auc(x, y, reorder=False, tol=0): if np.all(dx <= abs(tol)): direction = -1 else: - raise ValueError("Reordering is not turned on, and " - "the x array is not increasing: %s" % x) + raise ValueError("Reordering is not turned on, and the x " + "array is neither increasing nor decreasing" + " : {}. np.diff(x) contains {} positive " + "values and {} negative values." + .format(x, (dx > 0).sum(), (dx < 0).sum())) area = direction * np.trapz(y, x) if isinstance(area, np.memmap): diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index 24af1cd8c8386..bb74bf91ee999 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -20,6 +20,7 @@ from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_warns +from sklearn.utils.testing import assert_warns_message from sklearn.metrics import auc from sklearn.metrics import average_precision_score @@ -437,6 +438,13 @@ def test_auc_tol(): assert_almost_equal(auc(x1, y, tol=0), auc(x2, y, tol=1e-8)) +def test_deprecated_auc_reorder(): + depr_message = ("The `reorder` parameter has been deprecated " + "in version 0.20 and will be removed in 0.22.") + assert_warns_message(DeprecationWarning, depr_message, auc, + [1, 2], [2, 3], reorder=True) + + def test_auc_score_non_binary_class(): # Test that roc_auc_score function returns an error when trying # to compute AUC for non-binary class values. From 6261a17e7ebc03b40c52aa390c8365212be5bb48 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Tue, 3 Oct 2017 21:22:57 +0800 Subject: [PATCH 03/21] remove tol --- sklearn/metrics/ranking.py | 6 +++--- sklearn/metrics/tests/test_ranking.py | 8 -------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index a21a5c5c08e70..b7585491dec6d 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -36,7 +36,7 @@ from .base import _average_binary_score -def auc(x, y, reorder=None, tol=0): +def auc(x, y, reorder=None): """Compute Area Under the Curve (AUC) using the trapezoidal rule This is a general function, given points on a curve. For computing the @@ -105,8 +105,8 @@ def auc(x, y, reorder=None, tol=0): x, y = x[order], y[order] else: dx = np.diff(x) - if np.any(dx < -abs(tol)): - if np.all(dx <= abs(tol)): + if np.any(dx < 0): + if np.all(dx <= 0): direction = -1 else: raise ValueError("Reordering is not turned on, and the x " diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index bb74bf91ee999..162337ef1268e 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -430,14 +430,6 @@ def test_auc_errors(): assert_raises(ValueError, auc, [1.0, 0.0, 0.5], [0.0, 0.0, 0.0]) -def test_auc_tol(): - x1 = [1.0, 1.0, 3.0, 4.0] - x2 = [1.0 + 1e-10, 1.0 - 1e-10, 3.0, 4.0] - y = [1.0, 2.0, 3.0, 3.0] - assert_raises(ValueError, auc, x2, y, tol=0) - assert_almost_equal(auc(x1, y, tol=0), auc(x2, y, tol=1e-8)) - - def test_deprecated_auc_reorder(): depr_message = ("The `reorder` parameter has been deprecated " "in version 0.20 and will be removed in 0.22.") From 90abcd0201fddc235e2fabf6ee813bc243cff643 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Tue, 3 Oct 2017 21:28:02 +0800 Subject: [PATCH 04/21] error message --- sklearn/metrics/ranking.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index b7585491dec6d..712cd02d78305 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -109,8 +109,7 @@ def auc(x, y, reorder=None): if np.all(dx <= 0): direction = -1 else: - raise ValueError("Reordering is not turned on, and the x " - "array is neither increasing nor decreasing" + raise ValueError("x is neither increasing nor decreasing" " : {}. np.diff(x) contains {} positive " "values and {} negative values." .format(x, (dx > 0).sum(), (dx < 0).sum())) From 8f5fde76c5ea580647e019a29be22173769fbe40 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Tue, 3 Oct 2017 22:52:32 +0800 Subject: [PATCH 05/21] update error message --- sklearn/metrics/ranking.py | 14 ++++++++++---- sklearn/metrics/tests/test_ranking.py | 8 +++++++- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index 712cd02d78305..836349b308516 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -109,10 +109,16 @@ def auc(x, y, reorder=None): if np.all(dx <= 0): direction = -1 else: - raise ValueError("x is neither increasing nor decreasing" - " : {}. np.diff(x) contains {} positive " - "values and {} negative values." - .format(x, (dx > 0).sum(), (dx < 0).sum())) + raise ValueError("x is neither increasing nor decreasing " + ": {}. np.diff(x) contains {} positive " + "values and {} negative values. The most " + "positive value in np.diff(x) : x[{}] - " + "x[{}] = {}. The most negative value in " + "np.diff(x) : x[{}] - x[{}] = {}." + .format(x, (dx > 0).sum(), (dx < 0).sum(), + dx.argmax() + 1, dx.argmax(), + dx.max(), dx.argmin() + 1, + dx.argmin(), dx.min())) area = direction * np.trapz(y, x) if isinstance(area, np.memmap): diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index 162337ef1268e..0efb03d9ca705 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -427,7 +427,13 @@ def test_auc_errors(): assert_raises(ValueError, auc, [0.0], [0.1]) # x is not in order - assert_raises(ValueError, auc, [1.0, 0.0, 0.5], [0.0, 0.0, 0.0]) + error_message = ("x is neither increasing nor decreasing : " + "[ 2.5 1.6 3.7 4.8]. np.diff(x) contains 2 positive " + "values and 1 negative values. The most positive value " + "in np.diff(x) : x[2] - x[1] = 2.1. The most negative " + "value in np.diff(x) : x[1] - x[0] = -0.9.") + assert_raise_message(ValueError, error_message, auc, + [2.5, 1.6, 3.7, 4.8], [5, 6, 7, 8]) def test_deprecated_auc_reorder(): From eda23c87914d4002591638f592f87fb181eb9564 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Tue, 3 Oct 2017 23:02:39 +0800 Subject: [PATCH 06/21] minor fix --- sklearn/metrics/ranking.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index 836349b308516..b0dede61859c7 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -50,7 +50,7 @@ def auc(x, y, reorder=None): Increasing or decreasing x coordinates. y : array, shape = [n] y coordinates. - reorder : boolean, optional (default=False) + reorder : boolean, optional (default=None) If True, assume that the curve is ascending in the case of ties, as for an ROC curve. If the curve is non-ascending, the result will be wrong. From 11362ac12992a4a8b1e37251758d9c27614e99a9 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Tue, 3 Oct 2017 23:03:27 +0800 Subject: [PATCH 07/21] minor fix --- sklearn/metrics/ranking.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index b0dede61859c7..85863245d6e1d 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -59,10 +59,6 @@ def auc(x, y, reorder=None): be removed in 0.22. Future (and default) behavior is equivalent to `reorder=False`. - tol : float, optional (default=0) - Tolerance allowed when checking whether x is increasing. - Ignored when ``reorder=True``. - Returns ------- auc : float From 9811c25e69e8a5798147426d1f81443b2ce9f428 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Tue, 3 Oct 2017 23:39:56 +0800 Subject: [PATCH 08/21] use int in the test to avoid strange failure --- sklearn/metrics/tests/test_ranking.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index 0efb03d9ca705..5cc11f82a383b 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -427,13 +427,13 @@ def test_auc_errors(): assert_raises(ValueError, auc, [0.0], [0.1]) # x is not in order - error_message = ("x is neither increasing nor decreasing : " - "[ 2.5 1.6 3.7 4.8]. np.diff(x) contains 2 positive " - "values and 1 negative values. The most positive value " - "in np.diff(x) : x[2] - x[1] = 2.1. The most negative " - "value in np.diff(x) : x[1] - x[0] = -0.9.") + error_message = ("x is neither increasing nor decreasing : [2 1 3 4]. " + "np.diff(x) contains 2 positive values and 1 negative " + "values. The most positive value in np.diff(x) : x[2] " + "- x[1] = 2. The most negative value in np.diff(x) : " + "x[1] - x[0] = -1.") assert_raise_message(ValueError, error_message, auc, - [2.5, 1.6, 3.7, 4.8], [5, 6, 7, 8]) + [2, 1, 3, 4], [5, 6, 7, 8]) def test_deprecated_auc_reorder(): From bd8bbdb4781937848a7c17cd385e05669dc04631 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Wed, 4 Oct 2017 09:06:25 +0800 Subject: [PATCH 09/21] improve --- sklearn/metrics/ranking.py | 7 ++++--- sklearn/metrics/tests/test_ranking.py | 15 ++++++++------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index 85863245d6e1d..35bf221e02b53 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -105,6 +105,8 @@ def auc(x, y, reorder=None): if np.all(dx <= 0): direction = -1 else: + maxpos = dx.argmax() + minpos = dx.argmin() raise ValueError("x is neither increasing nor decreasing " ": {}. np.diff(x) contains {} positive " "values and {} negative values. The most " @@ -112,9 +114,8 @@ def auc(x, y, reorder=None): "x[{}] = {}. The most negative value in " "np.diff(x) : x[{}] - x[{}] = {}." .format(x, (dx > 0).sum(), (dx < 0).sum(), - dx.argmax() + 1, dx.argmax(), - dx.max(), dx.argmin() + 1, - dx.argmin(), dx.min())) + maxpos + 1, maxpos, dx.max(), + minpos + 1, minpos, dx.min())) area = direction * np.trapz(y, x) if isinstance(area, np.memmap): diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index 5cc11f82a383b..f05dc9a8f9b3c 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -427,13 +427,14 @@ def test_auc_errors(): assert_raises(ValueError, auc, [0.0], [0.1]) # x is not in order - error_message = ("x is neither increasing nor decreasing : [2 1 3 4]. " - "np.diff(x) contains 2 positive values and 1 negative " - "values. The most positive value in np.diff(x) : x[2] " - "- x[1] = 2. The most negative value in np.diff(x) : " - "x[1] - x[0] = -1.") - assert_raise_message(ValueError, error_message, auc, - [2, 1, 3, 4], [5, 6, 7, 8]) + x = [2, 1, 3, 4] + y = [5, 6, 7, 8] + error_message = ("x is neither increasing nor decreasing : {}. np.diff(x)" + " contains 2 positive values and 1 negative values. The" + " most positive value in np.diff(x) : x[2] - x[1] = 2." + " The most negative value in np.diff(x) : x[1] - x[0] =" + " -1.".format(np.array(x))) + assert_raise_message(ValueError, error_message, auc, x, y) def test_deprecated_auc_reorder(): From 28845eefb0bc06787381bc85f625004ad2b9041c Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Mon, 9 Oct 2017 16:44:14 +0800 Subject: [PATCH 10/21] improve --- sklearn/metrics/ranking.py | 22 +++++++++++----------- sklearn/metrics/tests/test_ranking.py | 9 ++++----- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index 35bf221e02b53..becc5f9d9bd45 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -47,10 +47,11 @@ def auc(x, y, reorder=None): Parameters ---------- x : array, shape = [n] - Increasing or decreasing x coordinates. + x coordinates. These must be either monotonic increasing or monotonic + decreasing. y : array, shape = [n] y coordinates. - reorder : boolean, optional (default=None) + reorder : boolean, optional (default='deprecated') If True, assume that the curve is ascending in the case of ties, as for an ROC curve. If the curve is non-ascending, the result will be wrong. @@ -88,13 +89,13 @@ def auc(x, y, reorder=None): raise ValueError('At least 2 points are needed to compute' ' area under curve, but x.shape = %s' % x.shape) - if reorder is not None: + if reorder != 'deprecated': warnings.warn("The `reorder` parameter has been deprecated " "in version 0.20 and will be removed in 0.22.", DeprecationWarning) direction = 1 - if reorder: + if reorder is True: # reorder the data points according to the x axis and using y to # break ties order = np.lexsort((y, x)) @@ -105,16 +106,15 @@ def auc(x, y, reorder=None): if np.all(dx <= 0): direction = -1 else: + # Output the most positive value and most negative value in + # np.diff(x) for debugging. maxpos = dx.argmax() minpos = dx.argmin() raise ValueError("x is neither increasing nor decreasing " - ": {}. np.diff(x) contains {} positive " - "values and {} negative values. The most " - "positive value in np.diff(x) : x[{}] - " - "x[{}] = {}. The most negative value in " - "np.diff(x) : x[{}] - x[{}] = {}." - .format(x, (dx > 0).sum(), (dx < 0).sum(), - maxpos + 1, maxpos, dx.max(), + ": {}. The most positive value in np.diff(x)" + " : x[{}] - x[{}] = {}. The most negative " + "value in np.diff(x) : x[{}] - x[{}] = {}." + .format(x, maxpos + 1, maxpos, dx.max(), minpos + 1, minpos, dx.min())) area = direction * np.trapz(y, x) diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index f05dc9a8f9b3c..f4f2ceced7468 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -429,11 +429,10 @@ def test_auc_errors(): # x is not in order x = [2, 1, 3, 4] y = [5, 6, 7, 8] - error_message = ("x is neither increasing nor decreasing : {}. np.diff(x)" - " contains 2 positive values and 1 negative values. The" - " most positive value in np.diff(x) : x[2] - x[1] = 2." - " The most negative value in np.diff(x) : x[1] - x[0] =" - " -1.".format(np.array(x))) + error_message = ("x is neither increasing nor decreasing : {}. The most " + "positive value in np.diff(x) : x[2] - x[1] = 2. The " + "most negative value in np.diff(x) : x[1] - x[0] = " + "-1.".format(np.array(x))) assert_raise_message(ValueError, error_message, auc, x, y) From 04538a66263785a91024efe9f284fdf9182a649a Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Mon, 9 Oct 2017 16:47:20 +0800 Subject: [PATCH 11/21] minor fix --- sklearn/metrics/ranking.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index becc5f9d9bd45..e1ce82c058945 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -36,7 +36,7 @@ from .base import _average_binary_score -def auc(x, y, reorder=None): +def auc(x, y, reorder='deprecated'): """Compute Area Under the Curve (AUC) using the trapezoidal rule This is a general function, given points on a curve. For computing the From b818a56b9b8fa5b54a1773e0c4ad7f01b57c8564 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Mon, 9 Oct 2017 23:25:25 +0800 Subject: [PATCH 12/21] update comment --- sklearn/metrics/ranking.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index e1ce82c058945..cdd2e37c5c918 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -52,8 +52,9 @@ def auc(x, y, reorder='deprecated'): y : array, shape = [n] y coordinates. reorder : boolean, optional (default='deprecated') - If True, assume that the curve is ascending in the case of ties, as for - an ROC curve. If the curve is non-ascending, the result will be wrong. + Whether to sort x before computing. If False, assume that x must be + either monotonic increasing or monotonic decreasing. If True, y is + used to break ties when sorting x. ..deprecated:: 0.20 Parameter ``reorder`` has been deprecated in version 0.20 and will From 9bdca94f6940c7af37f1639471dded5e94e0ef96 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Tue, 10 Oct 2017 09:00:31 +0800 Subject: [PATCH 13/21] reduce message --- sklearn/metrics/ranking.py | 10 +--------- sklearn/metrics/tests/test_ranking.py | 6 ++---- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index cdd2e37c5c918..cfb57d123a1bc 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -107,16 +107,8 @@ def auc(x, y, reorder='deprecated'): if np.all(dx <= 0): direction = -1 else: - # Output the most positive value and most negative value in - # np.diff(x) for debugging. - maxpos = dx.argmax() - minpos = dx.argmin() raise ValueError("x is neither increasing nor decreasing " - ": {}. The most positive value in np.diff(x)" - " : x[{}] - x[{}] = {}. The most negative " - "value in np.diff(x) : x[{}] - x[{}] = {}." - .format(x, maxpos + 1, maxpos, dx.max(), - minpos + 1, minpos, dx.min())) + ": {}.".format(x)) area = direction * np.trapz(y, x) if isinstance(area, np.memmap): diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index f4f2ceced7468..db63b9cd6bb6c 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -429,10 +429,8 @@ def test_auc_errors(): # x is not in order x = [2, 1, 3, 4] y = [5, 6, 7, 8] - error_message = ("x is neither increasing nor decreasing : {}. The most " - "positive value in np.diff(x) : x[2] - x[1] = 2. The " - "most negative value in np.diff(x) : x[1] - x[0] = " - "-1.".format(np.array(x))) + error_message = ("x is neither increasing nor decreasing : " + "{}".format(np.array(x))) assert_raise_message(ValueError, error_message, auc, x, y) From 4cb411ad80af8f4b2498853e0ba8a2052533481d Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Tue, 10 Oct 2017 11:08:48 +0800 Subject: [PATCH 14/21] add note and update what's new --- doc/whats_new/v0.20.rst | 6 ++++++ sklearn/metrics/ranking.py | 5 +++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 6ccdc58b7b3b0..35466251304ea 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -120,3 +120,9 @@ Linear, kernelized and related models - Deprecate ``random_state`` parameter in :class:`svm.OneClassSVM` as the underlying implementation is not random. :issue:`9497` by :user:`Albert Thomas `. + +Metrics + +- Deprecate ``reorder`` parameter in :class:`metrics.auc` as it is introduced + for roc_auc_score and is no longer used there. + :issue:`9851` by :user:`Hanmin Qin `. diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index cfb57d123a1bc..d20f92186b105 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -58,8 +58,9 @@ def auc(x, y, reorder='deprecated'): ..deprecated:: 0.20 Parameter ``reorder`` has been deprecated in version 0.20 and will - be removed in 0.22. Future (and default) behavior is equivalent to - `reorder=False`. + be removed in 0.22. It is introduced for roc_auc_score and is no + longer used there due to issue #9786. Future (and default) behavior + is equivalent to `reorder=False`. Returns ------- From 148bd6b36c012a84e68ac3986bd75ed26dfea197 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Tue, 10 Oct 2017 22:39:35 +0800 Subject: [PATCH 15/21] update reason for deprecation --- doc/whats_new/v0.20.rst | 5 +++-- sklearn/metrics/ranking.py | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 35466251304ea..e2c8b04172922 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -123,6 +123,7 @@ Linear, kernelized and related models Metrics -- Deprecate ``reorder`` parameter in :class:`metrics.auc` as it is introduced - for roc_auc_score and is no longer used there. +- Deprecate ``reorder`` parameter in :class:`metrics.auc` as the result + from auc will be significantly influenced if x is sorted unexpectedly + due to slight floating point error. :issue:`9851` by :user:`Hanmin Qin `. diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index d20f92186b105..d7fbf8a82b014 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -58,8 +58,9 @@ def auc(x, y, reorder='deprecated'): ..deprecated:: 0.20 Parameter ``reorder`` has been deprecated in version 0.20 and will - be removed in 0.22. It is introduced for roc_auc_score and is no - longer used there due to issue #9786. Future (and default) behavior + be removed in 0.22. The result from auc will be significantly + influenced if x is sorted unexpectedly due to slight floating point + error (See issue #9786). Future (and default) behavior is equivalent to `reorder=False`. Returns From 4c4ea886210d184593e6edd2f832f9af7a1292aa Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Tue, 10 Oct 2017 22:53:32 +0800 Subject: [PATCH 16/21] update reason for deprecation --- sklearn/metrics/ranking.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index d7fbf8a82b014..a6788bc9c60a3 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -60,8 +60,8 @@ def auc(x, y, reorder='deprecated'): Parameter ``reorder`` has been deprecated in version 0.20 and will be removed in 0.22. The result from auc will be significantly influenced if x is sorted unexpectedly due to slight floating point - error (See issue #9786). Future (and default) behavior - is equivalent to `reorder=False`. + error (See issue #9786). Future (and default) behavior is equivalent + to `reorder=False`. Returns ------- From a9b15dc14ff4fc0c2d7b18cc79d6cf24d7e522de Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Wed, 11 Oct 2017 11:16:19 +0800 Subject: [PATCH 17/21] more deprecation reasons --- doc/whats_new/v0.20.rst | 7 ++++--- sklearn/metrics/ranking.py | 9 +++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index e2c8b04172922..43b1d70cbec09 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -123,7 +123,8 @@ Linear, kernelized and related models Metrics -- Deprecate ``reorder`` parameter in :class:`metrics.auc` as the result - from auc will be significantly influenced if x is sorted unexpectedly - due to slight floating point error. +- Deprecate ``reorder`` parameter in :class:`metrics.auc` as it's introduced + for roc_auc_score (not for general use) and is no longer used there. What's + more, the result from auc will be significantly influenced if x is sorted + unexpectedly due to slight floating point error. :issue:`9851` by :user:`Hanmin Qin `. diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index a6788bc9c60a3..d1bb77e8c1369 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -58,10 +58,11 @@ def auc(x, y, reorder='deprecated'): ..deprecated:: 0.20 Parameter ``reorder`` has been deprecated in version 0.20 and will - be removed in 0.22. The result from auc will be significantly - influenced if x is sorted unexpectedly due to slight floating point - error (See issue #9786). Future (and default) behavior is equivalent - to `reorder=False`. + be removed in 0.22. It's introduced for roc_auc_score (not for + general use) and is no longer used there. What's more, the result + from auc will be significantly influenced if x is sorted + unexpectedly due to slight floating point error (See issue #9786). + Future (and default) behavior is equivalent to `reorder=False`. Returns ------- From 046f3e300e7f03a5887aa446ac4377781d167ab4 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Wed, 11 Oct 2017 11:23:56 +0800 Subject: [PATCH 18/21] more comment according to jnothman --- sklearn/metrics/ranking.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index d1bb77e8c1369..55c5408af8c00 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -54,7 +54,8 @@ def auc(x, y, reorder='deprecated'): reorder : boolean, optional (default='deprecated') Whether to sort x before computing. If False, assume that x must be either monotonic increasing or monotonic decreasing. If True, y is - used to break ties when sorting x. + used to break ties when sorting x. Make sure that y has a monotonic + relation to x when setting reorder to True. ..deprecated:: 0.20 Parameter ``reorder`` has been deprecated in version 0.20 and will From b66a6f4a0b1b4193b0d1e9cc9ea78c401e512e97 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Wed, 11 Oct 2017 16:03:33 +0800 Subject: [PATCH 19/21] more informative warning message --- sklearn/metrics/ranking.py | 8 +++++--- sklearn/metrics/tests/test_ranking.py | 6 ++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index 55c5408af8c00..d36bcbfbcb49d 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -63,7 +63,7 @@ def auc(x, y, reorder='deprecated'): general use) and is no longer used there. What's more, the result from auc will be significantly influenced if x is sorted unexpectedly due to slight floating point error (See issue #9786). - Future (and default) behavior is equivalent to `reorder=False`. + Future (and default) behavior is equivalent to ``reorder=False``. Returns ------- @@ -95,8 +95,10 @@ def auc(x, y, reorder='deprecated'): ' area under curve, but x.shape = %s' % x.shape) if reorder != 'deprecated': - warnings.warn("The `reorder` parameter has been deprecated " - "in version 0.20 and will be removed in 0.22.", + warnings.warn("The 'reorder' parameter has been deprecated in " + "version 0.20 and will be removed in 0.22. It is " + "recommended not to set 'reorder' and ensure that x " + "is monotonic increasing or monotonic decreasing.", DeprecationWarning) direction = 1 diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index db63b9cd6bb6c..790ad27738c64 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -435,8 +435,10 @@ def test_auc_errors(): def test_deprecated_auc_reorder(): - depr_message = ("The `reorder` parameter has been deprecated " - "in version 0.20 and will be removed in 0.22.") + depr_message = ("The 'reorder' parameter has been deprecated in version " + "0.20 and will be removed in 0.22. It is recommended not " + "to set 'reorder' and ensure that x is monotonic " + "increasing or monotonic decreasing.") assert_warns_message(DeprecationWarning, depr_message, auc, [1, 2], [2, 3], reorder=True) From d549b3a8a3fb1855da2338f12a38bde77107c8be Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Wed, 11 Oct 2017 16:39:41 +0800 Subject: [PATCH 20/21] fix stupid mistake --- sklearn/metrics/ranking.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index d36bcbfbcb49d..3bfb2f201c872 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -57,13 +57,13 @@ def auc(x, y, reorder='deprecated'): used to break ties when sorting x. Make sure that y has a monotonic relation to x when setting reorder to True. - ..deprecated:: 0.20 - Parameter ``reorder`` has been deprecated in version 0.20 and will - be removed in 0.22. It's introduced for roc_auc_score (not for - general use) and is no longer used there. What's more, the result - from auc will be significantly influenced if x is sorted - unexpectedly due to slight floating point error (See issue #9786). - Future (and default) behavior is equivalent to ``reorder=False``. + .. deprecated:: 0.20 + Parameter ``reorder`` has been deprecated in version 0.20 and will + be removed in 0.22. It's introduced for roc_auc_score (not for + general use) and is no longer used there. What's more, the result + from auc will be significantly influenced if x is sorted + unexpectedly due to slight floating point error (See issue #9786). + Future (and default) behavior is equivalent to ``reorder=False``. Returns ------- From f9534f4f0e97bdec3c588405e02da935adf2b186 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Tue, 17 Oct 2017 13:44:59 +1100 Subject: [PATCH 21/21] DOC attempt to clarify what's new wording --- doc/whats_new/v0.20.rst | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 43b1d70cbec09..559ee3424fb13 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -123,8 +123,7 @@ Linear, kernelized and related models Metrics -- Deprecate ``reorder`` parameter in :class:`metrics.auc` as it's introduced - for roc_auc_score (not for general use) and is no longer used there. What's - more, the result from auc will be significantly influenced if x is sorted - unexpectedly due to slight floating point error. +- Deprecate ``reorder`` parameter in :func:`metrics.auc` as it's no longer required + for :func:`metrics.roc_auc_score`. Moreover using ``reorder=True`` can hide bugs + due to floating point error in the input. :issue:`9851` by :user:`Hanmin Qin `.