diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst index 0e87b29828977..db85c11b117b7 100644 --- a/doc/whats_new/v0.24.rst +++ b/doc/whats_new/v0.24.rst @@ -338,6 +338,10 @@ Changelog - |Enhancement| :class:`isotonic.IsotonicRegression` now accepts 2d array with 1 feature as input array. :pr:`17379` by :user:`Jiaxiang <fujiaxiang>`. +- |Fix| Add tolerance when determining duplicate X values to prevent + inf values from being predicted by :class:`isotonic.IsotonicRegression`. + :pr:`18639` by :user:`Lucy Liu <lucyleeow>`. + :mod:`sklearn.kernel_approximation` ................................... diff --git a/sklearn/_isotonic.pyx b/sklearn/_isotonic.pyx index 75c4bbef11379..7f60b889fa284 100644 --- a/sklearn/_isotonic.pyx +++ b/sklearn/_isotonic.pyx @@ -77,8 +77,6 @@ def _make_unique(np.ndarray[dtype=floating] X, Assumes that X is ordered, so that all duplicates follow each other. """ unique_values = len(np.unique(X)) - if unique_values == len(X): - return X, y, sample_weights cdef np.ndarray[dtype=floating] y_out = np.empty(unique_values, dtype=X.dtype) @@ -90,13 +88,14 @@ def _make_unique(np.ndarray[dtype=floating] X, cdef floating current_weight = 0 cdef floating y_old = 0 cdef int i = 0 - cdef int current_count = 0 cdef int j cdef floating x cdef int n_samples = len(X) + cdef floating eps = np.finfo(X.dtype).resolution + for j in range(n_samples): x = X[j] - if x != current_x: + if x - current_x >= eps: # next unique value x_out[i] = current_x weights_out[i] = current_weight @@ -105,13 +104,11 @@ def _make_unique(np.ndarray[dtype=floating] X, current_x = x current_weight = sample_weights[j] current_y = y[j] * sample_weights[j] - current_count = 1 else: current_weight += sample_weights[j] current_y += y[j] * sample_weights[j] - current_count += 1 x_out[i] = current_x weights_out[i] = current_weight y_out[i] = current_y / current_weight - return x_out, y_out, weights_out + return x_out[:i+1], y_out[:i+1], weights_out[:i+1] diff --git a/sklearn/tests/test_isotonic.py 
b/sklearn/tests/test_isotonic.py index 66892370f06f0..af14f73cd1beb 100644 --- a/sklearn/tests/test_isotonic.py +++ b/sklearn/tests/test_isotonic.py @@ -511,6 +511,43 @@ def test_make_unique_dtype(): assert_array_equal(x, [2, 3, 5]) +@pytest.mark.parametrize("dtype", [np.float64, np.float32]) +def test_make_unique_tolerance(dtype): + # Check that equality takes account of np.finfo tolerance + x = np.array([0, 1e-16, 1, 1+1e-14], dtype=dtype) + y = x.copy() + w = np.ones_like(x) + x, y, w = _make_unique(x, y, w) + if dtype == np.float64: + x_out = np.array([0, 1, 1+1e-14]) + else: + x_out = np.array([0, 1]) + assert_array_equal(x, x_out) + + +def test_isotonic_make_unique_tolerance(): + # Check that averaging of targets for duplicate X is done correctly, + # taking into account tolerance + X = np.array([0, 1, 1+1e-16, 2], dtype=np.float64) + y = np.array([0, 1, 2, 3], dtype=np.float64) + ireg = IsotonicRegression().fit(X, y) + y_pred = ireg.predict([0, 0.5, 1, 1.5, 2]) + + assert_array_equal(y_pred, np.array([0, 0.75, 1.5, 2.25, 3])) + assert_array_equal(ireg.X_thresholds_, np.array([0., 1., 2.])) + assert_array_equal(ireg.y_thresholds_, np.array([0., 1.5, 3.])) + + +def test_isotonic_non_regression_inf_slope(): + # Non-regression test to ensure that inf values are not returned + # see: https://github.com/scikit-learn/scikit-learn/issues/10903 + X = np.array([0., 4.1e-320, 4.4e-314, 1.]) + y = np.array([0.42, 0.42, 0.44, 0.44]) + ireg = IsotonicRegression().fit(X, y) + y_pred = ireg.predict(np.array([0, 2.1e-319, 5.4e-316, 1e-10])) + assert np.all(np.isfinite(y_pred)) + + @pytest.mark.parametrize("increasing", [True, False]) def test_isotonic_thresholds(increasing): rng = np.random.RandomState(42)