scikit-learn · ogrisel · Oct 27, 2020 · Oct 19, 2020 · Oct 19, 2020 · Oct 19, 2020
diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst
@@ -338,6 +338,10 @@ Changelog
 - |Enhancement| :class:`isotonic.IsotonicRegression` now accepts 2d array with
   1 feature as input array. :pr:`17379` by :user:`Jiaxiang <fujiaxiang>`.
 
+- |Fix| Add tolerance when determining duplicate X values to prevent
+  inf values from being predicted by :class:`isotonic.IsotonicRegression`.
+  :pr:`18639` by :user:`Lucy Liu <lucyleeow>`.
+
 :mod:`sklearn.kernel_approximation`
 ...................................
 

diff --git a/sklearn/_isotonic.pyx b/sklearn/_isotonic.pyx
@@ -77,8 +77,6 @@ def _make_unique(np.ndarray[dtype=floating] X,
     Assumes that X is ordered, so that all duplicates follow each other.
     """
     unique_values = len(np.unique(X))
-    if unique_values == len(X):
-        return X, y, sample_weights
 
     cdef np.ndarray[dtype=floating] y_out = np.empty(unique_values,
                                                      dtype=X.dtype)
@@ -90,13 +88,14 @@ def _make_unique(np.ndarray[dtype=floating] X,
     cdef floating current_weight = 0
     cdef floating y_old = 0
     cdef int i = 0
-    cdef int current_count = 0
     cdef int j
     cdef floating x
     cdef int n_samples = len(X)
+    cdef floating eps = np.finfo(X.dtype).resolution
+
     for j in range(n_samples):
         x = X[j]
-        if x != current_x:
+        if x - current_x >= eps:
             # next unique value
             x_out[i] = current_x
             weights_out[i] = current_weight
@@ -105,13 +104,11 @@ def _make_unique(np.ndarray[dtype=floating] X,
             current_x = x
             current_weight = sample_weights[j]
             current_y = y[j] * sample_weights[j]
-            current_count = 1
         else:
             current_weight += sample_weights[j]
             current_y += y[j] * sample_weights[j]
-            current_count += 1
 
     x_out[i] = current_x
     weights_out[i] = current_weight
     y_out[i] = current_y / current_weight
-    return x_out, y_out, weights_out
+    return x_out[:i+1], y_out[:i+1], weights_out[:i+1]
diff --git a/sklearn/tests/test_isotonic.py b/sklearn/tests/test_isotonic.py
@@ -511,6 +511,43 @@ def test_make_unique_dtype():
         assert_array_equal(x, [2, 3, 5])
 
 
+@pytest.mark.parametrize("dtype", [np.float64, np.float32])
+def test_make_unique_tolerance(dtype):
+    # X values within np.finfo(dtype).resolution are merged as duplicates
+    x = np.array([0, 1e-16, 1, 1+1e-14], dtype=dtype)
+    y = x.copy()
+    w = np.ones_like(x)
+    x, y, w = _make_unique(x, y, w)
+    if dtype == np.float64:
+        x_out = np.array([0, 1, 1+1e-14])
+    else:
+        x_out = np.array([0, 1])
+    assert_array_equal(x, x_out)
+
+
+def test_isotonic_make_unique_tolerance():
+    # Check that targets of X values within tolerance are averaged together.
+    # NOTE(review): 1+1e-16 == 1.0 exactly in float64; confirm this is intended
+    X = np.array([0, 1, 1+1e-16, 2], dtype=np.float64)
+    y = np.array([0, 1, 2, 3], dtype=np.float64)
+    ireg = IsotonicRegression().fit(X, y)
+    y_pred = ireg.predict([0, 0.5, 1, 1.5, 2])
+
+    assert_array_equal(y_pred, np.array([0, 0.75, 1.5, 2.25, 3]))
+    assert_array_equal(ireg.X_thresholds_, np.array([0., 1., 2.]))
+    assert_array_equal(ireg.y_thresholds_, np.array([0., 1.5, 3.]))
+
+
+def test_isotonic_non_regression_inf_slope():
+    # Non-regression test: subnormal, near-duplicate X must give finite output
+    # see: https://github.com/scikit-learn/scikit-learn/issues/10903
+    X = np.array([0., 4.1e-320, 4.4e-314, 1.])
+    y = np.array([0.42, 0.42, 0.44, 0.44])
+    ireg = IsotonicRegression().fit(X, y)
+    y_pred = ireg.predict(np.array([0, 2.1e-319, 5.4e-316, 1e-10]))
+    assert np.all(np.isfinite(y_pred))
+
+
 @pytest.mark.parametrize("increasing", [True, False])
 def test_isotonic_thresholds(increasing):
     rng = np.random.RandomState(42)