Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FIX _safe_divide should handle zero-division with numpy scalar #27312

Merged
merged 13 commits into from
Sep 10, 2023
20 changes: 14 additions & 6 deletions sklearn/ensemble/_gb.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,17 @@

def _safe_divide(numerator, denominator):
"""Prevents overflow and division by zero."""
with np.errstate(divide="raise"):
try:
return numerator / denominator
except FloatingPointError:
return 0.0
try:
# Cast to a Python float to trigger a ZeroDivisionError without relying
glemaitre marked this conversation as resolved.
Show resolved Hide resolved
# on `np.errstate` that is not supported by Pyodide.
result = float(numerator) / float(denominator)
if np.isinf(result):
glemaitre marked this conversation as resolved.
Show resolved Hide resolved
# Emit a RuntimeWarning if the division overflows to infinity.
glemaitre marked this conversation as resolved.
Show resolved Hide resolved
warnings.warn("overflow encountered in divide", RuntimeWarning)
glemaitre marked this conversation as resolved.
Show resolved Hide resolved
return result
except ZeroDivisionError:
glemaitre marked this conversation as resolved.
Show resolved Hide resolved
warnings.warn("divide by zero encountered in divide", RuntimeWarning)
glemaitre marked this conversation as resolved.
Show resolved Hide resolved
return 0.0


def _init_raw_predictions(X, estimator, loss, use_predict_proba):
Expand Down Expand Up @@ -235,7 +241,9 @@ def compute_update(y_, indices, neg_gradient, raw_prediction, k):

# update each leaf (= perform line search)
for leaf in np.nonzero(tree.children_left == TREE_LEAF)[0]:
indices = np.nonzero(terminal_regions == leaf)[0] # of terminal regions
indices = np.nonzero(masked_terminal_regions == leaf)[
0
] # of terminal regions
y_ = y.take(indices, axis=0)
sw = None if sample_weight is None else sample_weight[indices]
update = compute_update(y_, indices, neg_gradient, raw_prediction, k)
Expand Down
8 changes: 5 additions & 3 deletions sklearn/ensemble/tests/test_gradient_boosting.py
Original file line number Diff line number Diff line change
Expand Up @@ -1445,11 +1445,13 @@ def test_huber_vs_mean_and_median():

def test_safe_divide():
"""Test that _safe_divide handles division by zero."""
assert _safe_divide(np.array([1e300]), 0) == 0

with pytest.warns(RuntimeWarning, match="divide"):
assert _safe_divide(np.float64(1e300), 0) == 0
with pytest.warns(RuntimeWarning, match="divide"):
assert _safe_divide(np.float64(0.0), np.float64(0.0)) == 0
with pytest.warns(RuntimeWarning, match="overflow"):
# np.finfo(float).max = 1.7976931348623157e+308
_safe_divide(np.array([1e300]), 1e-10)
print(_safe_divide(np.float64(1e300), 1e-10))
glemaitre marked this conversation as resolved.
Show resolved Hide resolved


def test_squared_error_exact_backward_compat():
Expand Down