Merge pull request #23763 from seberg/nep50-fixes-part2

BUG: Fix weak scalar logic for large ints in ufuncs
numpy · May 16, 2023 · 0200e4a · 0200e4a
2 parents 08f8ade + ed9b716
commit 0200e4a
Show file tree

Hide file tree

Showing 3 changed files with 86 additions and 3 deletions.
diff --git a/numpy/core/src/multiarray/arrayobject.h b/numpy/core/src/multiarray/arrayobject.h
@@ -40,6 +40,13 @@ static const int NPY_ARRAY_WARN_ON_WRITE = (1 << 31);
 static const int NPY_ARRAY_WAS_PYTHON_INT = (1 << 30);
 static const int NPY_ARRAY_WAS_PYTHON_FLOAT = (1 << 29);
 static const int NPY_ARRAY_WAS_PYTHON_COMPLEX = (1 << 28);
+/*
+ * Mark that this was a huge int which was turned into an object array (or
+ * unsigned/non-default integer array), but then replaced by a temporary
+ * array for further processing. This flag is only used in the ufunc machinery
+ * where it is tricky to correctly cover all type resolution paths.
+ */
+static const int NPY_ARRAY_WAS_INT_AND_REPLACED = (1 << 27);
 static const int NPY_ARRAY_WAS_PYTHON_LITERAL = (1 << 30 | 1 << 29 | 1 << 28);
 
 #endif  /* NUMPY_CORE_SRC_MULTIARRAY_ARRAYOBJECT_H_ */
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
@@ -1011,6 +1011,34 @@ convert_ufunc_arguments(PyUFuncObject *ufunc,
          * necessary to propagate the information to the legacy type resolution.
          */
         if (npy_mark_tmp_array_if_pyscalar(obj, out_op[i], &out_op_DTypes[i])) {
+            if (PyArray_FLAGS(out_op[i]) & NPY_ARRAY_WAS_PYTHON_INT
+                    && PyArray_TYPE(out_op[i]) != NPY_LONG) {
+                /*
+                 * When `np.array(integer)` is not the default integer (mainly
+                 * object dtype), this confuses many type resolvers.  Simply
+                 * forcing a default integer array is unfortunately easiest.
+                 * This disables the optional NEP 50 warnings, but in
+                 * practice when this happens we should _usually_ pick the
+                 * default integer loop and that raises an error.
+                 * (An exception is `float64(1.) + 10**100` which silently
+                 * will give a float64 result rather than a Python float.)
+                 *
+                 * TODO: Just like the general dual NEP 50/legacy promotion
+                 * support this is meant as a temporary hack for NumPy 1.25.
+                 */
+                static PyArrayObject *zero_arr = NULL;
+                if (NPY_UNLIKELY(zero_arr == NULL)) {
+                    zero_arr = (PyArrayObject *)PyArray_ZEROS(
+                            0, NULL, NPY_LONG, NPY_FALSE);
+                    if (zero_arr == NULL) {
+                        goto fail;
+                    }
+                    ((PyArrayObject_fields *)zero_arr)->flags |= (
+                        NPY_ARRAY_WAS_PYTHON_INT|NPY_ARRAY_WAS_INT_AND_REPLACED);
+                }
+                Py_INCREF(zero_arr);
+                Py_SETREF(out_op[i], zero_arr);
+            }
             *promoting_pyscalars = NPY_TRUE;
         }
     }
@@ -4929,9 +4957,13 @@ ufunc_generic_fastcall(PyUFuncObject *ufunc,
             if (!(orig_flags & NPY_ARRAY_WAS_PYTHON_LITERAL)) {
                 continue;
             }
-            /* If the descriptor matches, no need to worry about conversion */
-            if (PyArray_EquivTypes(
-                    PyArray_DESCR(operands[i]), operation_descrs[i])) {
+            /*
+             * If the descriptor matches, no need to convert, but integers may
+             * have been too large.
+             */
+            if (!(orig_flags & NPY_ARRAY_WAS_INT_AND_REPLACED)
+                    && PyArray_EquivTypes(
+                        PyArray_DESCR(operands[i]), operation_descrs[i])) {
                 continue;
             }
             /* Otherwise, replace the operand with a new array */

diff --git a/numpy/core/tests/test_nep50_promotions.py b/numpy/core/tests/test_nep50_promotions.py
@@ -181,10 +181,54 @@ def test_nep50_integer_regression():
     assert (arr + 2**63).dtype == np.float64
     assert (arr[()] + 2**63).dtype == np.float64
 
+
 def test_nep50_with_axisconcatenator():
     # I promised that this will be an error in the future in the 1.25
     # release notes;  test this (NEP 50 opt-in makes the deprecation an error).
     np._set_promotion_state("weak")
 
     with pytest.raises(OverflowError):
         np.r_[np.arange(5, dtype=np.int8), 255]
+
+
+@pytest.mark.parametrize("ufunc", [np.add, np.power])
+@pytest.mark.parametrize("state", ["weak", "weak_and_warn"])
+def test_nep50_huge_integers(ufunc, state):
+    # Very large integers are complicated, because they go to uint64 or
+    # object dtype.  This test covers a few possible paths (some of which
+    # cannot give the NEP 50 warnings).
+    np._set_promotion_state(state)
+
+    with pytest.raises(OverflowError):
+        ufunc(np.int64(0), 2**63)  # 2**63 too large for int64
+
+    if state == "weak_and_warn":
+        with pytest.warns(UserWarning,
+                match="result dtype changed.*float64.*uint64"):
+            with pytest.raises(OverflowError):
+                ufunc(np.uint64(0), 2**64)
+    else:
+        with pytest.raises(OverflowError):
+            ufunc(np.uint64(0), 2**64)  # 2**64 cannot be represented by uint64
+
+    # However, 2**63 can be represented by the uint64 (and that is used):
+    if state == "weak_and_warn":
+        with pytest.warns(UserWarning,
+                match="result dtype changed.*float64.*uint64"):
+            res = ufunc(np.uint64(1), 2**63)
+    else:
+        res = ufunc(np.uint64(1), 2**63)
+
+    assert res.dtype == np.uint64
+    assert res == ufunc(1, 2**63, dtype=object)
+
+    # The following paths fail to warn correctly about the change:
+    with pytest.raises(OverflowError):
+        ufunc(np.int64(1), 2**63)  # np.array(2**63) would go to uint
+
+    with pytest.raises(OverflowError):
+        ufunc(np.int64(1), 2**100)  # np.array(2**100) would go to object
+
+    # This would go to object and thus a Python float, not a NumPy one:
+    res = ufunc(1.0, 2**100)
+    assert isinstance(res, np.float64)