From e6b29841cc98c5251abaf78a46dcee6a675fbfd9 Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastianb@nvidia.com>
Date: Tue, 10 Jan 2023 16:17:05 +0100
Subject: [PATCH] BUG: Ensure correct loop order in sin, cos, and arctan2

These were incorrect afer being vectorized.  The commit additional
tests these (not arctan2 admittedly) and adds a check to generate_umath
to make it a bit less likely that future additions add this type of thing.

Note that the check allows duplicated loops so long they are correctly
ordered the *first* time.  This makes results correct, but duplicated
loops are not nice anyways and it would be nice to remove them.

We could drop them manually in hindsight even?  In any case, that should
not be backported, so it is not includedhere.

Closes gh-22984
---
 numpy/core/code_generators/generate_umath.py | 51 ++++++++++++++++++--
 numpy/core/tests/test_umath.py               |  8 +++
 2 files changed, 54 insertions(+), 5 deletions(-)

diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py
index 39d4497b57dd..378ab3b7b966 100644
--- a/numpy/core/code_generators/generate_umath.py
+++ b/numpy/core/code_generators/generate_umath.py
@@ -89,6 +89,41 @@ def finish_signature(self, nin, nout):
         assert len(self.out) == nout
         self.astype = self.astype_dict.get(self.type, None)
 
+
+def _check_order(types1, types2):
+    dtype_order = allP + "O"
+    for t1, t2 in zip(types1, types2):
+        # We have no opinion on object or time ordering for now:
+        if t1 in "OP" or t2 in "OP":
+            return True
+        if t1 in "mM" or t2 in "mM":
+            return True
+
+        t1i = dtype_order.index(t1)
+        t2i = dtype_order.index(t2)
+        if t1i < t2i:
+            return
+        if t2i > t1i:
+            break
+
+    raise TypeError(
+            f"Input dtypes are unsorted or duplicate: {types1} and {types2}")
+
+
+def check_td_order(tds):
+    # A quick check for whether the signatures make sense, it happened too
+    # often that SIMD additions added loops that do not even make some sense.
+    # TODO: This should likely be a test and it would be nice if it rejected
+    #       duplicate entries as well (but we have many as of writing this).
+    signatures = [t.in_+t.out for t in tds]
+
+    for prev_i, sign in enumerate(signatures[1:]):
+        if sign in signatures[:prev_i+1]:
+            continue  # allow duplicates...
+        
+        _check_order(signatures[prev_i], sign)
+
+
 _fdata_map = dict(
     e='npy_%sf',
     f='npy_%sf',
@@ -172,6 +207,9 @@ def __init__(self, nin, nout, identity, docstring, typereso,
         for td in self.type_descriptions:
             td.finish_signature(self.nin, self.nout)
 
+        check_td_order(self.type_descriptions)
+
+
 # String-handling utilities to avoid locale-dependence.
 
 import string
@@ -691,18 +729,20 @@ def english_upper(s):
     Ufunc(1, 1, None,
           docstrings.get('numpy.core.umath.cos'),
           None,
+          TD('e', dispatch=[('loops_umath_fp', 'e')]),
           TD('f', dispatch=[('loops_trigonometric', 'f')]),
-          TD('ed', dispatch=[('loops_umath_fp', 'ed')]),
-          TD('fdg' + cmplx, f='cos'),
+          TD('d', dispatch=[('loops_umath_fp', 'd')]),
+          TD('g' + cmplx, f='cos'),
           TD(P, f='cos'),
           ),
 'sin':
     Ufunc(1, 1, None,
           docstrings.get('numpy.core.umath.sin'),
           None,
+          TD('e', dispatch=[('loops_umath_fp', 'e')]),
           TD('f', dispatch=[('loops_trigonometric', 'f')]),
-          TD('ed', dispatch=[('loops_umath_fp', 'ed')]),
-          TD('fdg' + cmplx, f='sin'),
+          TD('d', dispatch=[('loops_umath_fp', 'd')]),
+          TD('g' + cmplx, f='sin'),
           TD(P, f='sin'),
           ),
 'tan':
@@ -860,8 +900,9 @@ def english_upper(s):
     Ufunc(2, 1, None,
           docstrings.get('numpy.core.umath.arctan2'),
           None,
+          TD('e', f='atan2', astype={'e': 'f'}),
           TD('fd', dispatch=[('loops_umath_fp', 'fd')]),
-          TD(flts, f='atan2', astype={'e': 'f'}),
+          TD('g', f='atan2', astype={'e': 'f'}),
           TD(P, f='arctan2'),
           ),
 'remainder':
diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py
index 10b09f963ab4..03bed0aba615 100644
--- a/numpy/core/tests/test_umath.py
+++ b/numpy/core/tests/test_umath.py
@@ -3971,6 +3971,14 @@ def check(func, z0, d=1):
             check(func, pts, 1j)
             check(func, pts, 1+1j)
 
+    @np.errstate(all="ignore")
+    def test_promotion_corner_cases(self):
+        for func in self.funcs:
+            assert func(np.float16(1)).dtype == np.float16
+            # Integer to low precision float promotion is a dubious choice:
+            assert func(np.uint8(1)).dtype == np.float16
+            assert func(np.int16(1)).dtype == np.float32
+
 
 class TestAttributes:
     def test_attributes(self):