From a4daaf5a3924248f71caddbcbd3cf28afef802a6 Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastian@sipsolutions.net>
Date: Fri, 5 Nov 2021 10:23:42 -0500
Subject: [PATCH 1/2] BUG: Fix float16 einsum fastpaths using wrong tempvar

---
 .../core/src/multiarray/einsum_sumprod.c.src  | 30 +++++++++----------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/numpy/core/src/multiarray/einsum_sumprod.c.src b/numpy/core/src/multiarray/einsum_sumprod.c.src
index 29ceabd71d58..3114a58960ef 100644
--- a/numpy/core/src/multiarray/einsum_sumprod.c.src
+++ b/numpy/core/src/multiarray/einsum_sumprod.c.src
@@ -337,13 +337,13 @@ static NPY_GCC_OPT_3 void
         /**begin repeat2
          * #i = 0, 1, 2, 3#
          */
-        const @type@ b@i@ = @from@(data[@i@]);
-        const @type@ c@i@ = @from@(data_out[@i@]);
+        const @temptype@ b@i@ = @from@(data[@i@]);
+        const @temptype@ c@i@ = @from@(data_out[@i@]);
         /**end repeat2**/
         /**begin repeat2
          * #i = 0, 1, 2, 3#
          */
-        const @type@ abc@i@ = scalar * b@i@ + c@i@;
+        const @temptype@ abc@i@ = scalar * b@i@ + c@i@;
         /**end repeat2**/
         /**begin repeat2
          * #i = 0, 1, 2, 3#
@@ -353,8 +353,8 @@ static NPY_GCC_OPT_3 void
     }
 #endif // !NPY_DISABLE_OPTIMIZATION
     for (; count > 0; --count, ++data, ++data_out) {
-        const @type@ b = @from@(*data);
-        const @type@ c = @from@(*data_out);
+        const @temptype@ b = @from@(*data);
+        const @temptype@ c = @from@(*data_out);
         *data_out = @to@(scalar * b + c);
     }
 #endif // NPYV check for @type@
@@ -417,14 +417,14 @@ static void
         /**begin repeat2
          * #i = 0, 1, 2, 3#
          */
-        const @type@ a@i@ = @from@(data0[@i@]);
-        const @type@ b@i@ = @from@(data1[@i@]);
-        const @type@ c@i@ = @from@(data_out[@i@]);
+        const @temptype@ a@i@ = @from@(data0[@i@]);
+        const @temptype@ b@i@ = @from@(data1[@i@]);
+        const @temptype@ c@i@ = @from@(data_out[@i@]);
         /**end repeat2**/
         /**begin repeat2
          * #i = 0, 1, 2, 3#
          */
-        const @type@ abc@i@ = a@i@ * b@i@ + c@i@;
+        const @temptype@ abc@i@ = a@i@ * b@i@ + c@i@;
         /**end repeat2**/
         /**begin repeat2
          * #i = 0, 1, 2, 3#
@@ -434,9 +434,9 @@ static void
     }
 #endif // !NPY_DISABLE_OPTIMIZATION
     for (; count > 0; --count, ++data0, ++data1, ++data_out) {
-        const @type@ a = @from@(*data0);
-        const @type@ b = @from@(*data1);
-        const @type@ c = @from@(*data_out);
+        const @temptype@ a = @from@(*data0);
+        const @temptype@ b = @from@(*data1);
+        const @temptype@ c = @from@(*data_out);
         *data_out = @to@(a * b + c);
     }
 #endif // NPYV check for @type@
@@ -521,14 +521,14 @@ static NPY_GCC_OPT_3 void
         /**begin repeat2
          * #i = 0, 1, 2, 3#
          */
-        const @type@ ab@i@ = @from@(data0[@i@]) * @from@(data1[@i@]);
+        const @temptype@ ab@i@ = @from@(data0[@i@]) * @from@(data1[@i@]);
         /**end repeat2**/
         accum += ab0 + ab1 + ab2 + ab3;
     }
 #endif // !NPY_DISABLE_OPTIMIZATION
     for (; count > 0; --count, ++data0, ++data1) {
-        const @type@ a = @from@(*data0);
-        const @type@ b = @from@(*data1);
+        const @temptype@ a = @from@(*data0);
+        const @temptype@ b = @from@(*data1);
         accum += a * b;
     }
 #endif // NPYV check for @type@

From d9dae76fce6fe3e87ee01670d3905f6fbdd04569 Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastian@sipsolutions.net>
Date: Tue, 9 Nov 2021 18:30:45 -0600
Subject: [PATCH 2/2] TST: Add exhaustive test for einsum specialized loops

This hopefully tests things well enough: at least some/most of the
paths get triggered, and they led to errors without the previous
float16 typing fixes.

I manually confirmed that all paths that were *modified* in the
previous commit actually get hit with float16 specialized loops.

NOTE: This test may be a bit fragile with floating point roundoff
errors, and can in parts be relaxed if this happens.
---
 numpy/core/tests/test_einsum.py | 48 +++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/numpy/core/tests/test_einsum.py b/numpy/core/tests/test_einsum.py
index 78c5e527bbc4..172311624c27 100644
--- a/numpy/core/tests/test_einsum.py
+++ b/numpy/core/tests/test_einsum.py
@@ -1,5 +1,7 @@
 import itertools
 
+import pytest
+
 import numpy as np
 from numpy.testing import (
     assert_, assert_equal, assert_array_equal, assert_almost_equal,
@@ -744,6 +746,52 @@ def test_einsum_all_contig_non_contig_output(self):
         np.einsum('ij,jk->ik', x, x, out=out)
         assert_array_equal(out.base, correct_base)
 
+    @pytest.mark.parametrize("dtype",
+             np.typecodes["AllFloat"] + np.typecodes["AllInteger"])
+    def test_different_paths(self, dtype):
+        # Test originally added to cover broken float16 path: gh-20305
+        # Likely most are covered elsewhere, at least partially.
+        dtype = np.dtype(dtype)
+        # Simple test, designed to exercise most specialized code paths,
+        # note the +0.5 for floats.  This makes sure we use a float value
+        # where the results must be exact.
+        arr = (np.arange(7) + 0.5).astype(dtype)
+        scalar = np.array(2, dtype=dtype)
+
+        # contig -> scalar:
+        res = np.einsum('i->', arr)
+        assert res == arr.sum()
+        # contig, contig -> contig:
+        res = np.einsum('i,i->i', arr, arr)
+        assert_array_equal(res, arr * arr)
+        # noncontig, noncontig -> contig:
+        res = np.einsum('i,i->i', arr.repeat(2)[::2], arr.repeat(2)[::2])
+        assert_array_equal(res, arr * arr)
+        # contig + contig -> scalar
+        assert np.einsum('i,i->', arr, arr) == (arr * arr).sum()
+        # contig + scalar -> contig (with out)
+        out = np.ones(7, dtype=dtype)
+        res = np.einsum('i,->i', arr, dtype.type(2), out=out)
+        assert_array_equal(res, arr * dtype.type(2))
+        # scalar + contig -> contig (no out argument)
+        res = np.einsum(',i->i', scalar, arr)
+        assert_array_equal(res, arr * dtype.type(2))
+        # scalar + contig -> scalar
+        res = np.einsum(',i->', scalar, arr)
+        # Compare against einsum to avoid differences from sum round-offs:
+        assert res == np.einsum('i->', scalar * arr)
+        # contig + scalar -> scalar
+        res = np.einsum('i,->', arr, scalar)
+        # Compare against einsum to avoid differences from sum round-offs:
+        assert res == np.einsum('i->', scalar * arr)
+        # contig + contig + contig -> scalar
+        arr = np.array([0.5, 0.5, 0.25, 4.5, 3.], dtype=dtype)
+        res = np.einsum('i,i,i->', arr, arr, arr)
+        assert_array_equal(res, (arr * arr * arr).sum())
+        # four arrays:
+        res = np.einsum('i,i,i,i->', arr, arr, arr, arr)
+        assert_array_equal(res, (arr * arr * arr * arr).sum())
+
     def test_small_boolean_arrays(self):
         # See gh-5946.
         # Use array of True embedded in False.