From a4daaf5a3924248f71caddbcbd3cf28afef802a6 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Fri, 5 Nov 2021 10:23:42 -0500 Subject: [PATCH 1/2] BUG: Fix float16 einsum fastpaths using wrong tempvar --- .../core/src/multiarray/einsum_sumprod.c.src | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/numpy/core/src/multiarray/einsum_sumprod.c.src b/numpy/core/src/multiarray/einsum_sumprod.c.src index 29ceabd71d58..3114a58960ef 100644 --- a/numpy/core/src/multiarray/einsum_sumprod.c.src +++ b/numpy/core/src/multiarray/einsum_sumprod.c.src @@ -337,13 +337,13 @@ static NPY_GCC_OPT_3 void /**begin repeat2 * #i = 0, 1, 2, 3# */ - const @type@ b@i@ = @from@(data[@i@]); - const @type@ c@i@ = @from@(data_out[@i@]); + const @temptype@ b@i@ = @from@(data[@i@]); + const @temptype@ c@i@ = @from@(data_out[@i@]); /**end repeat2**/ /**begin repeat2 * #i = 0, 1, 2, 3# */ - const @type@ abc@i@ = scalar * b@i@ + c@i@; + const @temptype@ abc@i@ = scalar * b@i@ + c@i@; /**end repeat2**/ /**begin repeat2 * #i = 0, 1, 2, 3# @@ -353,8 +353,8 @@ static NPY_GCC_OPT_3 void } #endif // !NPY_DISABLE_OPTIMIZATION for (; count > 0; --count, ++data, ++data_out) { - const @type@ b = @from@(*data); - const @type@ c = @from@(*data_out); + const @temptype@ b = @from@(*data); + const @temptype@ c = @from@(*data_out); *data_out = @to@(scalar * b + c); } #endif // NPYV check for @type@ @@ -417,14 +417,14 @@ static void /**begin repeat2 * #i = 0, 1, 2, 3# */ - const @type@ a@i@ = @from@(data0[@i@]); - const @type@ b@i@ = @from@(data1[@i@]); - const @type@ c@i@ = @from@(data_out[@i@]); + const @temptype@ a@i@ = @from@(data0[@i@]); + const @temptype@ b@i@ = @from@(data1[@i@]); + const @temptype@ c@i@ = @from@(data_out[@i@]); /**end repeat2**/ /**begin repeat2 * #i = 0, 1, 2, 3# */ - const @type@ abc@i@ = a@i@ * b@i@ + c@i@; + const @temptype@ abc@i@ = a@i@ * b@i@ + c@i@; /**end repeat2**/ /**begin repeat2 * #i = 0, 1, 2, 3# @@ -434,9 +434,9 @@ static void } #endif // 
!NPY_DISABLE_OPTIMIZATION for (; count > 0; --count, ++data0, ++data1, ++data_out) { - const @type@ a = @from@(*data0); - const @type@ b = @from@(*data1); - const @type@ c = @from@(*data_out); + const @temptype@ a = @from@(*data0); + const @temptype@ b = @from@(*data1); + const @temptype@ c = @from@(*data_out); *data_out = @to@(a * b + c); } #endif // NPYV check for @type@ @@ -521,14 +521,14 @@ static NPY_GCC_OPT_3 void /**begin repeat2 * #i = 0, 1, 2, 3# */ - const @type@ ab@i@ = @from@(data0[@i@]) * @from@(data1[@i@]); + const @temptype@ ab@i@ = @from@(data0[@i@]) * @from@(data1[@i@]); /**end repeat2**/ accum += ab0 + ab1 + ab2 + ab3; } #endif // !NPY_DISABLE_OPTIMIZATION for (; count > 0; --count, ++data0, ++data1) { - const @type@ a = @from@(*data0); - const @type@ b = @from@(*data1); + const @temptype@ a = @from@(*data0); + const @temptype@ b = @from@(*data1); accum += a * b; } #endif // NPYV check for @type@ From d9dae76fce6fe3e87ee01670d3905f6fbdd04569 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Tue, 9 Nov 2021 18:30:45 -0600 Subject: [PATCH 2/2] TST: Add exhaustive test for einsum specialized loops This hopefully tests things well enough, at least some/most of the paths get triggered and led to errors without the previous float16 typing fixes. I manually confirmed that all paths that were *modified* in the previous commit actually get hit with float16 specialized loops. NOTE: This test may be a bit fragile with floating point roundoff errors, and can in parts be relaxed if this happens. 
--- numpy/core/tests/test_einsum.py | 48 +++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/numpy/core/tests/test_einsum.py b/numpy/core/tests/test_einsum.py index 78c5e527bbc4..172311624c27 100644 --- a/numpy/core/tests/test_einsum.py +++ b/numpy/core/tests/test_einsum.py @@ -1,5 +1,7 @@ import itertools +import pytest + import numpy as np from numpy.testing import ( assert_, assert_equal, assert_array_equal, assert_almost_equal, @@ -744,6 +746,52 @@ def test_einsum_all_contig_non_contig_output(self): np.einsum('ij,jk->ik', x, x, out=out) assert_array_equal(out.base, correct_base) + @pytest.mark.parametrize("dtype", + np.typecodes["AllFloat"] + np.typecodes["AllInteger"]) + def test_different_paths(self, dtype): + # Test originally added to cover broken float16 path: gh-20305 + # Likely most are covered elsewhere, at least partially. + dtype = np.dtype(dtype) + # Simple test, designed to exercise most specialized code paths, + # note the +0.5 for floats. This makes sure we use a float value + # where the results must be exact. 
+ arr = (np.arange(7) + 0.5).astype(dtype) + scalar = np.array(2, dtype=dtype) + + # contig -> scalar: + res = np.einsum('i->', arr) + assert res == arr.sum() + # contig, contig -> contig: + res = np.einsum('i,i->i', arr, arr) + assert_array_equal(res, arr * arr) + # noncontig, noncontig -> contig: + res = np.einsum('i,i->i', arr.repeat(2)[::2], arr.repeat(2)[::2]) + assert_array_equal(res, arr * arr) + # contig + contig -> scalar + assert np.einsum('i,i->', arr, arr) == (arr * arr).sum() + # contig + scalar -> contig (with out) + out = np.ones(7, dtype=dtype) + res = np.einsum('i,->i', arr, dtype.type(2), out=out) + assert_array_equal(res, arr * dtype.type(2)) + # scalar + contig -> contig (without out) + res = np.einsum(',i->i', scalar, arr) + assert_array_equal(res, arr * dtype.type(2)) + # scalar + contig -> scalar + res = np.einsum(',i->', scalar, arr) + # Use einsum to compare to not have difference due to sum round-offs: + assert res == np.einsum('i->', scalar * arr) + # contig + scalar -> scalar + res = np.einsum('i,->', arr, scalar) + # Use einsum to compare to not have difference due to sum round-offs: + assert res == np.einsum('i->', scalar * arr) + # contig + contig + contig -> scalar + arr = np.array([0.5, 0.5, 0.25, 4.5, 3.], dtype=dtype) + res = np.einsum('i,i,i->', arr, arr, arr) + assert_array_equal(res, (arr * arr * arr).sum()) + # four arrays: + res = np.einsum('i,i,i,i->', arr, arr, arr, arr) + assert_array_equal(res, (arr * arr * arr * arr).sum()) + def test_small_boolean_arrays(self): # See gh-5946. # Use array of True embedded in False.