Skip to content

Commit

Permalink
Merge pull request #26055 from eendebakpt/optimize_power_double
Browse files Browse the repository at this point in the history
ENH: Optimize np.power(x, 2) for double and float type
  • Loading branch information
r-devulap committed May 14, 2024
2 parents 1d3201e + 51bedd5 commit 35c4f37
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 2 deletions.
50 changes: 48 additions & 2 deletions numpy/_core/src/umath/loops_umath_fp.dispatch.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -226,14 +226,27 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_@func@)
* #type = npy_double, npy_float#
* #vsub = , f#
* #sfx = f64, f32#
* #sqrt = sqrt, sqrtf#
*/
/**begin repeat1
* #func = power, arctan2#
* #intrin = pow, atan2#
* #func = power#
* #intrin = pow#
*/
NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_@func@)
(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
{
int stride_zero = steps[1]==0;
if (stride_zero) {
BINARY_DEFS
const @type@ in2 = *(@type@ *)ip2;
if (in2 == 2.0) {
BINARY_LOOP_SLIDING {
const @type@ in1 = *(@type@ *)ip1;
*(@type@ *)op1 = in1 * in1;
}
return;
}
}
#if NPY_SIMD && defined(NPY_HAVE_AVX512_SKX) && defined(NPY_CAN_LINK_SVML)
const @type@ *src1 = (@type@*)args[0];
const @type@ *src2 = (@type@*)args[1];
Expand All @@ -258,4 +271,37 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_@func@)
}
}
/**end repeat1**/

/**begin repeat1
* #func = arctan2#
* #intrin = atan2#
*/
NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_@func@)
(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
{
#if NPY_SIMD && defined(NPY_HAVE_AVX512_SKX) && defined(NPY_CAN_LINK_SVML)
const @type@ *src1 = (@type@*)args[0];
const @type@ *src2 = (@type@*)args[1];
@type@ *dst = (@type@*)args[2];
const int lsize = sizeof(src1[0]);
const npy_intp ssrc1 = steps[0] / lsize;
const npy_intp ssrc2 = steps[1] / lsize;
const npy_intp sdst = steps[2] / lsize;
const npy_intp len = dimensions[0];
assert(len <= 1 || (steps[0] % lsize == 0 && steps[1] % lsize == 0));
if (!is_mem_overlap(src1, steps[0], dst, steps[2], len) && !is_mem_overlap(src2, steps[1], dst, steps[2], len) &&
npyv_loadable_stride_@sfx@(ssrc1) && npyv_loadable_stride_@sfx@(ssrc2) &&
npyv_storable_stride_@sfx@(sdst)) {
simd_@intrin@_@sfx@(src1, ssrc1, src2, ssrc2, dst, sdst, len);
return;
}
#endif
BINARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
*(@type@ *)op1 = npy_@intrin@@vsub@(in1, in2);
}
}
/**end repeat1**/

/**end repeat**/
13 changes: 13 additions & 0 deletions numpy/_core/tests/test_umath.py
Original file line number Diff line number Diff line change
Expand Up @@ -1235,6 +1235,19 @@ def test_float_to_inf_power(self):
r = np.array([1, 1, np.inf, 0, np.inf, 0, np.inf, 0], dt)
assert_equal(np.power(a, b), r)

def test_power_fast_paths(self):
# gh-26055
for dt in [np.float32, np.float64]:
a = np.array([0, 1.1, 2, 12e12, -10., np.inf, -np.inf], dt)
expected = np.array([0.0, 1.21, 4., 1.44e+26, 100, np.inf, np.inf])
result = np.power(a, 2.)
assert_array_max_ulp(result, expected.astype(dt), maxulp=1)

a = np.array([0, 1.1, 2, 12e12], dt)
expected = np.sqrt(a).astype(dt)
result = np.power(a, 0.5)
assert_array_max_ulp(result, expected, maxulp=1)


class TestFloat_power:
def test_type_conversion(self):
Expand Down

0 comments on commit 35c4f37

Please sign in to comment.