Skip to content

Commit

Permalink
ENH: Remove looping definitions | Renamed fast loop macros
Browse files Browse the repository at this point in the history
  • Loading branch information
ganesh-k13 committed Nov 22, 2020
1 parent f93ca93 commit 285d810
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 28 deletions.
15 changes: 3 additions & 12 deletions numpy/core/src/umath/fast_loop_macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ abs_ptrdiff(char *a, char *b)
npy_intp i;\
for(i = 0; i < n; i++, ip1 += is1, op1 += os1, op2 += os2)

#define BINARY_LOOP_BASE\
#define BINARY_DEFS\
char *ip1 = args[0], *ip2 = args[1], *op1 = args[2];\
npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2];\
npy_intp n = dimensions[0];\
Expand All @@ -55,15 +55,9 @@ abs_ptrdiff(char *a, char *b)
#define BINARY_LOOP_SLIDING\
for(i = 0; i < n; i++, ip1 += is1, ip2 += is2, op1 += os1)

#define BINARY_LOOP_FIXED\
for(i = 0; i < n; i++, ip1 += is1, op1 += os1)

#define BINARY_LOOP_ZERO\
for(i = 0; i < n; i++, op1 += os1)

/** (ip1, ip2) -> (op1) */
#define BINARY_LOOP\
BINARY_LOOP_BASE\
BINARY_DEFS\
BINARY_LOOP_SLIDING

/** (ip1, ip2) -> (op1, op2) */
Expand Down Expand Up @@ -167,10 +161,7 @@ abs_ptrdiff(char *a, char *b)
#define IVDEP_LOOP
#endif
#define BASE_BINARY_LOOP_INP(tin, tout, op) \
char *ip1 = args[0], *ip2 = args[1], *op1 = args[2];\
npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2];\
npy_intp n = dimensions[0];\
npy_intp i;\
BINARY_DEFS\
IVDEP_LOOP \
for(i = 0; i < n; i++, ip1 += is1, ip2 += is2, op1 += os1) { \
const tin in1 = *(tin *)ip1; \
Expand Down
32 changes: 16 additions & 16 deletions numpy/core/src/umath/loops.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -847,35 +847,35 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 void

/* Libdivide only supports 32 and 64 bit types
* We try to pick the best possible one */
/**begin repeat1
* #kind = t, gen, do#
*/
#if NPY_BITSOF_@TYPE@ <= 32
#define libdivide_@type@_@kind@ libdivide_s32_@kind@
#define libdivide_@type@_t libdivide_s32_t
#define libdivide_@type@_gen libdivide_s32_gen
#define libdivide_@type@_do libdivide_s32_do
#else
#define libdivide_@type@_@kind@ libdivide_s64_@kind@
#define libdivide_@type@_t libdivide_s64_t
#define libdivide_@type@_gen libdivide_s64_gen
#define libdivide_@type@_do libdivide_s64_do
#endif
/**end repeat1**/

NPY_NO_EXPORT void
@TYPE@_divide(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP_BASE
BINARY_DEFS

/* When the divisor is a constant, use libdivide for faster division */
if (steps[1] == 0) {
const @type@ in2 = *(@type@ *)ip2;

/* If divisor is 0, we need not compute anything */
if (in2 == 0) {
BINARY_LOOP_ZERO {
BINARY_LOOP_SLIDING {
npy_set_floatstatus_divbyzero();
*((@type@ *)op1) = 0;
}
}
else {
struct libdivide_@type@_t fast_d = libdivide_@type@_gen(in2);
BINARY_LOOP_FIXED {
BINARY_LOOP_SLIDING {
const @type@ in1 = *(@type@ *)ip1;
/*
* FIXME: On x86 at least, dividing the smallest representable integer
Expand Down Expand Up @@ -1412,22 +1412,22 @@ TIMEDELTA_dm_m_multiply(char **args, npy_intp const *dimensions, npy_intp const
NPY_NO_EXPORT void
TIMEDELTA_mq_m_divide(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
{
BINARY_LOOP_BASE
BINARY_DEFS

/* When the divisor is a constant, use libdivide for faster division */
if (steps[1] == 0) {
const npy_int64 in2 = *(npy_int64 *)ip2;

/* If divisor is 0, we need not compute anything */
if (in2 == 0) {
BINARY_LOOP_ZERO {
BINARY_LOOP_SLIDING {
npy_set_floatstatus_divbyzero();
*((npy_timedelta *)op1) = NPY_DATETIME_NAT;
}
}
else {
struct libdivide_s64_t fast_d = libdivide_s64_gen(in2);
BINARY_LOOP_FIXED {
BINARY_LOOP_SLIDING {
const npy_timedelta in1 = *(npy_timedelta *)ip1;
if (in1 == NPY_DATETIME_NAT) {
*((npy_timedelta *)op1) = NPY_DATETIME_NAT;
Expand Down Expand Up @@ -1520,28 +1520,28 @@ NPY_NO_EXPORT void
TIMEDELTA_mm_q_floor_divide(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
{
/* NOTE: This code is similar to array floor divide */
BINARY_LOOP_BASE
BINARY_DEFS

/* When the divisor is a constant, use libdivide for faster division */
if (steps[1] == 0) {
const npy_timedelta in2 = *(npy_timedelta *)ip2;

/* If divisor is 0 or NAT, we need not compute anything */
if (in2 == 0) {
BINARY_LOOP_ZERO {
BINARY_LOOP_SLIDING {
npy_set_floatstatus_divbyzero();
*((npy_int64 *)op1) = 0;
}
}
else if (in2 == NPY_DATETIME_NAT) {
BINARY_LOOP_ZERO {
BINARY_LOOP_SLIDING {
npy_set_floatstatus_invalid();
*((npy_int64 *)op1) = 0;
}
}
else {
struct libdivide_s64_t fast_d = libdivide_s64_gen(in2);
BINARY_LOOP_FIXED {
BINARY_LOOP_SLIDING {
const npy_timedelta in1 = *(npy_timedelta *)ip1;
if (in1 == NPY_DATETIME_NAT) {
npy_set_floatstatus_invalid();
Expand Down

0 comments on commit 285d810

Please sign in to comment.