ENH: Remove looping definitions | Renamed fast loop macros

numpy · Nov 22, 2020 · 285d810 · 285d810
1 parent f93ca93
commit 285d810
Show file tree

Hide file tree

Showing 2 changed files with 19 additions and 28 deletions.
diff --git a/numpy/core/src/umath/fast_loop_macros.h b/numpy/core/src/umath/fast_loop_macros.h
@@ -46,7 +46,7 @@ abs_ptrdiff(char *a, char *b)
     npy_intp i;\
     for(i = 0; i < n; i++, ip1 += is1, op1 += os1, op2 += os2)
 
-#define BINARY_LOOP_BASE\
+#define BINARY_DEFS\
     char *ip1 = args[0], *ip2 = args[1], *op1 = args[2];\
     npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2];\
     npy_intp n = dimensions[0];\
@@ -55,15 +55,9 @@ abs_ptrdiff(char *a, char *b)
 #define BINARY_LOOP_SLIDING\
     for(i = 0; i < n; i++, ip1 += is1, ip2 += is2, op1 += os1)
 
-#define BINARY_LOOP_FIXED\
-    for(i = 0; i < n; i++, ip1 += is1, op1 += os1)
-
-#define BINARY_LOOP_ZERO\
-    for(i = 0; i < n; i++, op1 += os1)
-
 /** (ip1, ip2) -> (op1) */
 #define BINARY_LOOP\
-    BINARY_LOOP_BASE\
+    BINARY_DEFS\
     BINARY_LOOP_SLIDING
 
 /** (ip1, ip2) -> (op1, op2) */
@@ -167,10 +161,7 @@ abs_ptrdiff(char *a, char *b)
 #define IVDEP_LOOP
 #endif
 #define BASE_BINARY_LOOP_INP(tin, tout, op) \
-    char *ip1 = args[0], *ip2 = args[1], *op1 = args[2];\
-    npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2];\
-    npy_intp n = dimensions[0];\
-    npy_intp i;\
+    BINARY_DEFS\
     IVDEP_LOOP \
     for(i = 0; i < n; i++, ip1 += is1, ip2 += is2, op1 += os1) { \
         const tin in1 = *(tin *)ip1; \

diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
@@ -847,35 +847,35 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 void
 
 /* Libdivide only supports 32 and 64 bit types
  * We try to pick the best possible one */
-/**begin repeat1
- * #kind = t, gen, do#
- */
 #if NPY_BITSOF_@TYPE@ <= 32
-#define libdivide_@type@_@kind@ libdivide_s32_@kind@
+#define libdivide_@type@_t libdivide_s32_t
+#define libdivide_@type@_gen libdivide_s32_gen
+#define libdivide_@type@_do libdivide_s32_do
 #else
-#define libdivide_@type@_@kind@ libdivide_s64_@kind@
+#define libdivide_@type@_t libdivide_s64_t
+#define libdivide_@type@_gen libdivide_s64_gen
+#define libdivide_@type@_do libdivide_s64_do
 #endif
-/**end repeat1**/
 
 NPY_NO_EXPORT void
 @TYPE@_divide(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
 {
-    BINARY_LOOP_BASE
+    BINARY_DEFS
 
     /* When the divisor is a constant, use libdivide for faster division */
     if (steps[1] == 0) {
         const @type@ in2 = *(@type@ *)ip2;
 
         /* If divisor is 0, we need not compute anything*/
         if (in2 == 0) {
-            BINARY_LOOP_ZERO {
+            BINARY_LOOP_SLIDING {
                 npy_set_floatstatus_divbyzero();
                 *((@type@ *)op1) = 0;
             }
         }
         else {
             struct libdivide_@type@_t fast_d = libdivide_@type@_gen(in2);
-            BINARY_LOOP_FIXED {
+            BINARY_LOOP_SLIDING {
                 const @type@ in1 = *(@type@ *)ip1;
                 /*
                  * FIXME: On x86 at least, dividing the smallest representable integer
@@ -1412,22 +1412,22 @@ TIMEDELTA_dm_m_multiply(char **args, npy_intp const *dimensions, npy_intp const
 NPY_NO_EXPORT void
 TIMEDELTA_mq_m_divide(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
 {
-    BINARY_LOOP_BASE
+    BINARY_DEFS
 
     /* When the divisor is a constant, use libdivide for faster division */
     if (steps[1] == 0) {
         const npy_int64 in2 = *(npy_int64 *)ip2;
 
         /* If divisor is 0, we need not compute anything */
         if (in2 == 0) {
-            BINARY_LOOP_ZERO {
+            BINARY_LOOP_SLIDING {
                 npy_set_floatstatus_divbyzero();
                 *((npy_timedelta *)op1) = NPY_DATETIME_NAT;
             }
         }
         else {
             struct libdivide_s64_t fast_d = libdivide_s64_gen(in2);
-            BINARY_LOOP_FIXED {
+            BINARY_LOOP_SLIDING {
                 const npy_timedelta in1 = *(npy_timedelta *)ip1;
                 if (in1 == NPY_DATETIME_NAT) {
                     *((npy_timedelta *)op1) = NPY_DATETIME_NAT;
@@ -1520,28 +1520,28 @@ NPY_NO_EXPORT void
 TIMEDELTA_mm_q_floor_divide(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
 {
     /* NOTE: This code is similar to array floor divide*/
-    BINARY_LOOP_BASE
+    BINARY_DEFS
 
     /* When the divisor is a constant, use libdivide for faster division */
     if (steps[1] == 0) {
         const npy_timedelta in2 = *(npy_timedelta *)ip2;
 
         /* If divisor is 0 or NAT, we need not compute anything */
         if (in2 == 0) {
-            BINARY_LOOP_ZERO {
+            BINARY_LOOP_SLIDING {
                 npy_set_floatstatus_divbyzero();
                 *((npy_int64 *)op1) = 0;
             }
         }
         else if (in2 == NPY_DATETIME_NAT) {
-            BINARY_LOOP_ZERO {
+            BINARY_LOOP_SLIDING {
                 npy_set_floatstatus_invalid();
                 *((npy_int64 *)op1) = 0;
             }
         }
         else {
             struct libdivide_s64_t fast_d = libdivide_s64_gen(in2);
-            BINARY_LOOP_FIXED {
+             BINARY_LOOP_SLIDING {
                 const npy_timedelta in1 = *(npy_timedelta *)ip1;
                 if (in1 == NPY_DATETIME_NAT) {
                     npy_set_floatstatus_invalid();