Skip to content

Commit

Permalink
PERF: improve allnan reduction performance by up to 4x on float32
Browse files Browse the repository at this point in the history
  • Loading branch information
qwhelan committed Jan 25, 2020
1 parent a72e0d2 commit cfc7bac
Showing 1 changed file with 41 additions and 8 deletions.
49 changes: 41 additions & 8 deletions bottleneck/src/reduce_template.c
Original file line number Diff line number Diff line change
Expand Up @@ -1238,25 +1238,58 @@ REDUCE_MAIN(anynan, 0)
/* allnan ---------------------------------------------------------------- */

/* dtype = [['float64'], ['float32']] */
BN_OPT_3
REDUCE_ALL(allnan, DTYPE0) {
int f = 0;
npy_bool f = 0;
npy_DTYPE0 ai;
INIT_ALL
BN_BEGIN_ALLOW_THREADS
WHILE {
FOR {
ai = AI(DTYPE0);
if (REDUCE_CONTIGUOUS) {
const npy_intp LOOP_SIZE = 512 / sizeof(npy_DTYPE0);
const npy_intp count = it.nits * it.length;
const npy_intp loop_count = count / LOOP_SIZE;
const npy_intp residual = count % LOOP_SIZE;
const npy_DTYPE0* pa = PA(DTYPE0);
npy_bool* f_arr = malloc(LOOP_SIZE * sizeof(npy_bool));
for (npy_intp j=0; j < LOOP_SIZE; j++) {
f_arr[j] = 0;
}

for (npy_intp i=0; (i < loop_count) && (f == 0); i++) {
for (npy_intp j=0; j < LOOP_SIZE; j++) {
f_arr[j] = !isnan(pa[i * LOOP_SIZE + j]);
}

for (npy_intp j=0; j < LOOP_SIZE; j++) {
f += f_arr[j];
}
}
for (npy_intp j=0; (j < residual) && (f == 0); j++) {
const npy_DTYPE0 ai = pa[loop_count * LOOP_SIZE + j];
if (ai == ai) {
f = 1;
goto done;
}
}
NEXT
free(f_arr);
} else {
WHILE {
FOR {
ai = AI(DTYPE0);
if (ai == ai) {
f = 1;
goto done;
}
}
NEXT
}
}
done:
BN_END_ALLOW_THREADS
if (f) Py_RETURN_FALSE;
Py_RETURN_TRUE;
if (f) {
Py_RETURN_FALSE;
} else {
Py_RETURN_TRUE;
}
}

REDUCE_ONE(allnan, DTYPE0) {
Expand Down

0 comments on commit cfc7bac

Please sign in to comment.