Skip to content

Commit

Permalink
ENH: improve the speed of array conversions using AVX2 if available
Browse files Browse the repository at this point in the history
  • Loading branch information
zephyr111 committed Feb 26, 2022
1 parent 08248aa commit e79f312
Showing 1 changed file with 48 additions and 19 deletions.
67 changes: 48 additions & 19 deletions numpy/core/src/multiarray/lowlevel_strided_loops.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -707,6 +707,15 @@ NPY_NO_EXPORT PyArrayMethod_StridedLoop *
/************* STRIDED CASTING SPECIALIZED FUNCTIONS *************/

/**begin repeat
* #isa = , _avx2#
* #ISA = , AVX2#
* #CHK = 1, defined(HAVE_ATTRIBUTE_TARGET_AVX2)#
* #ATTR = , NPY_GCC_TARGET_AVX2#
*/

#if @CHK@

/**begin repeat1
*
* #NAME1 = BOOL,
* UBYTE, USHORT, UINT, ULONG, ULONGLONG,
Expand Down Expand Up @@ -735,7 +744,7 @@ NPY_NO_EXPORT PyArrayMethod_StridedLoop *
* #is_complex1 = 0*15, 1*3#
*/

/**begin repeat1
/**begin repeat2
*
* #NAME2 = BOOL,
* UBYTE, USHORT, UINT, ULONG, ULONGLONG,
Expand Down Expand Up @@ -764,7 +773,7 @@ NPY_NO_EXPORT PyArrayMethod_StridedLoop *
* #is_complex2 = 0*15, 1*3#
*/

/**begin repeat2
/**begin repeat3
* #prefix = _aligned,,_aligned_contig,_contig#
* #aligned = 1,0,1,0#
* #contig = 0,0,1,1#
Expand Down Expand Up @@ -837,8 +846,9 @@ NPY_NO_EXPORT PyArrayMethod_StridedLoop *

#endif

static NPY_GCC_OPT_3 int
@prefix@_cast_@name1@_to_@name2@(
#if @CHK@
static NPY_GCC_OPT_3 @ATTR@ int
@prefix@_cast_@name1@_to_@name2@@isa@(
PyArrayMethod_Context *context, char *const *args,
const npy_intp *dimensions, const npy_intp *strides,
NpyAuxData *NPY_UNUSED(data))
Expand Down Expand Up @@ -866,7 +876,7 @@ static NPY_GCC_OPT_3 int
assert(N == 0 || npy_is_aligned(dst, _ALIGN(_TYPE2)));
#endif
/*printf("@prefix@_cast_@name1@_to_@name2@\n");*/
/*printf("@prefix@_cast_@name1@_to_@name2@@isa@\n");*/

while (N--) {
#if @aligned@
Expand Down Expand Up @@ -930,26 +940,27 @@ static NPY_GCC_OPT_3 int
}
return 0;
}
#endif

#undef _CONVERT_FN
#undef _TYPE2
#undef _TYPE1

#endif

/**end repeat3**/

/**end repeat2**/

/**end repeat1**/

/**end repeat**/

NPY_NO_EXPORT PyArrayMethod_StridedLoop *
PyArray_GetStridedNumericCastFn(int aligned, npy_intp src_stride,
static PyArrayMethod_StridedLoop *
PyArray_GetStridedNumericCastFn_body@isa@(int aligned, npy_intp src_stride,
npy_intp dst_stride,
int src_type_num, int dst_type_num)
{
switch (src_type_num) {
/**begin repeat
/**begin repeat1
*
* #NAME1 = BOOL,
* UBYTE, USHORT, UINT, ULONG, ULONGLONG,
Expand All @@ -971,7 +982,7 @@ PyArray_GetStridedNumericCastFn(int aligned, npy_intp src_stride,
case NPY_@NAME1@:
/*printf("test fn %d - second %d\n", NPY_@NAME1@, dst_type_num);*/
switch (dst_type_num) {
/**begin repeat1
/**begin repeat2
*
* #NAME2 = BOOL,
* UBYTE, USHORT, UINT, ULONG, ULONGLONG,
Expand All @@ -995,34 +1006,52 @@ PyArray_GetStridedNumericCastFn(int aligned, npy_intp src_stride,
# if NPY_USE_UNALIGNED_ACCESS
if (src_stride == sizeof(@type1@) &&
dst_stride == sizeof(@type2@)) {
return &_aligned_contig_cast_@name1@_to_@name2@;
return &_aligned_contig_cast_@name1@_to_@name2@@isa@;
}
else {
return &_aligned_cast_@name1@_to_@name2@;
return &_aligned_cast_@name1@_to_@name2@@isa@;
}
# else
if (src_stride == sizeof(@type1@) &&
dst_stride == sizeof(@type2@)) {
return aligned ?
&_aligned_contig_cast_@name1@_to_@name2@ :
&_contig_cast_@name1@_to_@name2@;
&_aligned_contig_cast_@name1@_to_@name2@@isa@ :
&_contig_cast_@name1@_to_@name2@@isa@;
}
else {
return aligned ? &_aligned_cast_@name1@_to_@name2@ :
&_cast_@name1@_to_@name2@;
return aligned ? &_aligned_cast_@name1@_to_@name2@@isa@ :
&_cast_@name1@_to_@name2@@isa@;
}
# endif

/**end repeat1**/
/**end repeat2**/
}
/*printf("switched test fn %d - second %d\n", NPY_@NAME1@, dst_type_num);*/

/**end repeat**/
/**end repeat1**/
}

return NULL;
}

#endif

/**end repeat**/

/*
 * Entry point for looking up a strided numeric cast loop.
 *
 * All five arguments are forwarded untouched to one of the generated
 * PyArray_GetStridedNumericCastFn_body* lookup routines, and that
 * routine's result is returned as-is.
 *
 * The AVX2 lookup is only considered when the build defines
 * HAVE_ATTRIBUTE_TARGET_AVX2, and is only chosen when NPY_CPU_HAVE(AVX2)
 * evaluates to true; in every other case the baseline lookup is used.
 */
NPY_NO_EXPORT PyArrayMethod_StridedLoop *
PyArray_GetStridedNumericCastFn(int aligned, npy_intp src_stride,
                                npy_intp dst_stride,
                                int src_type_num, int dst_type_num)
{
    /* Start from the baseline lookup; it is always compiled in. */
    PyArrayMethod_StridedLoop *(*lookup_fn)(int, npy_intp, npy_intp,
                                            int, int) =
            &PyArray_GetStridedNumericCastFn_body;

#ifdef HAVE_ATTRIBUTE_TARGET_AVX2
    /* Switch to the AVX2 lookup when the CPU check succeeds. */
    if (NPY_CPU_HAVE(AVX2)) {
        lookup_fn = &PyArray_GetStridedNumericCastFn_body_avx2;
    }
#endif

    return lookup_fn(aligned, src_stride, dst_stride,
                     src_type_num, dst_type_num);
}


/****************** PRIMITIVE FLAT TO/FROM NDIM FUNCTIONS ******************/

Expand Down

0 comments on commit e79f312

Please sign in to comment.