Skip to content

Commit

Permalink
sve/true,whilelt,cmplt,ld1,st1,sel,and: skip AVX512 native implementa…
Browse files Browse the repository at this point in the history
…tions on MSVC 2017
  • Loading branch information
mr-c committed May 3, 2023
1 parent a53d550 commit accce42
Show file tree
Hide file tree
Showing 7 changed files with 250 additions and 136 deletions.
12 changes: 8 additions & 4 deletions simde/arm/sve/and.h
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,8 @@ simde_svint32_t
simde_svand_s32_z(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) {
#if defined(SIMDE_ARM_SVE_NATIVE)
return svand_s32_z(pg, op1, op2);
#elif defined(SIMDE_X86_AVX512BW_NATIVE) && ((SIMDE_ARM_SVE_VECTOR_SIZE >= 512) || defined(SIMDE_X86_AVX512VL_NATIVE))
#elif defined(SIMDE_X86_AVX512BW_NATIVE) && ((SIMDE_ARM_SVE_VECTOR_SIZE >= 512) || defined(SIMDE_X86_AVX512VL_NATIVE)) \
&& (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))
simde_svint32_t r;

#if SIMDE_ARM_SVE_VECTOR_SIZE >= 512
Expand All @@ -340,7 +341,8 @@ simde_svint32_t
simde_svand_s32_m(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) {
#if defined(SIMDE_ARM_SVE_NATIVE)
return svand_s32_m(pg, op1, op2);
#elif defined(SIMDE_X86_AVX512BW_NATIVE) && ((SIMDE_ARM_SVE_VECTOR_SIZE >= 512) || defined(SIMDE_X86_AVX512VL_NATIVE))
#elif defined(SIMDE_X86_AVX512BW_NATIVE) && ((SIMDE_ARM_SVE_VECTOR_SIZE >= 512) || defined(SIMDE_X86_AVX512VL_NATIVE)) \
&& (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))
simde_svint32_t r;

#if SIMDE_ARM_SVE_VECTOR_SIZE >= 512
Expand Down Expand Up @@ -452,7 +454,8 @@ simde_svint64_t
simde_svand_s64_z(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) {
#if defined(SIMDE_ARM_SVE_NATIVE)
return svand_s64_z(pg, op1, op2);
#elif defined(SIMDE_X86_AVX512BW_NATIVE) && ((SIMDE_ARM_SVE_VECTOR_SIZE >= 512) || defined(SIMDE_X86_AVX512VL_NATIVE))
#elif defined(SIMDE_X86_AVX512BW_NATIVE) && ((SIMDE_ARM_SVE_VECTOR_SIZE >= 512) || defined(SIMDE_X86_AVX512VL_NATIVE)) \
&& (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))
simde_svint64_t r;

#if SIMDE_ARM_SVE_VECTOR_SIZE >= 512
Expand All @@ -476,7 +479,8 @@ simde_svint64_t
simde_svand_s64_m(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) {
#if defined(SIMDE_ARM_SVE_NATIVE)
return svand_s64_m(pg, op1, op2);
#elif defined(SIMDE_X86_AVX512BW_NATIVE) && ((SIMDE_ARM_SVE_VECTOR_SIZE >= 512) || defined(SIMDE_X86_AVX512VL_NATIVE))
#elif defined(SIMDE_X86_AVX512BW_NATIVE) && ((SIMDE_ARM_SVE_VECTOR_SIZE >= 512) || defined(SIMDE_X86_AVX512VL_NATIVE)) \
&& (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))
simde_svint64_t r;

#if SIMDE_ARM_SVE_VECTOR_SIZE >= 512
Expand Down
60 changes: 40 additions & 20 deletions simde/arm/sve/cmplt.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,11 @@ simde_svcmplt_s8(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) {
#else
simde_svbool_t r;

#if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512)
#if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \
&& (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))
r = simde_svbool_from_mmask64(_mm512_mask_cmplt_epi8_mask(simde_svbool_to_mmask64(pg), op1.m512i, op2.m512i));
#elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
#elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \
&& (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))
r = simde_svbool_from_mmask32(_mm256_mask_cmplt_epi8_mask(simde_svbool_to_mmask32(pg), op1.m256i[0], op2.m256i[0]));
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r.neon_i8 = vandq_s8(pg.neon_i8, vreinterpretq_s8_u8(vcltq_s8(op1.neon, op2.neon)));
Expand Down Expand Up @@ -81,9 +83,11 @@ simde_svcmplt_s16(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) {
#else
simde_svbool_t r;

#if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512)
#if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \
&& (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))
r = simde_svbool_from_mmask32(_mm512_mask_cmplt_epi16_mask(simde_svbool_to_mmask32(pg), op1.m512i, op2.m512i));
#elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
#elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \
&& (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))
r = simde_svbool_from_mmask16(_mm256_mask_cmplt_epi16_mask(simde_svbool_to_mmask16(pg), op1.m256i[0], op2.m256i[0]));
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r.neon_i16 = vandq_s16(pg.neon_i16, vreinterpretq_s16_u16(vcltq_s16(op1.neon, op2.neon)));
Expand Down Expand Up @@ -122,9 +126,11 @@ simde_svcmplt_s32(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) {
#else
simde_svbool_t r;

#if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512)
#if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \
&& (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))
r = simde_svbool_from_mmask16(_mm512_mask_cmplt_epi32_mask(simde_svbool_to_mmask16(pg), op1.m512i, op2.m512i));
#elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
#elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \
&& (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))
r = simde_svbool_from_mmask8(_mm256_mask_cmplt_epi32_mask(simde_svbool_to_mmask8(pg), op1.m256i[0], op2.m256i[0]));
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r.neon_i32 = vandq_s32(pg.neon_i32, vreinterpretq_s32_u32(vcltq_s32(op1.neon, op2.neon)));
Expand Down Expand Up @@ -163,9 +169,11 @@ simde_svcmplt_s64(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) {
#else
simde_svbool_t r;

#if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512)
#if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \
&& (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))
r = simde_svbool_from_mmask8(_mm512_mask_cmplt_epi64_mask(simde_svbool_to_mmask8(pg), op1.m512i, op2.m512i));
#elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
#elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \
&& (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))
r = simde_svbool_from_mmask4(_mm256_mask_cmplt_epi64_mask(simde_svbool_to_mmask4(pg), op1.m256i[0], op2.m256i[0]));
#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r.neon_i64 = vandq_s64(pg.neon_i64, vreinterpretq_s64_u64(vcltq_s64(op1.neon, op2.neon)));
Expand Down Expand Up @@ -200,9 +208,11 @@ simde_svcmplt_u8(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) {
#else
simde_svbool_t r;

#if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512)
#if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \
&& (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))
r = simde_svbool_from_mmask64(_mm512_mask_cmplt_epu8_mask(simde_svbool_to_mmask64(pg), op1.m512i, op2.m512i));
#elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
#elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \
&& (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))
r = simde_svbool_from_mmask32(_mm256_mask_cmplt_epu8_mask(simde_svbool_to_mmask32(pg), op1.m256i[0], op2.m256i[0]));
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r.neon_u8 = vandq_u8(pg.neon_u8, vcltq_u8(op1.neon, op2.neon));
Expand Down Expand Up @@ -237,9 +247,11 @@ simde_svcmplt_u16(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2)
#else
simde_svbool_t r;

#if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512)
#if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \
&& (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))
r = simde_svbool_from_mmask32(_mm512_mask_cmplt_epu16_mask(simde_svbool_to_mmask32(pg), op1.m512i, op2.m512i));
#elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
#elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \
&& (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))
r = simde_svbool_from_mmask16(_mm256_mask_cmplt_epu16_mask(simde_svbool_to_mmask16(pg), op1.m256i[0], op2.m256i[0]));
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r.neon_u16 = vandq_u16(pg.neon_u16, vcltq_u16(op1.neon, op2.neon));
Expand Down Expand Up @@ -274,9 +286,11 @@ simde_svcmplt_u32(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2)
#else
simde_svbool_t r;

#if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512)
#if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \
&& (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))
r = simde_svbool_from_mmask16(_mm512_mask_cmplt_epu32_mask(simde_svbool_to_mmask16(pg), op1.m512i, op2.m512i));
#elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
#elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \
&& (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))
r = simde_svbool_from_mmask8(_mm256_mask_cmplt_epu32_mask(simde_svbool_to_mmask8(pg), op1.m256i[0], op2.m256i[0]));
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r.neon_u32 = vandq_u32(pg.neon_u32, vcltq_u32(op1.neon, op2.neon));
Expand Down Expand Up @@ -311,9 +325,11 @@ simde_svcmplt_u64(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2)
#else
simde_svbool_t r;

#if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512)
#if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \
&& (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))
r = simde_svbool_from_mmask8(_mm512_mask_cmplt_epu64_mask(simde_svbool_to_mmask8(pg), op1.m512i, op2.m512i));
#elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
#elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \
&& (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))
r = simde_svbool_from_mmask4(_mm256_mask_cmplt_epu64_mask(simde_svbool_to_mmask4(pg), op1.m256i[0], op2.m256i[0]));
#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r.neon_u64 = vandq_u64(pg.neon_u64, vcltq_u64(op1.neon, op2.neon));
Expand Down Expand Up @@ -348,9 +364,11 @@ simde_svcmplt_f32(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op
#else
simde_svbool_t r;

#if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512)
#if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \
&& (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))
r = simde_svbool_from_mmask16(_mm512_mask_cmp_ps_mask(simde_svbool_to_mmask16(pg), op1.m512, op2.m512, _CMP_LT_OQ));
#elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
#elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \
&& (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))
r = simde_svbool_from_mmask8(_mm256_mask_cmp_ps_mask(simde_svbool_to_mmask8(pg), op1.m256[0], op2.m256[0], _CMP_LT_OQ));
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r.neon_u32 = vandq_u32(pg.neon_u32, vcltq_f32(op1.neon, op2.neon));
Expand Down Expand Up @@ -389,9 +407,11 @@ simde_svcmplt_f64(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op
#else
simde_svbool_t r;

#if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512)
#if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \
&& (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))
r = simde_svbool_from_mmask8(_mm512_mask_cmp_pd_mask(simde_svbool_to_mmask8(pg), op1.m512d, op2.m512d, _CMP_LT_OQ));
#elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
#elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \
&& (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0))
r = simde_svbool_from_mmask4(_mm256_mask_cmp_pd_mask(simde_svbool_to_mmask4(pg), op1.m256d[0], op2.m256d[0], _CMP_LT_OQ));
#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r.neon_u64 = vandq_u64(pg.neon_u64, vcltq_f64(op1.neon, op2.neon));
Expand Down

0 comments on commit accce42

Please sign in to comment.