Skip to content

Commit e5147f8

Browse files
committed
[X86] Remove __builtin_ia32_pabs intrinsics and use generic __builtin_elementwise_abs
D111986 added the generic `__builtin_elementwise_abs()` intrinsic with the same integer absolute-value behaviour as the SSE/AVX instructions (abs(INT_MIN) == INT_MIN). This patch removes the `__builtin_ia32_pabs*` intrinsics and just uses `__builtin_elementwise_abs` - the existing tests see no changes:

```
__m256i test_mm256_abs_epi8(__m256i a) {
  // CHECK-LABEL: test_mm256_abs_epi8
  // CHECK: [[ABS:%.*]] = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %{{.*}}, i1 false)
  return _mm256_abs_epi8(a);
}
```

This requires us to add a `__v64qs` explicitly signed char vector type (we already have `__v16qs` and `__v32qs`).

Differential Revision: https://reviews.llvm.org/D117791
1 parent 46cacdb commit e5147f8

File tree

8 files changed

+16
-42
lines changed

8 files changed

+16
-42
lines changed

clang/include/clang/Basic/BuiltinsX86.def

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -296,9 +296,6 @@ TARGET_BUILTIN(__builtin_ia32_pshufb128, "V16cV16cV16c", "ncV:128:", "ssse3")
296296
TARGET_BUILTIN(__builtin_ia32_psignb128, "V16cV16cV16c", "ncV:128:", "ssse3")
297297
TARGET_BUILTIN(__builtin_ia32_psignw128, "V8sV8sV8s", "ncV:128:", "ssse3")
298298
TARGET_BUILTIN(__builtin_ia32_psignd128, "V4iV4iV4i", "ncV:128:", "ssse3")
299-
TARGET_BUILTIN(__builtin_ia32_pabsb128, "V16cV16c", "ncV:128:", "ssse3")
300-
TARGET_BUILTIN(__builtin_ia32_pabsw128, "V8sV8s", "ncV:128:", "ssse3")
301-
TARGET_BUILTIN(__builtin_ia32_pabsd128, "V4iV4i", "ncV:128:", "ssse3")
302299

303300
TARGET_BUILTIN(__builtin_ia32_ldmxcsr, "vUi", "n", "sse")
304301
TARGET_HEADER_BUILTIN(_mm_setcsr, "vUi", "nh","xmmintrin.h", ALL_LANGUAGES, "sse")
@@ -558,9 +555,6 @@ TARGET_BUILTIN(__builtin_ia32_vec_set_v8si, "V8iV8iiIi", "ncV:256:", "avx")
558555

559556
// AVX2
560557
TARGET_BUILTIN(__builtin_ia32_mpsadbw256, "V32cV32cV32cIc", "ncV:256:", "avx2")
561-
TARGET_BUILTIN(__builtin_ia32_pabsb256, "V32cV32c", "ncV:256:", "avx2")
562-
TARGET_BUILTIN(__builtin_ia32_pabsw256, "V16sV16s", "ncV:256:", "avx2")
563-
TARGET_BUILTIN(__builtin_ia32_pabsd256, "V8iV8i", "ncV:256:", "avx2")
564558
TARGET_BUILTIN(__builtin_ia32_packsswb256, "V32cV16sV16s", "ncV:256:", "avx2")
565559
TARGET_BUILTIN(__builtin_ia32_packssdw256, "V16sV8iV8i", "ncV:256:", "avx2")
566560
TARGET_BUILTIN(__builtin_ia32_packuswb256, "V32cV16sV16s", "ncV:256:", "avx2")
@@ -927,8 +921,6 @@ TARGET_BUILTIN(__builtin_ia32_cvtudq2ps512_mask, "V16fV16iV16fUsIi", "ncV:512:",
927921
TARGET_BUILTIN(__builtin_ia32_cvtpd2ps512_mask, "V8fV8dV8fUcIi", "ncV:512:", "avx512f")
928922
TARGET_BUILTIN(__builtin_ia32_vcvtps2ph512_mask, "V16sV16fIiV16sUs", "ncV:512:", "avx512f")
929923
TARGET_BUILTIN(__builtin_ia32_vcvtph2ps512_mask, "V16fV16sV16fUsIi", "ncV:512:", "avx512f")
930-
TARGET_BUILTIN(__builtin_ia32_pabsd512, "V16iV16i", "ncV:512:", "avx512f")
931-
TARGET_BUILTIN(__builtin_ia32_pabsq512, "V8OiV8Oi", "ncV:512:", "avx512f")
932924
TARGET_BUILTIN(__builtin_ia32_pmaxsd512, "V16iV16iV16i", "ncV:512:", "avx512f")
933925
TARGET_BUILTIN(__builtin_ia32_pmaxsq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
934926
TARGET_BUILTIN(__builtin_ia32_pmaxud512, "V16iV16iV16i", "ncV:512:", "avx512f")
@@ -1045,8 +1037,6 @@ TARGET_BUILTIN(__builtin_ia32_ucmpd512_mask, "UsV16iV16iIiUs", "ncV:512:", "avx5
10451037
TARGET_BUILTIN(__builtin_ia32_ucmpq512_mask, "UcV8OiV8OiIiUc", "ncV:512:", "avx512f")
10461038
TARGET_BUILTIN(__builtin_ia32_ucmpw512_mask, "UiV32sV32sIiUi", "ncV:512:", "avx512bw")
10471039

1048-
TARGET_BUILTIN(__builtin_ia32_pabsb512, "V64cV64c", "ncV:512:", "avx512bw")
1049-
TARGET_BUILTIN(__builtin_ia32_pabsw512, "V32sV32s", "ncV:512:", "avx512bw")
10501040
TARGET_BUILTIN(__builtin_ia32_packssdw512, "V32sV16iV16i", "ncV:512:", "avx512bw")
10511041
TARGET_BUILTIN(__builtin_ia32_packsswb512, "V64cV32sV32s", "ncV:512:", "avx512bw")
10521042
TARGET_BUILTIN(__builtin_ia32_packusdw512, "V32sV16iV16i", "ncV:512:", "avx512bw")
@@ -1198,8 +1188,6 @@ TARGET_BUILTIN(__builtin_ia32_getexppd128_mask, "V2dV2dV2dUc", "ncV:128:", "avx5
11981188
TARGET_BUILTIN(__builtin_ia32_getexppd256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl")
11991189
TARGET_BUILTIN(__builtin_ia32_getexpps128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl")
12001190
TARGET_BUILTIN(__builtin_ia32_getexpps256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl")
1201-
TARGET_BUILTIN(__builtin_ia32_pabsq128, "V2OiV2Oi", "ncV:128:", "avx512vl")
1202-
TARGET_BUILTIN(__builtin_ia32_pabsq256, "V4OiV4Oi", "ncV:256:", "avx512vl")
12031191
TARGET_BUILTIN(__builtin_ia32_pmaxsq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
12041192
TARGET_BUILTIN(__builtin_ia32_pmaxsq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl")
12051193
TARGET_BUILTIN(__builtin_ia32_pmaxuq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14285,21 +14285,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1428514285
return Builder.CreateCall(F, Ops[0]);
1428614286
}
1428714287
}
14288-
case X86::BI__builtin_ia32_pabsb128:
14289-
case X86::BI__builtin_ia32_pabsw128:
14290-
case X86::BI__builtin_ia32_pabsd128:
14291-
case X86::BI__builtin_ia32_pabsb256:
14292-
case X86::BI__builtin_ia32_pabsw256:
14293-
case X86::BI__builtin_ia32_pabsd256:
14294-
case X86::BI__builtin_ia32_pabsq128:
14295-
case X86::BI__builtin_ia32_pabsq256:
14296-
case X86::BI__builtin_ia32_pabsb512:
14297-
case X86::BI__builtin_ia32_pabsw512:
14298-
case X86::BI__builtin_ia32_pabsd512:
14299-
case X86::BI__builtin_ia32_pabsq512: {
14300-
Function *F = CGM.getIntrinsic(Intrinsic::abs, Ops[0]->getType());
14301-
return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
14302-
}
1430314288
case X86::BI__builtin_ia32_pmaxsb128:
1430414289
case X86::BI__builtin_ia32_pmaxsw128:
1430514290
case X86::BI__builtin_ia32_pmaxsd128:

clang/lib/Headers/avx2intrin.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,19 +26,19 @@
2626
static __inline__ __m256i __DEFAULT_FN_ATTRS256
2727
_mm256_abs_epi8(__m256i __a)
2828
{
29-
return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a);
29+
return (__m256i)__builtin_elementwise_abs((__v32qs)__a);
3030
}
3131

3232
static __inline__ __m256i __DEFAULT_FN_ATTRS256
3333
_mm256_abs_epi16(__m256i __a)
3434
{
35-
return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a);
35+
return (__m256i)__builtin_elementwise_abs((__v16hi)__a);
3636
}
3737

3838
static __inline__ __m256i __DEFAULT_FN_ATTRS256
3939
_mm256_abs_epi32(__m256i __a)
4040
{
41-
return (__m256i)__builtin_ia32_pabsd256((__v8si)__a);
41+
return (__m256i)__builtin_elementwise_abs((__v8si)__a);
4242
}
4343

4444
static __inline__ __m256i __DEFAULT_FN_ATTRS256

clang/lib/Headers/avx512bwintrin.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -485,7 +485,7 @@ _mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W)
485485
static __inline__ __m512i __DEFAULT_FN_ATTRS512
486486
_mm512_abs_epi8 (__m512i __A)
487487
{
488-
return (__m512i)__builtin_ia32_pabsb512((__v64qi)__A);
488+
return (__m512i)__builtin_elementwise_abs((__v64qs)__A);
489489
}
490490

491491
static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -507,7 +507,7 @@ _mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A)
507507
static __inline__ __m512i __DEFAULT_FN_ATTRS512
508508
_mm512_abs_epi16 (__m512i __A)
509509
{
510-
return (__m512i)__builtin_ia32_pabsw512((__v32hi)__A);
510+
return (__m512i)__builtin_elementwise_abs((__v32hi)__A);
511511
}
512512

513513
static __inline__ __m512i __DEFAULT_FN_ATTRS512

clang/lib/Headers/avx512fintrin.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@ typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
2626
typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
2727
typedef unsigned int __v16su __attribute__((__vector_size__(64)));
2828

29+
/* We need an explicitly signed variant for char. Note that this shouldn't
30+
* appear in the interface though. */
31+
typedef signed char __v64qs __attribute__((__vector_size__(64)));
32+
2933
typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
3034
typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
3135
typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));
@@ -1846,7 +1850,7 @@ _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
18461850
static __inline __m512i __DEFAULT_FN_ATTRS512
18471851
_mm512_abs_epi64(__m512i __A)
18481852
{
1849-
return (__m512i)__builtin_ia32_pabsq512((__v8di)__A);
1853+
return (__m512i)__builtin_elementwise_abs((__v8di)__A);
18501854
}
18511855

18521856
static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -1868,7 +1872,7 @@ _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
18681872
static __inline __m512i __DEFAULT_FN_ATTRS512
18691873
_mm512_abs_epi32(__m512i __A)
18701874
{
1871-
return (__m512i)__builtin_ia32_pabsd512((__v16si) __A);
1875+
return (__m512i)__builtin_elementwise_abs((__v16si) __A);
18721876
}
18731877

18741878
static __inline__ __m512i __DEFAULT_FN_ATTRS512

clang/lib/Headers/avx512vlintrin.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2988,7 +2988,7 @@ _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) {
29882988

29892989
static __inline__ __m128i __DEFAULT_FN_ATTRS128
29902990
_mm_abs_epi64 (__m128i __A) {
2991-
return (__m128i)__builtin_ia32_pabsq128((__v2di)__A);
2991+
return (__m128i)__builtin_elementwise_abs((__v2di)__A);
29922992
}
29932993

29942994
static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -3007,7 +3007,7 @@ _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) {
30073007

30083008
static __inline__ __m256i __DEFAULT_FN_ATTRS256
30093009
_mm256_abs_epi64 (__m256i __A) {
3010-
return (__m256i)__builtin_ia32_pabsq256 ((__v4di)__A);
3010+
return (__m256i)__builtin_elementwise_abs((__v4di)__A);
30113011
}
30123012

30133013
static __inline__ __m256i __DEFAULT_FN_ATTRS256

clang/lib/Headers/tmmintrin.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ _mm_abs_pi8(__m64 __a)
5353
static __inline__ __m128i __DEFAULT_FN_ATTRS
5454
_mm_abs_epi8(__m128i __a)
5555
{
56-
return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);
56+
return (__m128i)__builtin_elementwise_abs((__v16qs)__a);
5757
}
5858

5959
/// Computes the absolute value of each of the packed 16-bit signed
@@ -89,7 +89,7 @@ _mm_abs_pi16(__m64 __a)
8989
static __inline__ __m128i __DEFAULT_FN_ATTRS
9090
_mm_abs_epi16(__m128i __a)
9191
{
92-
return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);
92+
return (__m128i)__builtin_elementwise_abs((__v8hi)__a);
9393
}
9494

9595
/// Computes the absolute value of each of the packed 32-bit signed
@@ -125,7 +125,7 @@ _mm_abs_pi32(__m64 __a)
125125
static __inline__ __m128i __DEFAULT_FN_ATTRS
126126
_mm_abs_epi32(__m128i __a)
127127
{
128-
return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);
128+
return (__m128i)__builtin_elementwise_abs((__v4si)__a);
129129
}
130130

131131
/// Concatenates the two 128-bit integer vector operands, and

clang/test/CodeGen/builtins-x86.c

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -259,11 +259,8 @@ void f0() {
259259
tmp_V4s = __builtin_ia32_psignw(tmp_V4s, tmp_V4s);
260260
tmp_V4i = __builtin_ia32_psignd128(tmp_V4i, tmp_V4i);
261261
tmp_V2i = __builtin_ia32_psignd(tmp_V2i, tmp_V2i);
262-
tmp_V16c = __builtin_ia32_pabsb128(tmp_V16c);
263262
tmp_V8c = __builtin_ia32_pabsb(tmp_V8c);
264-
tmp_V8s = __builtin_ia32_pabsw128(tmp_V8s);
265263
tmp_V4s = __builtin_ia32_pabsw(tmp_V4s);
266-
tmp_V4i = __builtin_ia32_pabsd128(tmp_V4i);
267264
tmp_V2i = __builtin_ia32_pabsd(tmp_V2i);
268265
tmp_V4s = __builtin_ia32_psllw(tmp_V4s, tmp_V1LLi);
269266
tmp_V2i = __builtin_ia32_pslld(tmp_V2i, tmp_V1LLi);

0 commit comments

Comments
 (0)