diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 7a14c6ec21a1a..3742746def75f 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -512,7 +512,7 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in def vperm2f128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">; } -let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def vpermilpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Constant int)">; def vpermilps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">; } @@ -528,6 +528,8 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid def vinsertf128_pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<2, double>, _Constant int)">; def vinsertf128_ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<4, float>, _Constant int)">; def vinsertf128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>, _Constant int)">; + def vpermilpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">; + def vpermilps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">; foreach Op = ["hadd", "hsub"] in { def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">; @@ -536,8 +538,6 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid } let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vpermilpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">; - def vpermilps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">; def sqrtpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>)">; def sqrtps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">; def rsqrtps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">; @@ -2375,10 +2375,12 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128> def vcvttss2si32 : X86Builtin<"int(_Vector<4, float>, _Constant int)">; def vcvttss2usi32 : X86Builtin<"unsigned int(_Vector<4, float>, _Constant int)">; } - -let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def vpermilpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int)">; def vpermilps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int)">; +} + +let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { def vpermilvarpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, long long int>)">; def vpermilvarps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, int>)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index b6013834b6852..c63c2ce83c76f 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -4620,6 +4620,9 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_pshufd: case X86::BI__builtin_ia32_pshufd256: case X86::BI__builtin_ia32_pshufd512: + case X86::BI__builtin_ia32_vpermilps: + case X86::BI__builtin_ia32_vpermilps256: + case X86::BI__builtin_ia32_vpermilps512: return interp__builtin_ia32_shuffle_generic( S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { unsigned LaneBase = (DstIdx / 4) * 4; @@ -4628,6 +4631,22 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return std::make_pair(0, static_cast(LaneBase + Sel)); }); + case X86::BI__builtin_ia32_vpermilpd: + case X86::BI__builtin_ia32_vpermilpd256: + case X86::BI__builtin_ia32_vpermilpd512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned Control) { + unsigned NumElemPerLane = 2; + unsigned BitsPerElem = 1; + unsigned MaskBits = 8; + unsigned IndexMask = 0x1; + unsigned Lane = DstIdx / NumElemPerLane; + unsigned LaneOffset = Lane * NumElemPerLane; + unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; + unsigned Index = (Control >> BitIndex) & IndexMask; + return std::make_pair(0, static_cast(LaneOffset + Index)); + }); + case X86::BI__builtin_ia32_kandqi: case X86::BI__builtin_ia32_kandhi: case X86::BI__builtin_ia32_kandsi: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 120c68d27de13..a9cff7f88d6f2 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13023,7 +13023,10 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case X86::BI__builtin_ia32_pshufd: case X86::BI__builtin_ia32_pshufd256: - case X86::BI__builtin_ia32_pshufd512: { + case X86::BI__builtin_ia32_pshufd512: + case X86::BI__builtin_ia32_vpermilps: + case X86::BI__builtin_ia32_vpermilps256: + case X86::BI__builtin_ia32_vpermilps512: { APValue R; if (!evalShuffleGeneric( Info, E, R, @@ -13040,6 +13043,25 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(R, E); } + case X86::BI__builtin_ia32_vpermilpd: + case X86::BI__builtin_ia32_vpermilpd256: + case X86::BI__builtin_ia32_vpermilpd512: { + APValue R; + if (!evalShuffleGeneric(Info, E, R, [](unsigned DstIdx, unsigned Control) { + unsigned NumElemPerLane = 2; + unsigned BitsPerElem = 1; + unsigned MaskBits = 8; + unsigned IndexMask = 0x1; + unsigned Lane = DstIdx / NumElemPerLane; + unsigned LaneOffset = Lane * NumElemPerLane; + unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; + unsigned Index = (Control >> BitIndex) & IndexMask; + return std::make_pair(0, static_cast(LaneOffset + Index)); + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_phminposuw128: { APValue Source; if (!Evaluate(Source, Info, E->getArg(0))) diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index 46bc28b85d8db..f8931e7e55410 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -1400,18 +1400,21 @@ __m128d test_mm_permute_pd(__m128d A) { // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> return _mm_permute_pd(A, 1); } +TEST_CONSTEXPR(match_m128d(_mm_permute_pd(((__m128d){1.0, 2.0}), 1), 2.0, 1.0)); __m256d test_mm256_permute_pd(__m256d A) { // CHECK-LABEL: test_mm256_permute_pd // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <4 x i32> return _mm256_permute_pd(A, 5); } +TEST_CONSTEXPR(match_m256d(_mm256_permute_pd(((__m256d){1.0f, 2.0f, 3.0f, 4.0f}), 5), 2.0f, 1.0f, 4.0f, 3.0f)); __m128 test_mm_permute_ps(__m128 A) { // CHECK-LABEL: test_mm_permute_ps // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> return _mm_permute_ps(A, 0x1b); } +TEST_CONSTEXPR(match_m128(_mm_permute_ps(((__m128){1.0, 2.0, 3.0, 4.0}), 0x1b), 4.0, 3.0, 2.0, 1.0)); // Test case for PR12401 __m128 test2_mm_permute_ps(__m128 a) { @@ -1419,12 +1422,14 @@ __m128 test2_mm_permute_ps(__m128 a) { // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> return _mm_permute_ps(a, 0xe6); } +TEST_CONSTEXPR(match_m128(_mm_permute_ps(((__m128){1.0, 2.0, 3.0, 4.0}), 0xe6), 3.0, 2.0, 3.0, 4.0)); __m256 test_mm256_permute_ps(__m256 A) { // CHECK-LABEL: test_mm256_permute_ps // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <8 x i32> return _mm256_permute_ps(A, 0x1b); } +TEST_CONSTEXPR(match_m256(_mm256_permute_ps(((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}), 0x1b), 4.0, 3.0, 2.0, 1.0, 8.0, 7.0, 6.0, 5.0)); __m256d test_mm256_permute2f128_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_permute2f128_pd diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index eb25aa538e9a3..e4a9d9cb3781d 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -5516,6 +5516,7 @@ __m512d test_mm512_permute_pd(__m512d __X) { // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <8 x i32> return _mm512_permute_pd(__X, 2); } +TEST_CONSTEXPR(match_m512d(_mm512_permute_pd(((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}), 2), 0.0, 1.0, 2.0, 2.0, 4.0, 4.0, 6.0, 6.0)); __m512d test_mm512_mask_permute_pd(__m512d __W, __mmask8 __U, __m512d __X) { // CHECK-LABEL: test_mm512_mask_permute_pd @@ -5523,6 +5524,13 @@ __m512d test_mm512_mask_permute_pd(__m512d __W, __mmask8 __U, __m512d __X) { // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_permute_pd(__W, __U, __X, 2); } +TEST_CONSTEXPR(match_m512d(_mm512_mask_permute_pd( + ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}), + (__mmask8)0b01010100, + ((__m512d){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}), + 2), + 0.0, 1.0, 10.0, 3.0, 12.0, 5.0, 14.0, 7.0 +)); __m512d test_mm512_maskz_permute_pd(__mmask8 __U, __m512d __X) { // CHECK-LABEL: test_mm512_maskz_permute_pd @@ -5530,12 +5538,23 @@ __m512d test_mm512_maskz_permute_pd(__mmask8 __U, __m512d __X) { // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_permute_pd(__U, __X, 2); } +TEST_CONSTEXPR(match_m512d(_mm512_maskz_permute_pd( + (__mmask8)0b01010100, + ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}), + 2), + 0.0, 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0 +)); __m512 test_mm512_permute_ps(__m512 __X) { // CHECK-LABEL: test_mm512_permute_ps // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <16 x i32> return _mm512_permute_ps(__X, 2); } +TEST_CONSTEXPR(match_m512(_mm512_permute_ps( + ((__m512){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), + 2), + 2, 0, 0, 0, 6, 4, 4, 4, 10, 8, 8, 8, 14, 12, 12, 12 +)); __m512 test_mm512_mask_permute_ps(__m512 __W, __mmask16 __U, __m512 __X) { // CHECK-LABEL: test_mm512_mask_permute_ps @@ -5543,6 +5562,13 @@ __m512 test_mm512_mask_permute_ps(__m512 __W, __mmask16 __U, __m512 __X) { // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_permute_ps(__W, __U, __X, 2); } +TEST_CONSTEXPR(match_m512(_mm512_mask_permute_ps( + ((__m512){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), + (__mmask16)0b1010101010101010, + ((__m512){16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), + 2), + 0, 16, 2, 16, 4, 20, 6, 20, 8, 24, 10, 24, 12, 28, 14, 28 +)); __m512 test_mm512_maskz_permute_ps(__mmask16 __U, __m512 __X) { // CHECK-LABEL: test_mm512_maskz_permute_ps @@ -5550,6 +5576,12 @@ __m512 test_mm512_maskz_permute_ps(__mmask16 __U, __m512 __X) { // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_permute_ps(__U, __X, 2); } +TEST_CONSTEXPR(match_m512(_mm512_maskz_permute_ps( + (__mmask16)0b1010101010101010, + ((__m512){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), + 2), + 0, 0, 0, 0, 0, 4, 0, 4, 0, 8, 0, 8, 0, 12, 0, 12 +)); __m512d test_mm512_permutevar_pd(__m512d __A, __m512i __C) { // CHECK-LABEL: test_mm512_permutevar_pd diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index e05b1ddf7b69a..69adc75c80f1c 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -8022,6 +8022,13 @@ __m128d test_mm_mask_permute_pd(__m128d __W, __mmask8 __U, __m128d __X) { // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm_mask_permute_pd(__W, __U, __X, 1); } +TEST_CONSTEXPR(match_m128d(_mm_mask_permute_pd( + ((__m128d){0.0, 1.0}), + (__mmask8)0b10, + ((__m128d){2.0, 3.0}), + 1), + 0.0, 2.0 +)); __m128d test_mm_maskz_permute_pd(__mmask8 __U, __m128d __X) { // CHECK-LABEL: test_mm_maskz_permute_pd @@ -8029,6 +8036,12 @@ __m128d test_mm_maskz_permute_pd(__mmask8 __U, __m128d __X) { // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm_maskz_permute_pd(__U, __X, 1); } +TEST_CONSTEXPR(match_m128d(_mm_maskz_permute_pd( + (__mmask8)0b10, + ((__m128d){1.0, 2.0}), + 1), + 0.0, 1.0 +)); __m256d test_mm256_mask_permute_pd(__m256d __W, __mmask8 __U, __m256d __X) { // CHECK-LABEL: test_mm256_mask_permute_pd @@ -8036,6 +8049,13 @@ __m256d test_mm256_mask_permute_pd(__m256d __W, __mmask8 __U, __m256d __X) { // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm256_mask_permute_pd(__W, __U, __X, 5); } +TEST_CONSTEXPR(match_m256d(_mm256_mask_permute_pd( + ((__m256d){0.0, 1.0, 2.0, 3.0}), + (__mmask8)0b1010, + ((__m256d){4.0, 5.0, 6.0, 7.0}), + 5), + 0.0, 4.0, 2.0, 6.0 +)); __m256d test_mm256_maskz_permute_pd(__mmask8 __U, __m256d __X) { // CHECK-LABEL: test_mm256_maskz_permute_pd @@ -8043,6 +8063,12 @@ __m256d test_mm256_maskz_permute_pd(__mmask8 __U, __m256d __X) { // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm256_maskz_permute_pd(__U, __X, 5); } +TEST_CONSTEXPR(match_m256d(_mm256_maskz_permute_pd( + (__mmask8)0b1010, + ((__m256d){4.0, 5.0, 6.0, 7.0}), + 5), + 0.0, 4.0, 0.0, 6.0 +)); __m128 test_mm_mask_permute_ps(__m128 __W, __mmask8 __U, __m128 __X) { // CHECK-LABEL: test_mm_mask_permute_ps @@ -8050,6 +8076,13 @@ __m128 test_mm_mask_permute_ps(__m128 __W, __mmask8 __U, __m128 __X) { // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm_mask_permute_ps(__W, __U, __X, 0x1b); } +TEST_CONSTEXPR(match_m128(_mm_mask_permute_ps( + ((__m128){0.0, 1.0, 2.0, 3.0}), + (__mmask8)0b1010, + ((__m128){4.0, 5.0, 6.0, 7.0}), + 0x1b), + 0, 6.0, 2.0, 4.0 +)); __m128 test_mm_maskz_permute_ps(__mmask8 __U, __m128 __X) { // CHECK-LABEL: test_mm_maskz_permute_ps @@ -8057,6 +8090,13 @@ __m128 test_mm_maskz_permute_ps(__mmask8 __U, __m128 __X) { // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm_maskz_permute_ps(__U, __X, 0x1b); } +TEST_CONSTEXPR(match_m128(_mm_maskz_permute_ps( + (__mmask8)0b1010, + ((__m128){4.0, 5.0, 6.0, 7.0}), + 0x1b), + 0.0, 6.0, 0.0, 4.0 +)); + __m256 test_mm256_mask_permute_ps(__m256 __W, __mmask8 __U, __m256 __X) { // CHECK-LABEL: test_mm256_mask_permute_ps @@ -8064,6 +8104,13 @@ __m256 test_mm256_mask_permute_ps(__m256 __W, __mmask8 __U, __m256 __X) { // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_mask_permute_ps(__W, __U, __X, 0x1b); } +TEST_CONSTEXPR(match_m256(_mm256_mask_permute_ps( + ((__m256){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}), + (__mmask8)0b10101010, + ((__m256){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}), + 0x1b), + 0.0, 10.0, 2.0, 8.0, 4.0, 14.0, 6.0, 12.0 +)); __m256 test_mm256_maskz_permute_ps(__mmask8 __U, __m256 __X) { // CHECK-LABEL: test_mm256_maskz_permute_ps @@ -8071,6 +8118,12 @@ __m256 test_mm256_maskz_permute_ps(__mmask8 __U, __m256 __X) { // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_maskz_permute_ps(__U, __X, 0x1b); } +TEST_CONSTEXPR(match_m256(_mm256_maskz_permute_ps( + (__mmask8)0b10101010, + ((__m256){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}), + 0x1b), + 0.0, 10.0, 0.0, 8.0, 0.0, 14.0, 0.0, 12.0 +)); __m128d test_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) { // CHECK-LABEL: test_mm_mask_permutevar_pd