From a1023b432060362055296691bc5822a910b24f05 Mon Sep 17 00:00:00 2001 From: stomfaig Date: Fri, 14 Nov 2025 10:49:57 +0000 Subject: [PATCH 1/6] adding cases for vpermilpd and vpermilps --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 30 ++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index cee3c1b8cf8f3..9ea66c6de7553 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -4562,6 +4562,36 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned Sel = (ShuffleMask >> (2 * LaneIdx)) & 0x3; return std::make_pair(0, static_cast(LaneBase + Sel)); }); + + case X86::BI__builtin_ia32_vpermilpd: + case X86::BI__builtin_ia32_vpermilpd256: + case X86::BI__builtin_ia32_vpermilpd512: + return interp__builtin_ia32_shuffle_generic(S, OpPC, Call, [](unsigned DstIdx, unsigned Control) { + unsigned NumElemPerLane = 2; + unsigned BitsPerElem = 1; + unsigned MaskBits = 8; + unsigned IndexMask = 0x1; + unsigned Lane = DstIdx / NumElemPerLane; + unsigned LaneOffset = Lane * NumElemPerLane; + unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; + unsigned Index = (Control >> BitIndex) & IndexMask; + return std::make_pair(0, static_cast(LaneOffset + Index)); + }); + + case X86::BI__builtin_ia32_vpermilps: + case X86::BI__builtin_ia32_vpermilps256: + case X86::BI__builtin_ia32_vpermilps512: + return interp__builtin_ia32_shuffle_generic(S, OpPC, Call, [](unsigned DstIdx, unsigned Control) { + unsigned NumElemPerLane = 4; + unsigned BitsPerElem = 2; + unsigned MaskBits = 8; + unsigned IndexMask = 0x3; + unsigned Lane = DstIdx / NumElemPerLane; + unsigned LaneOffset = Lane * NumElemPerLane; + unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; + unsigned Index = (Control >> BitIndex) & IndexMask; + return std::make_pair(0, static_cast(LaneOffset + Index)); + }); case X86::BI__builtin_ia32_kandqi: case X86::BI__builtin_ia32_kandhi: From 495094e19f8de9b2aca3d5b4a262fb03445190a6 Mon Sep 17 00:00:00 2001 From: stomfaig Date: Fri, 14 Nov 2025 15:53:19 +0000 Subject: [PATCH 2/6] vpermilps has same impl as pshufd --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 9ea66c6de7553..4a09bc41dd4e0 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -4555,6 +4555,9 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_pshufd: case X86::BI__builtin_ia32_pshufd256: case X86::BI__builtin_ia32_pshufd512: + case X86::BI__builtin_ia32_vpermilps: + case X86::BI__builtin_ia32_vpermilps256: + case X86::BI__builtin_ia32_vpermilps512: return interp__builtin_ia32_shuffle_generic( S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { unsigned LaneBase = (DstIdx / 4) * 4; @@ -4577,21 +4580,6 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned Index = (Control >> BitIndex) & IndexMask; return std::make_pair(0, static_cast(LaneOffset + Index)); }); - - case X86::BI__builtin_ia32_vpermilps: - case X86::BI__builtin_ia32_vpermilps256: - case X86::BI__builtin_ia32_vpermilps512: - return interp__builtin_ia32_shuffle_generic(S, OpPC, Call, [](unsigned DstIdx, unsigned Control) { - unsigned NumElemPerLane = 4; - unsigned BitsPerElem = 2; - unsigned MaskBits = 8; - unsigned IndexMask = 0x3; - unsigned Lane = DstIdx / NumElemPerLane; - unsigned LaneOffset = Lane * NumElemPerLane; - unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; - unsigned Index = (Control >> BitIndex) & IndexMask; - return std::make_pair(0, static_cast(LaneOffset + Index)); - }); case X86::BI__builtin_ia32_kandqi: case X86::BI__builtin_ia32_kandhi: From c004b68db4dc43b57db6ec39b5609f778b13d629 Mon Sep 17 00:00:00 2001 From: stomfaig Date: Fri, 14 Nov 2025 15:55:54 +0000 Subject: [PATCH 3/6] impl vpermilps and vpermilpd in ExprConstant --- clang/lib/AST/ExprConstant.cpp | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index b7da89ab3dcf2..b30bdf3d24508 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12998,7 +12998,10 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case X86::BI__builtin_ia32_pshufd: case X86::BI__builtin_ia32_pshufd256: - case X86::BI__builtin_ia32_pshufd512: { + case X86::BI__builtin_ia32_pshufd512: + case X86::BI__builtin_ia32_vpermilps: + case X86::BI__builtin_ia32_vpermilps256: + case X86::BI__builtin_ia32_vpermilps512: { APValue R; if (!evalShuffleGeneric( Info, E, R, @@ -13015,6 +13018,25 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(R, E); } + case X86::BI__builtin_ia32_vpermilpd: + case X86::BI__builtin_ia32_vpermilpd256: + case X86::BI__builtin_ia32_vpermilpd512: { + APValue R; + if (!evalShuffleGeneric(Info, E, R, [](unsigned DstIdx, unsigned Control) { + unsigned NumElemPerLane = 2; + unsigned BitsPerElem = 1; + unsigned MaskBits = 8; + unsigned IndexMask = 0x1; + unsigned Lane = DstIdx / NumElemPerLane; + unsigned LaneOffset = Lane * NumElemPerLane; + unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; + unsigned Index = (Control >> BitIndex) & IndexMask; + return std::make_pair(0, static_cast(LaneOffset + Index)); + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_phminposuw128: { APValue Source; if (!Evaluate(Source, Info, E->getArg(0))) From f2c0a61b594c61669d0e205a7fec2d513e4e8122 Mon Sep 17 00:00:00 2001 From: stomfaig Date: Fri, 14 Nov 2025 15:56:39 +0000 Subject: [PATCH 4/6] format --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 25 ++++++++++++------------ clang/lib/AST/ExprConstant.cpp | 22 ++++++++++----------- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 4a09bc41dd4e0..f88174ca093d3 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -4565,21 +4565,22 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned Sel = (ShuffleMask >> (2 * LaneIdx)) & 0x3; return std::make_pair(0, static_cast(LaneBase + Sel)); }); - + case X86::BI__builtin_ia32_vpermilpd: case X86::BI__builtin_ia32_vpermilpd256: case X86::BI__builtin_ia32_vpermilpd512: - return interp__builtin_ia32_shuffle_generic(S, OpPC, Call, [](unsigned DstIdx, unsigned Control) { - unsigned NumElemPerLane = 2; - unsigned BitsPerElem = 1; - unsigned MaskBits = 8; - unsigned IndexMask = 0x1; - unsigned Lane = DstIdx / NumElemPerLane; - unsigned LaneOffset = Lane * NumElemPerLane; - unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; - unsigned Index = (Control >> BitIndex) & IndexMask; - return std::make_pair(0, static_cast(LaneOffset + Index)); - }); + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned Control) { + unsigned NumElemPerLane = 2; + unsigned BitsPerElem = 1; + unsigned MaskBits = 8; + unsigned IndexMask = 0x1; + unsigned Lane = DstIdx / NumElemPerLane; + unsigned LaneOffset = Lane * NumElemPerLane; + unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; + unsigned Index = (Control >> BitIndex) & IndexMask; + return std::make_pair(0, static_cast(LaneOffset + Index)); + }); case X86::BI__builtin_ia32_kandqi: case X86::BI__builtin_ia32_kandhi: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index b30bdf3d24508..25233d4ceff57 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13023,20 +13023,20 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case X86::BI__builtin_ia32_vpermilpd512: { APValue R; if (!evalShuffleGeneric(Info, E, R, [](unsigned DstIdx, unsigned Control) { - unsigned NumElemPerLane = 2; - unsigned BitsPerElem = 1; - unsigned MaskBits = 8; - unsigned IndexMask = 0x1; - unsigned Lane = DstIdx / NumElemPerLane; - unsigned LaneOffset = Lane * NumElemPerLane; - unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; - unsigned Index = (Control >> BitIndex) & IndexMask; - return std::make_pair(0, static_cast(LaneOffset + Index)); - })) + unsigned NumElemPerLane = 2; + unsigned BitsPerElem = 1; + unsigned MaskBits = 8; + unsigned IndexMask = 0x1; + unsigned Lane = DstIdx / NumElemPerLane; + unsigned LaneOffset = Lane * NumElemPerLane; + unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; + unsigned Index = (Control >> BitIndex) & IndexMask; + return std::make_pair(0, static_cast(LaneOffset + Index)); + })) return false; return Success(R, E); } - + case X86::BI__builtin_ia32_phminposuw128: { APValue Source; if (!Evaluate(Source, Info, E->getArg(0))) From 87017fc335f207a368125329a01972cdcc6539f3 Mon Sep 17 00:00:00 2001 From: stomfaig Date: Tue, 18 Nov 2025 20:52:21 +0000 Subject: [PATCH 5/6] make intrinsics Constexpr --- clang/include/clang/Basic/BuiltinsX86.td | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 69d18679fd6ec..0aa2249c1f8be 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -511,7 +511,7 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in def vperm2f128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">; } -let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def vpermilpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Constant int)">; def vpermilps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">; } @@ -527,6 +527,8 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid def vinsertf128_pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<2, double>, _Constant int)">; def vinsertf128_ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<4, float>, _Constant int)">; def vinsertf128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>, _Constant int)">; + def vpermilpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">; + def vpermilps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">; foreach Op = ["hadd", "hsub"] in { def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">; @@ -535,8 +537,6 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid } let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vpermilpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">; - def vpermilps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">; def sqrtpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>)">; def sqrtps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">; def rsqrtps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">; @@ -2365,10 +2365,12 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128> def vcvttss2si32 : X86Builtin<"int(_Vector<4, float>, _Constant int)">; def vcvttss2usi32 : X86Builtin<"unsigned int(_Vector<4, float>, _Constant int)">; } - -let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def vpermilpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int)">; def vpermilps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int)">; +} + +let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { def vpermilvarpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, long long int>)">; def vpermilvarps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, int>)">; } From 1a7fd492bbedd9a59b617ef92e9fe7355ced5b65 Mon Sep 17 00:00:00 2001 From: stomfaig Date: Tue, 18 Nov 2025 20:52:32 +0000 Subject: [PATCH 6/6] adding tests --- clang/test/CodeGen/X86/avx-builtins.c | 5 ++ clang/test/CodeGen/X86/avx512f-builtins.c | 32 +++++++++++++ clang/test/CodeGen/X86/avx512vl-builtins.c | 53 ++++++++++++++++++++++ 3 files changed, 90 insertions(+) diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index 737febbc7fef6..6bf5df8a30b95 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -1398,18 +1398,21 @@ __m128d test_mm_permute_pd(__m128d A) { // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> return _mm_permute_pd(A, 1); } +TEST_CONSTEXPR(match_m128d(_mm_permute_pd(((__m128d){1.0, 2.0}), 1), 2.0, 1.0)); __m256d test_mm256_permute_pd(__m256d A) { // CHECK-LABEL: test_mm256_permute_pd // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <4 x i32> return _mm256_permute_pd(A, 5); } +TEST_CONSTEXPR(match_m256d(_mm256_permute_pd(((__m256d){1.0f, 2.0f, 3.0f, 4.0f}), 5), 2.0f, 1.0f, 4.0f, 3.0f)); __m128 test_mm_permute_ps(__m128 A) { // CHECK-LABEL: test_mm_permute_ps // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> return _mm_permute_ps(A, 0x1b); } +TEST_CONSTEXPR(match_m128(_mm_permute_ps(((__m128){1.0, 2.0, 3.0, 4.0}), 0x1b), 4.0, 3.0, 2.0, 1.0)); // Test case for PR12401 __m128 test2_mm_permute_ps(__m128 a) { @@ -1417,12 +1420,14 @@ __m128 test2_mm_permute_ps(__m128 a) { // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> return _mm_permute_ps(a, 0xe6); } +TEST_CONSTEXPR(match_m128(_mm_permute_ps(((__m128){1.0, 2.0, 3.0, 4.0}), 0xe6), 3.0, 2.0, 3.0, 4.0)); __m256 test_mm256_permute_ps(__m256 A) { // CHECK-LABEL: test_mm256_permute_ps // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <8 x i32> return _mm256_permute_ps(A, 0x1b); } +TEST_CONSTEXPR(match_m256(_mm256_permute_ps(((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}), 0x1b), 4.0, 3.0, 2.0, 1.0, 8.0, 7.0, 6.0, 5.0)); __m256d test_mm256_permute2f128_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_permute2f128_pd diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 71e700af0069e..5d550530108ca 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -5448,6 +5448,7 @@ __m512d test_mm512_permute_pd(__m512d __X) { // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <8 x i32> return _mm512_permute_pd(__X, 2); } +TEST_CONSTEXPR(match_m512d(_mm512_permute_pd(((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}), 2), 0.0, 1.0, 2.0, 2.0, 4.0, 4.0, 6.0, 6.0)); __m512d test_mm512_mask_permute_pd(__m512d __W, __mmask8 __U, __m512d __X) { // CHECK-LABEL: test_mm512_mask_permute_pd @@ -5455,6 +5456,13 @@ __m512d test_mm512_mask_permute_pd(__m512d __W, __mmask8 __U, __m512d __X) { // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_permute_pd(__W, __U, __X, 2); } +TEST_CONSTEXPR(match_m512d(_mm512_mask_permute_pd( + ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}), + (__mmask8)0b01010100, + ((__m512d){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}), + 2), + 0.0, 1.0, 10.0, 3.0, 12.0, 5.0, 14.0, 7.0 +)); __m512d test_mm512_maskz_permute_pd(__mmask8 __U, __m512d __X) { // CHECK-LABEL: test_mm512_maskz_permute_pd @@ -5462,12 +5470,23 @@ __m512d test_mm512_maskz_permute_pd(__mmask8 __U, __m512d __X) { // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_permute_pd(__U, __X, 2); } +TEST_CONSTEXPR(match_m512d(_mm512_maskz_permute_pd( + (__mmask8)0b01010100, + ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}), + 2), + 0.0, 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0 +)); __m512 test_mm512_permute_ps(__m512 __X) { // CHECK-LABEL: test_mm512_permute_ps // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <16 x i32> return _mm512_permute_ps(__X, 2); } +TEST_CONSTEXPR(match_m512(_mm512_permute_ps( + ((__m512){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), + 2), + 2, 0, 0, 0, 6, 4, 4, 4, 10, 8, 8, 8, 14, 12, 12, 12 +)); __m512 test_mm512_mask_permute_ps(__m512 __W, __mmask16 __U, __m512 __X) { // CHECK-LABEL: test_mm512_mask_permute_ps @@ -5475,6 +5494,13 @@ __m512 test_mm512_mask_permute_ps(__m512 __W, __mmask16 __U, __m512 __X) { // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_permute_ps(__W, __U, __X, 2); } +TEST_CONSTEXPR(match_m512(_mm512_mask_permute_ps( + ((__m512){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), + (__mmask16)0b1010101010101010, + ((__m512){16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), + 2), + 0, 16, 2, 16, 4, 20, 6, 20, 8, 24, 10, 24, 12, 28, 14, 28 +)); __m512 test_mm512_maskz_permute_ps(__mmask16 __U, __m512 __X) { // CHECK-LABEL: test_mm512_maskz_permute_ps @@ -5482,6 +5508,12 @@ __m512 test_mm512_maskz_permute_ps(__mmask16 __U, __m512 __X) { // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_permute_ps(__U, __X, 2); } +TEST_CONSTEXPR(match_m512(_mm512_maskz_permute_ps( + (__mmask16)0b1010101010101010, + ((__m512){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), + 2), + 0, 0, 0, 0, 0, 4, 0, 4, 0, 8, 0, 8, 0, 12, 0, 12 +)); __m512d test_mm512_permutevar_pd(__m512d __A, __m512i __C) { // CHECK-LABEL: test_mm512_permutevar_pd diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index a7eee79c97539..1312057d3f7a2 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -7951,6 +7951,13 @@ __m128d test_mm_mask_permute_pd(__m128d __W, __mmask8 __U, __m128d __X) { // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm_mask_permute_pd(__W, __U, __X, 1); } +TEST_CONSTEXPR(match_m128d(_mm_mask_permute_pd( + ((__m128d){0.0, 1.0}), + (__mmask8)0b10, + ((__m128d){2.0, 3.0}), + 1), + 0.0, 2.0 +)); __m128d test_mm_maskz_permute_pd(__mmask8 __U, __m128d __X) { // CHECK-LABEL: test_mm_maskz_permute_pd @@ -7958,6 +7965,12 @@ __m128d test_mm_maskz_permute_pd(__mmask8 __U, __m128d __X) { // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm_maskz_permute_pd(__U, __X, 1); } +TEST_CONSTEXPR(match_m128d(_mm_maskz_permute_pd( + (__mmask8)0b10, + ((__m128d){1.0, 2.0}), + 1), + 0.0, 1.0 +)); __m256d test_mm256_mask_permute_pd(__m256d __W, __mmask8 __U, __m256d __X) { // CHECK-LABEL: test_mm256_mask_permute_pd @@ -7965,6 +7978,13 @@ __m256d test_mm256_mask_permute_pd(__m256d __W, __mmask8 __U, __m256d __X) { // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm256_mask_permute_pd(__W, __U, __X, 5); } +TEST_CONSTEXPR(match_m256d(_mm256_mask_permute_pd( + ((__m256d){0.0, 1.0, 2.0, 3.0}), + (__mmask8)0b1010, + ((__m256d){4.0, 5.0, 6.0, 7.0}), + 5), + 0.0, 4.0, 2.0, 6.0 +)); __m256d test_mm256_maskz_permute_pd(__mmask8 __U, __m256d __X) { // CHECK-LABEL: test_mm256_maskz_permute_pd @@ -7972,6 +7992,12 @@ __m256d test_mm256_maskz_permute_pd(__mmask8 __U, __m256d __X) { // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm256_maskz_permute_pd(__U, __X, 5); } +TEST_CONSTEXPR(match_m256d(_mm256_maskz_permute_pd( + (__mmask8)0b1010, + ((__m256d){4.0, 5.0, 6.0, 7.0}), + 5), + 0.0, 4.0, 0.0, 6.0 +)); __m128 test_mm_mask_permute_ps(__m128 __W, __mmask8 __U, __m128 __X) { // CHECK-LABEL: test_mm_mask_permute_ps @@ -7979,6 +8005,13 @@ __m128 test_mm_mask_permute_ps(__m128 __W, __mmask8 __U, __m128 __X) { // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm_mask_permute_ps(__W, __U, __X, 0x1b); } +TEST_CONSTEXPR(match_m128(_mm_mask_permute_ps( + ((__m128){0.0, 1.0, 2.0, 3.0}), + (__mmask8)0b1010, + ((__m128){4.0, 5.0, 6.0, 7.0}), + 0x1b), + 0, 6.0, 2.0, 4.0 +)); __m128 test_mm_maskz_permute_ps(__mmask8 __U, __m128 __X) { // CHECK-LABEL: test_mm_maskz_permute_ps @@ -7986,6 +8019,13 @@ __m128 test_mm_maskz_permute_ps(__mmask8 __U, __m128 __X) { // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm_maskz_permute_ps(__U, __X, 0x1b); } +TEST_CONSTEXPR(match_m128(_mm_maskz_permute_ps( + (__mmask8)0b1010, + ((__m128){4.0, 5.0, 6.0, 7.0}), + 0x1b), + 0.0, 6.0, 0.0, 4.0 +)); + __m256 test_mm256_mask_permute_ps(__m256 __W, __mmask8 __U, __m256 __X) { // CHECK-LABEL: test_mm256_mask_permute_ps @@ -7993,6 +8033,13 @@ __m256 test_mm256_mask_permute_ps(__m256 __W, __mmask8 __U, __m256 __X) { // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_mask_permute_ps(__W, __U, __X, 0x1b); } +TEST_CONSTEXPR(match_m256(_mm256_mask_permute_ps( + ((__m256){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}), + (__mmask8)0b10101010, + ((__m256){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}), + 0x1b), + 0.0, 10.0, 2.0, 8.0, 4.0, 14.0, 6.0, 12.0 +)); __m256 test_mm256_maskz_permute_ps(__mmask8 __U, __m256 __X) { // CHECK-LABEL: test_mm256_maskz_permute_ps @@ -8000,6 +8047,12 @@ __m256 test_mm256_maskz_permute_ps(__mmask8 __U, __m256 __X) { // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_maskz_permute_ps(__U, __X, 0x1b); } +TEST_CONSTEXPR(match_m256(_mm256_maskz_permute_ps( + (__mmask8)0b10101010, + ((__m256){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}), + 0x1b), + 0.0, 10.0, 0.0, 8.0, 0.0, 14.0, 0.0, 12.0 +)); __m128d test_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) { // CHECK-LABEL: test_mm_mask_permutevar_pd