From f26a73529db09f1bc6c423f930945de56656e6fd Mon Sep 17 00:00:00 2001 From: ahmed Date: Tue, 11 Nov 2025 15:10:28 +0200 Subject: [PATCH 1/3] [X86] Add constexpr support for addsub and select intrinsics --- clang/include/clang/Basic/BuiltinsX86.td | 6 ++--- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 32 ++++++++++++++++++++++++ clang/lib/AST/ExprConstant.cpp | 31 +++++++++++++++++++++++ clang/lib/Headers/pmmintrin.h | 2 +- 4 files changed, 67 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index cd5f2c3012712..08a2be02d1929 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -93,7 +93,7 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { } - let Features = "sse3" in { + let Features = "sse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { foreach Op = ["addsub"] in { def Op#ps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">; def Op#pd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">; @@ -121,7 +121,7 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { } // AVX -let Attributes = [Const, NoThrow, RequiredVectorWidth<256>], Features = "avx" in { +let Attributes = [Const, NoThrow, Constexpr, RequiredVectorWidth<256>], Features = "avx" in { foreach Op = ["addsub", "max", "min"] in { def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">; def Op#ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">; @@ -4124,7 +4124,7 @@ let Features = "avx512bf16", Attributes = [NoThrow, Const, RequiredVectorWidth<1 def selectsbf_128 : X86Builtin<"_Vector<8, __bf16>(unsigned char, _Vector<8, __bf16>, _Vector<8, __bf16>)">; } -let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def selectss_128 : X86Builtin<"_Vector<4, float>(unsigned char, _Vector<4, float>, _Vector<4, float>)">; def selectsd_128 : X86Builtin<"_Vector<2, double>(unsigned char, _Vector<2, double>, _Vector<2, double>)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 0ef130c0a55df..3090d660dee3c 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -4279,6 +4279,36 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, F.subtract(RHS, RM); return F; }); + case clang::X86::BI__builtin_ia32_addsubpd: + case clang::X86::BI__builtin_ia32_addsubps: + case clang::X86::BI__builtin_ia32_addsubpd256: + case clang::X86::BI__builtin_ia32_addsubps256: { + // Addsub: alternates between subtraction and addition + // Result[i] = (i % 2 == 0) ? (a[i] - b[i]) : (a[i] + b[i]) + const Pointer &RHS = S.Stk.pop(); + const Pointer &LHS = S.Stk.pop(); + const Pointer &Dst = S.Stk.peek(); + FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts()); + llvm::RoundingMode RM = getRoundingMode(FPO); + const auto *VT = Call->getArg(0)->getType()->castAs(); + unsigned NumElts = VT->getNumElements(); + + using T = PrimConv::T; + for (unsigned I = 0; I < NumElts; ++I) { + APFloat LElem = LHS.elem(I).getAPFloat(); + APFloat RElem = RHS.elem(I).getAPFloat(); + if (I % 2 == 0) { + // Even indices: subtract + LElem.subtract(RElem, RM); + } else { + // Odd indices: add + LElem.add(RElem, RM); + } + Dst.elem(I) = static_cast(LElem); + } + Dst.initializeAllElements(); + return true; + } case clang::X86::BI__builtin_ia32_pmuldq128: case clang::X86::BI__builtin_ia32_pmuldq256: @@ -4433,6 +4463,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_selectpbf_128: case X86::BI__builtin_ia32_selectpbf_256: case X86::BI__builtin_ia32_selectpbf_512: + case X86::BI__builtin_ia32_selectss_128: + case X86::BI__builtin_ia32_selectsd_128: case X86::BI__builtin_ia32_selectps_128: case X86::BI__builtin_ia32_selectps_256: case X86::BI__builtin_ia32_selectps_512: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 972d9fe3b5e4f..58527eff88e6e 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12889,6 +12889,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case X86::BI__builtin_ia32_selectpbf_128: case X86::BI__builtin_ia32_selectpbf_256: case X86::BI__builtin_ia32_selectpbf_512: + case X86::BI__builtin_ia32_selectss_128: + case X86::BI__builtin_ia32_selectsd_128: case X86::BI__builtin_ia32_selectps_128: case X86::BI__builtin_ia32_selectps_256: case X86::BI__builtin_ia32_selectps_512: @@ -13383,6 +13385,35 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { } return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case clang::X86::BI__builtin_ia32_addsubpd: + case clang::X86::BI__builtin_ia32_addsubps: + case clang::X86::BI__builtin_ia32_addsubpd256: + case clang::X86::BI__builtin_ia32_addsubps256: { + // Addsub: alternates between subtraction and addition + // Result[i] = (i % 2 == 0) ? (a[i] - b[i]) : (a[i] + b[i]) + APValue SourceLHS, SourceRHS; + if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) || + !EvaluateAsRValue(Info, E->getArg(1), SourceRHS)) + return false; + unsigned NumElts = SourceLHS.getVectorLength(); + SmallVector ResultElements; + ResultElements.reserve(NumElts); + llvm::RoundingMode RM = getActiveRoundingMode(getEvalInfo(), E); + + for (unsigned I = 0; I < NumElts; ++I) { + APFloat LHS = SourceLHS.getVectorElt(I).getFloat(); + APFloat RHS = SourceRHS.getVectorElt(I).getFloat(); + if (I % 2 == 0) { + // Even indices: subtract + LHS.subtract(RHS, RM); + } else { + // Odd indices: add + LHS.add(RHS, RM); + } + ResultElements.push_back(APValue(LHS)); + } + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } case Builtin::BI__builtin_elementwise_fshl: case Builtin::BI__builtin_elementwise_fshr: { APValue SourceHi, SourceLo, SourceShift; diff --git a/clang/lib/Headers/pmmintrin.h b/clang/lib/Headers/pmmintrin.h index 6b152bde29fc1..42bd343e326de 100644 --- a/clang/lib/Headers/pmmintrin.h +++ b/clang/lib/Headers/pmmintrin.h @@ -166,7 +166,7 @@ _mm_moveldup_ps(__m128 __a) /// A 128-bit vector of [2 x double] containing the right source operand. /// \returns A 128-bit vector of [2 x double] containing the alternating sums /// and differences of both operands. -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_addsub_pd(__m128d __a, __m128d __b) { return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b); } From 05ab6c9909822d411178d31c923a735a868c4271 Mon Sep 17 00:00:00 2001 From: ahmed Date: Tue, 11 Nov 2025 16:18:27 +0200 Subject: [PATCH 2/3] feat: Add tests --- clang/include/clang/Basic/BuiltinsX86.td | 2 +- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 -- clang/lib/AST/ExprConstant.cpp | 2 -- clang/lib/Headers/avx512fintrin.h | 8 ++++---- clang/lib/Headers/avxintrin.h | 4 ++-- clang/lib/Headers/pmmintrin.h | 2 +- clang/test/CodeGen/X86/avx-builtins.c | 2 ++ clang/test/CodeGen/X86/sse3-builtins.c | 2 ++ 8 files changed, 12 insertions(+), 12 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 08a2be02d1929..7d110fc71e15d 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -4124,7 +4124,7 @@ let Features = "avx512bf16", Attributes = [NoThrow, Const, RequiredVectorWidth<1 def selectsbf_128 : X86Builtin<"_Vector<8, __bf16>(unsigned char, _Vector<8, __bf16>, _Vector<8, __bf16>)">; } -let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { +let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { def selectss_128 : X86Builtin<"_Vector<4, float>(unsigned char, _Vector<4, float>, _Vector<4, float>)">; def selectsd_128 : X86Builtin<"_Vector<2, double>(unsigned char, _Vector<2, double>, _Vector<2, double>)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 3090d660dee3c..8e73ec24902b2 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -4463,8 +4463,6 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_selectpbf_128: case X86::BI__builtin_ia32_selectpbf_256: case X86::BI__builtin_ia32_selectpbf_512: - case X86::BI__builtin_ia32_selectss_128: - case X86::BI__builtin_ia32_selectsd_128: case X86::BI__builtin_ia32_selectps_128: case X86::BI__builtin_ia32_selectps_256: case X86::BI__builtin_ia32_selectps_512: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 58527eff88e6e..3ba7520adf195 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12889,8 +12889,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case X86::BI__builtin_ia32_selectpbf_128: case X86::BI__builtin_ia32_selectpbf_256: case X86::BI__builtin_ia32_selectpbf_512: - case X86::BI__builtin_ia32_selectss_128: - case X86::BI__builtin_ia32_selectsd_128: case X86::BI__builtin_ia32_selectps_128: case X86::BI__builtin_ia32_selectps_256: case X86::BI__builtin_ia32_selectps_512: diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 997e9608e112f..39a5c2d4c218c 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -8383,23 +8383,23 @@ _mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A) { (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B) { return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), _mm_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B) { return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), _mm_setzero_pd()); diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 4aef9245323fb..33b8eaec1f99a 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -147,7 +147,7 @@ static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_sub_ps(__m256 __a, /// A 256-bit vector of [4 x double] containing the right source operand. /// \returns A 256-bit vector of [4 x double] containing the alternating sums /// and differences between both operands. -static __inline __m256d __DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_addsub_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b); @@ -166,7 +166,7 @@ _mm256_addsub_pd(__m256d __a, __m256d __b) /// A 256-bit vector of [8 x float] containing the right source operand. /// \returns A 256-bit vector of [8 x float] containing the alternating sums and /// differences between both operands. -static __inline __m256 __DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_addsub_ps(__m256 __a, __m256 __b) { return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b); diff --git a/clang/lib/Headers/pmmintrin.h b/clang/lib/Headers/pmmintrin.h index 42bd343e326de..4b284c41181ca 100644 --- a/clang/lib/Headers/pmmintrin.h +++ b/clang/lib/Headers/pmmintrin.h @@ -60,7 +60,7 @@ _mm_lddqu_si128(__m128i_u const *__p) /// A 128-bit vector of [4 x float] containing the right source operand. /// \returns A 128-bit vector of [4 x float] containing the alternating sums and /// differences of both operands. -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_addsub_ps(__m128 __a, __m128 __b) { return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b); diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index 737febbc7fef6..46bc28b85d8db 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -45,12 +45,14 @@ __m256d test_mm256_addsub_pd(__m256d A, __m256d B) { // CHECK: call {{.*}}<4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_addsub_pd(A, B); } +TEST_CONSTEXPR(match_m256d(_mm256_addsub_pd((__m256d){+1.0, +2.0, +3.0, +4.0}, (__m256d){+1.0, +1.0, +1.0, +1.0}), +0.0, +3.0, +2.0, +5.0)); __m256 test_mm256_addsub_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_addsub_ps // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}) return _mm256_addsub_ps(A, B); } +TEST_CONSTEXPR(match_m256(_mm256_addsub_ps((__m256){+1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f}, (__m256){+1.0f, +1.0f, +1.0f, +1.0f, +1.0f, +1.0f, +1.0f, +1.0f}), +0.0f, +3.0f, +2.0f, +5.0f, +4.0f, +7.0f, +6.0f, +9.0f)); __m256d test_mm256_and_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_and_pd diff --git a/clang/test/CodeGen/X86/sse3-builtins.c b/clang/test/CodeGen/X86/sse3-builtins.c index a82dd4080670b..44389fbdc6f77 100644 --- a/clang/test/CodeGen/X86/sse3-builtins.c +++ b/clang/test/CodeGen/X86/sse3-builtins.c @@ -19,12 +19,14 @@ __m128d test_mm_addsub_pd(__m128d A, __m128d B) { // CHECK: call {{.*}}<2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_addsub_pd(A, B); } +TEST_CONSTEXPR(match_m128d(_mm_addsub_pd((__m128d){+2.0, +2.0}, (__m128d){+1.0, +2.0}), +1.0, +4.0)); __m128 test_mm_addsub_ps(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_addsub_ps // CHECK: call {{.*}}<4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}) return _mm_addsub_ps(A, B); } +TEST_CONSTEXPR(match_m128(_mm_addsub_ps((__m128){+3.0f, +4.0f, +5.0f, +6.0f}, (__m128){+1.0f, +2.0f, +3.0f, +4.0f}), +2.0f, +6.0f, +2.0f, +10.0f)); __m128d test_mm_hadd_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_hadd_pd From beebb7d6db98b7295cdbeb03ad37415f3695f872 Mon Sep 17 00:00:00 2001 From: ahmed Date: Tue, 11 Nov 2025 16:37:08 +0200 Subject: [PATCH 3/3] chore: update formatting --- clang/include/clang/Basic/BuiltinsX86.td | 7 ++++--- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +- clang/lib/AST/ExprConstant.cpp | 2 +- clang/lib/Headers/avx512fintrin.h | 22 ++++++++++++++-------- clang/lib/Headers/avxintrin.h | 6 ++---- clang/lib/Headers/pmmintrin.h | 3 +-- 6 files changed, 23 insertions(+), 19 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 7d110fc71e15d..f87644830c33b 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -92,8 +92,8 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { def cmpsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant char)">; } - - let Features = "sse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { + let Features = "sse3", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { foreach Op = ["addsub"] in { def Op#ps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">; def Op#pd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">; @@ -121,7 +121,8 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { } // AVX -let Attributes = [Const, NoThrow, Constexpr, RequiredVectorWidth<256>], Features = "avx" in { +let Attributes = [Const, NoThrow, Constexpr, RequiredVectorWidth<256>], + Features = "avx" in { foreach Op = ["addsub", "max", "min"] in { def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">; def Op#ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">; diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 8e73ec24902b2..4bedb14c61fdb 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -4292,7 +4292,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, llvm::RoundingMode RM = getRoundingMode(FPO); const auto *VT = Call->getArg(0)->getType()->castAs(); unsigned NumElts = VT->getNumElements(); - + using T = PrimConv::T; for (unsigned I = 0; I < NumElts; ++I) { APFloat LElem = LHS.elem(I).getAPFloat(); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 3ba7520adf195..2489252268a49 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13397,7 +13397,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { SmallVector ResultElements; ResultElements.reserve(NumElts); llvm::RoundingMode RM = getActiveRoundingMode(getEvalInfo(), E); - + for (unsigned I = 0; I < NumElts; ++I) { APFloat LHS = SourceLHS.getVectorElt(I).getFloat(); APFloat RHS = SourceRHS.getVectorElt(I).getFloat(); diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 39a5c2d4c218c..ef6690b46ab44 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -8383,24 +8383,30 @@ _mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A) { (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_move_ss(__m128 __W, + __mmask8 __U, + __m128 __A, + __m128 __B) { return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_move_ss(__mmask8 __U, + __m128 __A, + __m128 __B) { return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), _mm_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_move_sd(__m128d __W, + __mmask8 __U, + __m128d __A, + __m128d __B) { return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_move_sd(__mmask8 __U, + __m128d __A, + __m128d __B) { return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), _mm_setzero_pd()); } diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 33b8eaec1f99a..3e1618ed192c8 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -148,8 +148,7 @@ static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_sub_ps(__m256 __a, /// \returns A 256-bit vector of [4 x double] containing the alternating sums /// and differences between both operands. static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR -_mm256_addsub_pd(__m256d __a, __m256d __b) -{ +_mm256_addsub_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b); } @@ -167,8 +166,7 @@ _mm256_addsub_pd(__m256d __a, __m256d __b) /// \returns A 256-bit vector of [8 x float] containing the alternating sums and /// differences between both operands. static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR -_mm256_addsub_ps(__m256 __a, __m256 __b) -{ +_mm256_addsub_ps(__m256 __a, __m256 __b) { return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b); } diff --git a/clang/lib/Headers/pmmintrin.h b/clang/lib/Headers/pmmintrin.h index 4b284c41181ca..a9a65440363c3 100644 --- a/clang/lib/Headers/pmmintrin.h +++ b/clang/lib/Headers/pmmintrin.h @@ -61,8 +61,7 @@ _mm_lddqu_si128(__m128i_u const *__p) /// \returns A 128-bit vector of [4 x float] containing the alternating sums and /// differences of both operands. static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR -_mm_addsub_ps(__m128 __a, __m128 __b) -{ +_mm_addsub_ps(__m128 __a, __m128 __b) { return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b); }