From 6283f3d21efe332035daf4a726b6a174688e75f9 Mon Sep 17 00:00:00 2001 From: Shreeyash Pandey Date: Sat, 27 Sep 2025 18:19:08 +0530 Subject: [PATCH 01/12] [clang] make SSE/AVX/AVX512 sqrt constexpr Signed-off-by: Shreeyash Pandey --- clang/include/clang/Basic/BuiltinsX86.td | 3 +- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 ++ clang/lib/AST/ExprConstant.cpp | 36 ++++++++++++++++++++++++ clang/lib/Headers/emmintrin.h | 2 +- clang/test/CodeGen/X86/sse2-builtins.c | 2 ++ 5 files changed, 43 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 77e599587edc3..bddcd432f8857 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -222,7 +222,6 @@ let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] i def pshuflw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">; def pshufhw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">; def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">; - def sqrtpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">; def sqrtsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">; def shufpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">; def cvtpd2dq : X86Builtin<"_Vector<2, long long int>(_Vector<2, double>)">; @@ -295,6 +294,8 @@ let Features = "sse2", def psrawi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, int)">; def psradi128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int)">; + + def sqrtpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">; } let Features = "sse3", Attributes = [NoThrow] in { diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 9076946d29657..7abe1b932f238 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SipHash.h" +#include "llvm/Support/raw_ostream.h" namespace clang { namespace interp { @@ -2996,6 +2997,7 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC, bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, uint32_t BuiltinID) { + llvm::errs() << "Calling InterpretBuiltin\n"; if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID)) return Invalid(S, OpPC); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index b706b14945b6d..e380f24b561d0 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -68,6 +68,7 @@ #include #include #include +#include #define DEBUG_TYPE "exprconstant" @@ -12235,6 +12236,41 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case X86::BI__builtin_ia32_sqrtpd: { + llvm::errs() << "We are inside sqrtpd\n"; + APValue Source; + if (!EvaluateAsRValue(Info, E->getArg(0), Source)) + return false; + + QualType DestEltTy = E->getType()->castAs()->getElementType(); + unsigned SourceLen = Source.getVectorLength(); + SmallVector ResultElements; + ResultElements.reserve(SourceLen); + + llvm::errs() << "SourceLen " << SourceLen << '\n'; + + for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) { + llvm::errs() << "We are inside for loop\n"; + APValue CurrentEle = Source.getVectorElt(EltNum); + if (DestEltTy->isFloatingType()) { + llvm::APFloat Value = CurrentEle.getFloat(); + if (Value.isNegative() && !Value.isZero()) { + Value = llvm::APFloat::getQNaN(Value.getSemantics()); + } else { + double DoubleValue = Value.convertToDouble(); + double SqrtValue = ::sqrt(DoubleValue); + llvm::APFloat Value2{SqrtValue}; + Value = Value2; + llvm::errs() << "Pushing " << SqrtValue << ' ' << Value2 << " to resultelements\n"; + } + ResultElements.push_back(APValue(Value)); + } else { + return false; + } + llvm::errs() << "Outside the loop, about to exit " << "res size " << ResultElements.size() << "\n"; + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } + } } } diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 6597e7e7d4030..fef9e280ba53f 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -256,7 +256,7 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_sd(__m128d __a, /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector of [2 x double] containing the square roots of the /// values in the operand. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_pd(__m128d __a) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sqrt_pd(__m128d __a) { return __builtin_ia32_sqrtpd((__v2df)__a); } diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index 84b90c09444c2..a71546c2fb6ce 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -1423,6 +1423,8 @@ __m128d test_mm_sqrt_pd(__m128d A) { return _mm_sqrt_pd(A); } +TEST_CONSTEXPR(match_m128d(_mm_sqrt_pd(_mm_set_pd(4.0,4.0)), 2.0, 2.0)); + __m128d test_mm_sqrt_sd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_sqrt_sd // CHECK: extractelement <2 x double> %{{.*}}, i64 0 From 83c0718a996679a4ccb60589d155eb6bf4177ffd Mon Sep 17 00:00:00 2001 From: Shreeyash Pandey Date: Wed, 1 Oct 2025 18:50:09 +0530 Subject: [PATCH 02/12] [clang] add sqrtpd and sqrtps Signed-off-by: Shreeyash Pandey --- clang/include/clang/Basic/BuiltinsX86.td | 5 +- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 77 +++++++++++++++++++++++- clang/lib/AST/ExprConstant.cpp | 22 ++++--- clang/lib/Headers/xmmintrin.h | 2 +- clang/test/CodeGen/X86/sse-builtins.c | 3 + clang/test/CodeGen/X86/sse2-builtins.c | 1 + 6 files changed, 98 insertions(+), 12 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index bddcd432f8857..d49b7be7a8052 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -199,11 +199,14 @@ let Features = "sse", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in def rcpss : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">; def rsqrtps : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">; def rsqrtss : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">; - def sqrtps : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">; def sqrtss : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">; def shufps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">; } +let Features = "sse", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { + def sqrtps : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">; +} + let Features = "sse2", Attributes = [NoThrow, RequiredVectorWidth<128>] in { def maskmovdqu : X86Builtin<"void(_Vector<16, char>, _Vector<16, char>, char *)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 7abe1b932f238..da1cba76dd64f 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -20,6 +20,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SipHash.h" #include "llvm/Support/raw_ostream.h" +#include namespace clang { namespace interp { @@ -2995,9 +2996,80 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC, return true; } +static llvm::APFloat apply_x86_sqrt(llvm::APFloat Val, const llvm::fltSemantics &Semantics) { + if (Val.isNegative() && !Val.isZero()) { + return llvm::APFloat::getQNaN(Semantics); + } else { + double DoubleValue = Val.convertToDouble(); + double SqrtValue = ::sqrt(DoubleValue); + + llvm::APFloat TempValue(SqrtValue); + + bool LosesInfo; + TempValue.convert(Semantics, llvm::APFloat::rmNearestTiesToEven, &LosesInfo); + return TempValue; + } +} + +static bool interp__builtin_x86_sqrt(InterpState &S, CodePtr OpPC, + const CallExpr *Call, + unsigned ID) { + llvm::errs() << "Entering x86 sqrtpd/ps interpretbuiltin\n"; + + assert(Call->getNumArgs() == 1); + const Expr *ArgExpr = Call->getArg(0); + QualType ArgTy = ArgExpr->getType(); + QualType ResultTy = Call->getType(); + + if (!(ArgTy->isRealFloatingType() || + (ArgTy->isVectorType() && + ArgTy->castAs()->getElementType()->isRealFloatingType()))) + return false; + + const llvm::fltSemantics *SemanticsPtr; + if (ArgTy->isVectorType()) + SemanticsPtr = &S.getContext().getFloatSemantics(ArgTy->castAs()->getElementType()); + else + SemanticsPtr = &S.getContext().getFloatSemantics(ArgTy); + const llvm::fltSemantics &Semantics = *SemanticsPtr; + + + // Scalar case + if (!ArgTy->isVectorType()) { + llvm::APFloat Val = S.Stk.pop().getAPFloat(); + Val = apply_x86_sqrt(Val, Semantics); + S.Stk.push(Val); + return true; + } + + // Vector case + assert(ArgTy->isVectorType()); + llvm::errs() << "Considering this as a vector\n"; + const auto *VT = ArgTy->castAs(); + + const Pointer &Arg = S.Stk.pop(); + const Pointer &Dst = S.Stk.peek(); + + assert(Arg.getFieldDesc()->isPrimitiveArray()); + assert(Dst.getFieldDesc()->isPrimitiveArray()); + assert(Arg.getFieldDesc()->getNumElems() == Dst.getFieldDesc()->getNumElems()); + + PrimType ElemT = *S.getContext().classify(VT->getElementType()); + unsigned NumElems = VT->getNumElements(); + + for (unsigned I = 0; I != NumElems; ++I) { + llvm::APFloat Val = Arg.elem(I).getAPFloat(); + Val = apply_x86_sqrt(Val, Semantics); + Dst.elem(I) = Val; + } + + Dst.initializeAllElements(); + return true; +} + bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, uint32_t BuiltinID) { - llvm::errs() << "Calling InterpretBuiltin\n"; + llvm::errs() << "Inside Interpretbuiltin for " << Call << "\n"; if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID)) return Invalid(S, OpPC); @@ -3755,6 +3827,9 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_vinsertf128_si256: case X86::BI__builtin_ia32_insert128i256: return interp__builtin_x86_insert_subvector(S, OpPC, Call, BuiltinID); + case X86::BI__builtin_ia32_sqrtpd: + case X86::BI__builtin_ia32_sqrtps: + return interp__builtin_x86_sqrt(S, OpPC, Call, BuiltinID); default: S.FFDiag(S.Current->getLocation(OpPC), diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index e380f24b561d0..95228e7ef5169 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12236,13 +12236,15 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } - case X86::BI__builtin_ia32_sqrtpd: { - llvm::errs() << "We are inside sqrtpd\n"; + case X86::BI__builtin_ia32_sqrtpd: + case X86::BI__builtin_ia32_sqrtps: { + llvm::errs() << "We are inside sqrtpd/sqrtps\n"; APValue Source; if (!EvaluateAsRValue(Info, E->getArg(0), Source)) return false; QualType DestEltTy = E->getType()->castAs()->getElementType(); + const llvm::fltSemantics &Semantics = Info.Ctx.getFloatTypeSemantics(DestEltTy); // Retrieve correct semantics unsigned SourceLen = Source.getVectorLength(); SmallVector ResultElements; ResultElements.reserve(SourceLen); @@ -12257,19 +12259,21 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { if (Value.isNegative() && !Value.isZero()) { Value = llvm::APFloat::getQNaN(Value.getSemantics()); } else { - double DoubleValue = Value.convertToDouble(); - double SqrtValue = ::sqrt(DoubleValue); - llvm::APFloat Value2{SqrtValue}; - Value = Value2; - llvm::errs() << "Pushing " << SqrtValue << ' ' << Value2 << " to resultelements\n"; +double DoubleValue = Value.convertToDouble(); +double SqrtValue = sqrt(DoubleValue); +llvm::APFloat TempValue(SqrtValue); +bool LosesInfo; +auto RetStatus = TempValue.convert(Semantics, llvm::RoundingMode::NearestTiesToEven, &LosesInfo); +Value = TempValue; + //llvm::errs() << "Pushing " << SqrtValue << ' ' << Value2 << " to resultelements\n"; } ResultElements.push_back(APValue(Value)); } else { return false; } - llvm::errs() << "Outside the loop, about to exit " << "res size " << ResultElements.size() << "\n"; - return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + llvm::errs() << "Outside the loop, about to exit " << "res size " << ResultElements.size() << "\n"; + return Success(APValue(ResultElements.data(), ResultElements.size()), E); } } } diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index d876b4735a7d2..2a41324dd31c6 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -248,7 +248,7 @@ _mm_sqrt_ss(__m128 __a) /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the square roots of the /// values in the operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sqrt_ps(__m128 __a) { return __builtin_ia32_sqrtps((__v4sf)__a); diff --git a/clang/test/CodeGen/X86/sse-builtins.c b/clang/test/CodeGen/X86/sse-builtins.c index 3bad3426b1586..929dad963ce52 100644 --- a/clang/test/CodeGen/X86/sse-builtins.c +++ b/clang/test/CodeGen/X86/sse-builtins.c @@ -610,6 +610,7 @@ __m128 test_mm_rsqrt_ps(__m128 x) { return _mm_rsqrt_ps(x); } + __m128 test_mm_rsqrt_ss(__m128 x) { // CHECK-LABEL: test_mm_rsqrt_ss // CHECK: call {{.*}}<4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> {{.*}}) @@ -742,6 +743,8 @@ __m128 test_mm_sqrt_ps(__m128 x) { return _mm_sqrt_ps(x); } +TEST_CONSTEXPR(match_m128(_mm_sqrt_ps(_mm_set_ps(16.0f, 9.0f, 4.0f, 1.0f)), 1.0f, 2.0f, 3.0f, 4.0f)); + __m128 test_mm_sqrt_ss(__m128 x) { // CHECK-LABEL: test_mm_sqrt_ss // CHECK: extractelement <4 x float> {{.*}}, i64 0 diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index a71546c2fb6ce..2e009ba432207 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -1433,6 +1433,7 @@ __m128d test_mm_sqrt_sd(__m128d A, __m128d B) { return _mm_sqrt_sd(A, B); } + __m128i test_mm_sra_epi16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_sra_epi16 // CHECK: call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) From bece3db5a8411368d4d9d24eb5d5cbc3d2d73b53 Mon Sep 17 00:00:00 2001 From: Shreeyash Pandey Date: Wed, 1 Oct 2025 20:49:03 +0530 Subject: [PATCH 03/12] [clang] add sqrt{pd|ps}256 Signed-off-by: Shreeyash Pandey --- clang/include/clang/Basic/BuiltinsX86.td | 4 ++-- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 ++ clang/lib/AST/ExprConstant.cpp | 4 +++- clang/lib/Headers/avxintrin.h | 4 ++-- clang/test/CodeGen/X86/avx-builtins.c | 4 ++++ 5 files changed, 13 insertions(+), 5 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index d49b7be7a8052..b41540c670362 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -504,13 +504,13 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid def vinsertf128_pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<2, double>, _Constant int)">; def vinsertf128_ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<4, float>, _Constant int)">; def vinsertf128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>, _Constant int)">; + def sqrtpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>)">; + def sqrtps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">; } let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { def vpermilpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">; def vpermilps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">; - def sqrtpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>)">; - def sqrtps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">; def rsqrtps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">; def rcpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">; def roundpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">; diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index da1cba76dd64f..fe5090dbd94c1 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3829,6 +3829,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return interp__builtin_x86_insert_subvector(S, OpPC, Call, BuiltinID); case X86::BI__builtin_ia32_sqrtpd: case X86::BI__builtin_ia32_sqrtps: + case X86::BI__builtin_ia32_sqrtpd256: + case X86::BI__builtin_ia32_sqrtps256: return interp__builtin_x86_sqrt(S, OpPC, Call, BuiltinID); default: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 95228e7ef5169..67102e55f84d2 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12237,7 +12237,9 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } case X86::BI__builtin_ia32_sqrtpd: - case X86::BI__builtin_ia32_sqrtps: { + case X86::BI__builtin_ia32_sqrtps: + case X86::BI__builtin_ia32_sqrtpd256: + case X86::BI__builtin_ia32_sqrtps256: { llvm::errs() << "We are inside sqrtpd/sqrtps\n"; APValue Source; if (!EvaluateAsRValue(Info, E->getArg(0), Source)) diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index a7f70994be9db..c794b334ab9c0 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -335,7 +335,7 @@ static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_mul_ps(__m256 __a, /// A 256-bit vector of [4 x double]. /// \returns A 256-bit vector of [4 x double] containing the square roots of the /// values in the operand. -static __inline __m256d __DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_sqrt_pd(__m256d __a) { return (__m256d)__builtin_ia32_sqrtpd256((__v4df)__a); @@ -352,7 +352,7 @@ _mm256_sqrt_pd(__m256d __a) /// A 256-bit vector of [8 x float]. /// \returns A 256-bit vector of [8 x float] containing the square roots of the /// values in the operand. -static __inline __m256 __DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_sqrt_ps(__m256 __a) { return (__m256)__builtin_ia32_sqrtps256((__v8sf)__a); diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index 347cd9ee6a667..28132883d4125 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -1871,12 +1871,16 @@ __m256d test_mm256_sqrt_pd(__m256d A) { return _mm256_sqrt_pd(A); } +TEST_CONSTEXPR(match_m256d(_mm256_sqrt_pd(_mm256_set_pd(16.0, 9.0, 4.0, 1.0)), 1.0, 2.0, 3.0, 4.0)); + __m256 test_mm256_sqrt_ps(__m256 A) { // CHECK-LABEL: test_mm256_sqrt_ps // CHECK: call {{.*}}<8 x float> @llvm.sqrt.v8f32(<8 x float> %{{.*}}) return _mm256_sqrt_ps(A); } +TEST_CONSTEXPR(match_m256(_mm256_sqrt_ps(_mm256_set_ps(64.0f, 49.0f, 36.0f, 25.0f, 16.0f, 9.0f, 4.0f, 1.0f)), 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f)); + void test_mm256_store_pd(double* A, __m256d B) { // CHECK-LABEL: test_mm256_store_pd // CHECK: store <4 x double> %{{.*}}, ptr %{{.*}}, align 32 From bd73af2d210cff9249cb3848879d82281b634ed9 Mon Sep 17 00:00:00 2001 From: Shreeyash Pandey Date: Thu, 2 Oct 2025 17:23:53 +0530 Subject: [PATCH 04/12] [clang] add sqrt{pd|ps}512 Signed-off-by: Shreeyash Pandey --- clang/include/clang/Basic/BuiltinsX86.td | 2 +- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 19 ++++++++++++++++++- clang/lib/AST/ExprConstant.cpp | 4 +++- clang/lib/Headers/avx512fintrin.h | 4 ++-- clang/test/CodeGen/X86/avx512f-builtins.c | 4 ++++ 5 files changed, 28 insertions(+), 5 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index b41540c670362..85f99e538c84a 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -950,7 +950,7 @@ let Features = "pku", Attributes = [NoThrow] in { def wrpkru : X86Builtin<"void(unsigned int)">; } -let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def sqrtpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int)">; def sqrtps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index fe5090dbd94c1..6be22308c0c25 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3016,7 +3016,12 @@ static bool interp__builtin_x86_sqrt(InterpState &S, CodePtr OpPC, unsigned ID) { llvm::errs() << "Entering x86 sqrtpd/ps interpretbuiltin\n"; - assert(Call->getNumArgs() == 1); + llvm::errs() << "BI__builtin_ia32_sqrtpd512 " << X86::BI__builtin_ia32_sqrtpd512 << '\n'; + llvm::errs() << "BI__builtin_ia32_sqrtps512 " << X86::BI__builtin_ia32_sqrtps512 << '\n'; + llvm::errs() << "Current ID " << ID << '\n'; + llvm::errs() << "GetNumArgs " << Call->getNumArgs() << '\n'; + unsigned NumArgs = Call->getNumArgs(); + assert(NumArgs == 1 || NumArgs == 2); const Expr *ArgExpr = Call->getArg(0); QualType ArgTy = ArgExpr->getType(); QualType ResultTy = Call->getType(); @@ -3033,6 +3038,16 @@ static bool interp__builtin_x86_sqrt(InterpState &S, CodePtr OpPC, SemanticsPtr = &S.getContext().getFloatSemantics(ArgTy); const llvm::fltSemantics &Semantics = *SemanticsPtr; + if (NumArgs == 2) { + if (!Call->getArg(1)->getType()->isIntegerType()) { + return false; + } + APSInt RoundingMode = popToAPSInt(S, Call->getArg(1)); + if (RoundingMode.getZExtValue() != 4) { + return false; + } + } + // Scalar case if (!ArgTy->isVectorType()) { @@ -3831,6 +3846,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_sqrtps: case X86::BI__builtin_ia32_sqrtpd256: case X86::BI__builtin_ia32_sqrtps256: + case X86::BI__builtin_ia32_sqrtps512: + case X86::BI__builtin_ia32_sqrtpd512: return interp__builtin_x86_sqrt(S, OpPC, Call, BuiltinID); default: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 67102e55f84d2..a5550996b1fcd 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12239,7 +12239,9 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case X86::BI__builtin_ia32_sqrtpd: case X86::BI__builtin_ia32_sqrtps: case X86::BI__builtin_ia32_sqrtpd256: - case X86::BI__builtin_ia32_sqrtps256: { + case X86::BI__builtin_ia32_sqrtps256: + case X86::BI__builtin_ia32_sqrtps512: + case X86::BI__builtin_ia32_sqrtpd512: { llvm::errs() << "We are inside sqrtpd/sqrtps\n"; APValue Source; if (!EvaluateAsRValue(Info, E->getArg(0), Source)) diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 80e58425cdd71..e5b54dd7285eb 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -1458,7 +1458,7 @@ _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { (__v8df)_mm512_sqrt_round_pd((A), (R)), \ (__v8df)_mm512_setzero_pd())) -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sqrt_pd(__m512d __A) { return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A, @@ -1494,7 +1494,7 @@ _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A) (__v16sf)_mm512_sqrt_round_ps((A), (R)), \ (__v16sf)_mm512_setzero_ps())) -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sqrt_ps(__m512 __A) { return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A, diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 84eaad8d99e61..c45139489a047 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -18,6 +18,8 @@ __m512d test_mm512_sqrt_pd(__m512d a) return _mm512_sqrt_pd(a); } +TEST_CONSTEXPR(match_m512d(_mm512_sqrt_pd(_mm512_set_pd(16.0, 9.0, 4.0, 1.0, 16.0, 9.0, 4.0, 1.0)), 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0)); + __m512d test_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A) { // CHECK-LABEL: test_mm512_mask_sqrt_pd @@ -68,6 +70,8 @@ __m512 test_mm512_sqrt_ps(__m512 a) return _mm512_sqrt_ps(a); } +TEST_CONSTEXPR(match_m512(_mm512_sqrt_ps(_mm512_set_ps(64.0f, 49.0f, 36.0f, 25.0f, 16.0f, 9.0f, 4.0f, 1.0f, 64.0f, 49.0f, 36.0f, 25.0f, 16.0f, 9.0f, 4.0f, 1.0f)), 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f)); + __m512 test_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A) { // CHECK-LABEL: test_mm512_mask_sqrt_ps From 69a4e222674ff0e0db7323756f80ee3743ece7b5 Mon Sep 17 00:00:00 2001 From: Shreeyash Pandey Date: Thu, 2 Oct 2025 18:04:07 +0530 Subject: [PATCH 05/12] [clang] add masked sqrt Signed-off-by: Shreeyash Pandey --- clang/lib/Headers/avx512vlintrin.h | 16 +++++++-------- clang/test/CodeGen/X86/avx512vl-builtins.c | 24 ++++++++++++++++++++++ 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index 754f43ad88543..ea41f975546b7 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -3444,56 +3444,56 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { (__v8si)(__m256i)(index), \ (__v8si)(__m256i)(v1), (int)(scale)) - static __inline__ __m128d __DEFAULT_FN_ATTRS128 + static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_sqrt_pd(__A), (__v2df)__W); } - static __inline__ __m128d __DEFAULT_FN_ATTRS128 + static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_sqrt_pd(__A), (__v2df)_mm_setzero_pd()); } - static __inline__ __m256d __DEFAULT_FN_ATTRS256 + static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_sqrt_pd(__A), (__v4df)__W); } - static __inline__ __m256d __DEFAULT_FN_ATTRS256 + static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_sqrt_pd(__A), (__v4df)_mm256_setzero_pd()); } - static __inline__ __m128 __DEFAULT_FN_ATTRS128 + static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_sqrt_ps(__A), (__v4sf)__W); } - static __inline__ __m128 __DEFAULT_FN_ATTRS128 + static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_sqrt_ps(__A), (__v4sf)_mm_setzero_ps()); } - static __inline__ __m256 __DEFAULT_FN_ATTRS256 + static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_sqrt_ps(__A), (__v8sf)__W); } - static __inline__ __m256 __DEFAULT_FN_ATTRS256 + static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_sqrt_ps(__A), diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index 5282c7ab06dea..67cab0097b641 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -5514,48 +5514,72 @@ __m128d test_mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) { // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm_mask_sqrt_pd(__W,__U,__A); } + +TEST_CONSTEXPR(match_m128d(_mm_mask_sqrt_pd(_mm_set_pd(200.0, 100.0), 0b01, _mm_set_pd(9.0, 4.0)), 2.0, 200.0)); + __m128d test_mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) { // CHECK-LABEL: test_mm_maskz_sqrt_pd // CHECK: @llvm.sqrt.v2f64 // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm_maskz_sqrt_pd(__U,__A); } + +TEST_CONSTEXPR(match_m128d(_mm_maskz_sqrt_pd(0b10, _mm_set_pd(9.0, 4.0)), 0.0, 3.0)); + __m256d test_mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) { // CHECK-LABEL: test_mm256_mask_sqrt_pd // CHECK: @llvm.sqrt.v4f64 // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm256_mask_sqrt_pd(__W,__U,__A); } + +TEST_CONSTEXPR(match_m256d(_mm256_mask_sqrt_pd(_mm256_set_pd(400.0, 300.0, 200.0, 100.0), 0b1001, _mm256_set_pd(25.0, 16.0, 9.0, 4.0)), 2.0, 200.0, 300.0, 5.0)); + __m256d test_mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) { // CHECK-LABEL: test_mm256_maskz_sqrt_pd // CHECK: @llvm.sqrt.v4f64 // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm256_maskz_sqrt_pd(__U,__A); } + +TEST_CONSTEXPR(match_m256d(_mm256_maskz_sqrt_pd(0b0110, _mm256_set_pd(25.0, 16.0, 9.0, 4.0)), 0.0, 3.0, 4.0, 0.0)); + __m128 test_mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) { // CHECK-LABEL: test_mm_mask_sqrt_ps // CHECK: @llvm.sqrt.v4f32 // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm_mask_sqrt_ps(__W,__U,__A); } + +TEST_CONSTEXPR(match_m128(_mm_mask_sqrt_ps(_mm_set_ps(400.0f, 300.0f, 200.0f, 100.0f), 0b1010, _mm_set_ps(25.0f, 16.0f, 9.0f, 4.0f)), 100.0f, 3.0f, 300.0f, 5.0f)); + __m128 test_mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) { // CHECK-LABEL: test_mm_maskz_sqrt_ps // CHECK: @llvm.sqrt.v4f32 // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm_maskz_sqrt_ps(__U,__A); } + +TEST_CONSTEXPR(match_m128(_mm_maskz_sqrt_ps(0b0011, _mm_set_ps(25.0f, 16.0f, 9.0f, 4.0f)), 2.0f, 3.0f, 0.0f, 0.0f)); + __m256 test_mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) { // CHECK-LABEL: test_mm256_mask_sqrt_ps // CHECK: @llvm.sqrt.v8f32 // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_mask_sqrt_ps(__W,__U,__A); } + +TEST_CONSTEXPR(match_m256(_mm256_mask_sqrt_ps(_mm256_set_ps(800.0f, 700.0f, 600.0f, 500.0f, 400.0f, 300.0f, 200.0f, 100.0f), 0b11001100, _mm256_set_ps(64.0f, 49.0f, 36.0f, 25.0f, 16.0f, 9.0f, 4.0f, 1.0f)), 100.0f, 200.0f, 3.0f, 4.0f, 500.0f, 600.0f, 7.0f, 8.0f)); + __m256 test_mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) { // CHECK-LABEL: test_mm256_maskz_sqrt_ps // CHECK: @llvm.sqrt.v8f32 // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_maskz_sqrt_ps(__U,__A); } + +TEST_CONSTEXPR(match_m256(_mm256_maskz_sqrt_ps(0b11110000, _mm256_set_ps(64.0f, 49.0f, 36.0f, 25.0f, 16.0f, 9.0f, 4.0f, 1.0f)), 0.0f, 0.0f, 0.0f, 0.0f, 5.0f, 6.0f, 7.0f, 8.0f)); + __m128d test_mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { // CHECK-LABEL: test_mm_mask_sub_pd // CHECK: fsub <2 x double> %{{.*}}, %{{.*}} From 8e50cab159306fd2b132f7ca3377510f6495fc1c Mon Sep 17 00:00:00 2001 From: Shreeyash Pandey Date: Thu, 2 Oct 2025 18:04:56 +0530 Subject: [PATCH 06/12] [clang] sqrt formatting Signed-off-by: Shreeyash Pandey --- clang/include/clang/Basic/BuiltinsX86.td | 13 ++-- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 28 ++++---- clang/lib/AST/ExprConstant.cpp | 30 ++++---- clang/lib/Headers/avx512fintrin.h | 10 ++- clang/lib/Headers/avx512vlintrin.h | 88 +++++++++++------------- clang/lib/Headers/avxintrin.h | 7 +- clang/lib/Headers/emmintrin.h | 3 +- clang/lib/Headers/xmmintrin.h | 4 +- 8 files changed, 91 insertions(+), 92 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 85f99e538c84a..7d9845fbf4cf5 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -203,7 +203,8 @@ let Features = "sse", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in def shufps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">; } -let Features = "sse", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { +let Features = "sse", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def sqrtps : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">; } @@ -224,7 +225,9 @@ let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] i def pshufd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">; def pshuflw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">; def pshufhw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">; - def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">; + def psadbw128 + : X86Builtin< + "_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">; def sqrtsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">; def shufpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">; def cvtpd2dq : X86Builtin<"_Vector<2, long long int>(_Vector<2, double>)">; @@ -510,7 +513,8 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { def vpermilpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">; - def vpermilps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">; + def vpermilps256 + : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">; def rsqrtps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">; def rcpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">; def roundpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">; @@ -950,7 +954,8 @@ let Features = "pku", Attributes = [NoThrow] in { def wrpkru : X86Builtin<"void(unsigned int)">; } -let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { +let Features = "avx512f", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def sqrtpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int)">; def sqrtps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 6be22308c0c25..ad3443fb162a1 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2996,7 +2996,8 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC, return true; } -static llvm::APFloat apply_x86_sqrt(llvm::APFloat Val, const llvm::fltSemantics &Semantics) { +static llvm::APFloat apply_x86_sqrt(llvm::APFloat Val, + const llvm::fltSemantics &Semantics) { if (Val.isNegative() && !Val.isZero()) { return llvm::APFloat::getQNaN(Semantics); } else { @@ -3006,18 +3007,20 @@ static llvm::APFloat apply_x86_sqrt(llvm::APFloat Val, const llvm::fltSemantics llvm::APFloat TempValue(SqrtValue); bool LosesInfo; - TempValue.convert(Semantics, llvm::APFloat::rmNearestTiesToEven, &LosesInfo); + TempValue.convert(Semantics, llvm::APFloat::rmNearestTiesToEven, + &LosesInfo); return TempValue; } } static bool interp__builtin_x86_sqrt(InterpState &S, CodePtr OpPC, - const CallExpr *Call, - unsigned ID) { + const CallExpr *Call, unsigned ID) { llvm::errs() << "Entering x86 sqrtpd/ps interpretbuiltin\n"; - - llvm::errs() << "BI__builtin_ia32_sqrtpd512 " << X86::BI__builtin_ia32_sqrtpd512 << '\n'; - llvm::errs() << "BI__builtin_ia32_sqrtps512 " << X86::BI__builtin_ia32_sqrtps512 << '\n'; + + llvm::errs() << "BI__builtin_ia32_sqrtpd512 " + << X86::BI__builtin_ia32_sqrtpd512 << '\n'; + llvm::errs() << "BI__builtin_ia32_sqrtps512 " + << X86::BI__builtin_ia32_sqrtps512 << '\n'; llvm::errs() << "Current ID " << ID << '\n'; llvm::errs() << "GetNumArgs " << Call->getNumArgs() << '\n'; unsigned NumArgs = Call->getNumArgs(); @@ -3025,7 +3028,7 @@ static bool interp__builtin_x86_sqrt(InterpState &S, CodePtr OpPC, const Expr *ArgExpr = Call->getArg(0); QualType ArgTy = ArgExpr->getType(); QualType ResultTy = Call->getType(); - + if (!(ArgTy->isRealFloatingType() || (ArgTy->isVectorType() && ArgTy->castAs()->getElementType()->isRealFloatingType()))) @@ -3033,7 +3036,8 @@ static bool interp__builtin_x86_sqrt(InterpState &S, CodePtr OpPC, const llvm::fltSemantics *SemanticsPtr; if (ArgTy->isVectorType()) - SemanticsPtr = &S.getContext().getFloatSemantics(ArgTy->castAs()->getElementType()); + SemanticsPtr = &S.getContext().getFloatSemantics( + ArgTy->castAs()->getElementType()); else SemanticsPtr = &S.getContext().getFloatSemantics(ArgTy); const llvm::fltSemantics &Semantics = *SemanticsPtr; @@ -3048,7 +3052,6 @@ static bool interp__builtin_x86_sqrt(InterpState &S, CodePtr OpPC, } } - // Scalar case if (!ArgTy->isVectorType()) { llvm::APFloat Val = S.Stk.pop().getAPFloat(); @@ -3061,13 +3064,14 @@ static bool interp__builtin_x86_sqrt(InterpState &S, CodePtr OpPC, assert(ArgTy->isVectorType()); llvm::errs() << "Considering this as a vector\n"; const auto *VT = ArgTy->castAs(); - + const Pointer &Arg = S.Stk.pop(); const Pointer &Dst = S.Stk.peek(); assert(Arg.getFieldDesc()->isPrimitiveArray()); assert(Dst.getFieldDesc()->isPrimitiveArray()); - assert(Arg.getFieldDesc()->getNumElems() == Dst.getFieldDesc()->getNumElems()); + assert(Arg.getFieldDesc()->getNumElems() == + Dst.getFieldDesc()->getNumElems()); PrimType ElemT = *S.getContext().classify(VT->getElementType()); unsigned NumElems = VT->getNumElements(); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index a5550996b1fcd..9148c699b6b59 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -64,11 +64,11 @@ #include "llvm/Support/SipHash.h" #include "llvm/Support/TimeProfiler.h" #include "llvm/Support/raw_ostream.h" +#include #include #include #include #include -#include #define DEBUG_TYPE "exprconstant" @@ -12236,10 +12236,10 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } - case X86::BI__builtin_ia32_sqrtpd: - case X86::BI__builtin_ia32_sqrtps: + case X86::BI__builtin_ia32_sqrtpd: + case X86::BI__builtin_ia32_sqrtps: case X86::BI__builtin_ia32_sqrtpd256: - case X86::BI__builtin_ia32_sqrtps256: + case X86::BI__builtin_ia32_sqrtps256: case X86::BI__builtin_ia32_sqrtps512: case X86::BI__builtin_ia32_sqrtpd512: { llvm::errs() << "We are inside sqrtpd/sqrtps\n"; @@ -12248,7 +12248,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return false; QualType DestEltTy = E->getType()->castAs()->getElementType(); - const llvm::fltSemantics &Semantics = Info.Ctx.getFloatTypeSemantics(DestEltTy); // Retrieve correct semantics + const llvm::fltSemantics &Semantics = + Info.Ctx.getFloatTypeSemantics(DestEltTy); // Retrieve correct semantics unsigned SourceLen = Source.getVectorLength(); SmallVector ResultElements; ResultElements.reserve(SourceLen); @@ -12263,20 +12264,23 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { if (Value.isNegative() && !Value.isZero()) { Value = llvm::APFloat::getQNaN(Value.getSemantics()); } else { -double DoubleValue = Value.convertToDouble(); -double SqrtValue = sqrt(DoubleValue); -llvm::APFloat TempValue(SqrtValue); -bool LosesInfo; -auto RetStatus = TempValue.convert(Semantics, llvm::RoundingMode::NearestTiesToEven, &LosesInfo); -Value = TempValue; - //llvm::errs() << "Pushing " << SqrtValue << ' ' << Value2 << " to resultelements\n"; + double DoubleValue = Value.convertToDouble(); + double SqrtValue = sqrt(DoubleValue); + llvm::APFloat TempValue(SqrtValue); + bool LosesInfo; + auto RetStatus = TempValue.convert( + Semantics, llvm::RoundingMode::NearestTiesToEven, &LosesInfo); + Value = TempValue; + // llvm::errs() << "Pushing " << SqrtValue << ' ' << Value2 << " to + // resultelements\n"; } ResultElements.push_back(APValue(Value)); } else { return false; } } - llvm::errs() << "Outside the loop, about to exit " << "res size " << ResultElements.size() << "\n"; + llvm::errs() << "Outside the loop, about to exit " << "res size " + << ResultElements.size() << "\n"; return Success(APValue(ResultElements.data(), ResultElements.size()), E); } } diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index e5b54dd7285eb..4ec4ab05ab468 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -1458,9 +1458,8 @@ _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { (__v8df)_mm512_sqrt_round_pd((A), (R)), \ (__v8df)_mm512_setzero_pd())) -static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_sqrt_pd(__m512d __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_sqrt_pd(__m512d __A) { return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A, _MM_FROUND_CUR_DIRECTION); } @@ -1494,9 +1493,8 @@ _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A) (__v16sf)_mm512_sqrt_round_ps((A), (R)), \ (__v16sf)_mm512_setzero_ps())) -static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_sqrt_ps(__m512 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_sqrt_ps(__m512 __A) { return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A, _MM_FROUND_CUR_DIRECTION); } diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index ea41f975546b7..99351610cb1fc 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -3444,61 +3444,53 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { (__v8si)(__m256i)(index), \ (__v8si)(__m256i)(v1), (int)(scale)) - static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR - _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) { - return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, - (__v2df)_mm_sqrt_pd(__A), - (__v2df)__W); - } +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) { + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, (__v2df)_mm_sqrt_pd(__A), (__v2df)__W); +} - static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR - _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) { - return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, - (__v2df)_mm_sqrt_pd(__A), - (__v2df)_mm_setzero_pd()); - } +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) { + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, (__v2df)_mm_sqrt_pd(__A), (__v2df)_mm_setzero_pd()); +} - static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR - _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) { - return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, - (__v4df)_mm256_sqrt_pd(__A), - (__v4df)__W); - } +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) { + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, (__v4df)_mm256_sqrt_pd(__A), (__v4df)__W); +} - static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR - _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) { - return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, - (__v4df)_mm256_sqrt_pd(__A), - (__v4df)_mm256_setzero_pd()); - } +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) { + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, (__v4df)_mm256_sqrt_pd(__A), (__v4df)_mm256_setzero_pd()); +} - static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR - _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) { - return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, - (__v4sf)_mm_sqrt_ps(__A), - (__v4sf)__W); - } +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) { + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, (__v4sf)_mm_sqrt_ps(__A), (__v4sf)__W); +} - static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR - _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) { - return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, - (__v4sf)_mm_sqrt_ps(__A), - (__v4sf)_mm_setzero_ps()); - } +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) { + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, (__v4sf)_mm_sqrt_ps(__A), (__v4sf)_mm_setzero_ps()); +} - static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR - _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) { - return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, - (__v8sf)_mm256_sqrt_ps(__A), - (__v8sf)__W); - } +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) { + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, (__v8sf)_mm256_sqrt_ps(__A), (__v8sf)__W); +} - static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR - _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) { - return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, - (__v8sf)_mm256_sqrt_ps(__A), - (__v8sf)_mm256_setzero_ps()); - } +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) { + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, (__v8sf)_mm256_sqrt_ps(__A), (__v8sf)_mm256_setzero_ps()); +} static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index c794b334ab9c0..2ef8d5085869f 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -336,8 +336,7 @@ static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_mul_ps(__m256 __a, /// \returns A 256-bit vector of [4 x double] containing the square roots of the /// values in the operand. static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR -_mm256_sqrt_pd(__m256d __a) -{ +_mm256_sqrt_pd(__m256d __a) { return (__m256d)__builtin_ia32_sqrtpd256((__v4df)__a); } @@ -352,9 +351,7 @@ _mm256_sqrt_pd(__m256d __a) /// A 256-bit vector of [8 x float]. /// \returns A 256-bit vector of [8 x float] containing the square roots of the /// values in the operand. -static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR -_mm256_sqrt_ps(__m256 __a) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_sqrt_ps(__m256 __a) { return (__m256)__builtin_ia32_sqrtps256((__v8sf)__a); } diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index fef9e280ba53f..1943624f3424e 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -256,7 +256,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_sd(__m128d __a, /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector of [2 x double] containing the square roots of the /// values in the operand. -static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sqrt_pd(__m128d __a) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_sqrt_pd(__m128d __a) { return __builtin_ia32_sqrtpd((__v2df)__a); } diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index 2a41324dd31c6..7c68c1395df5c 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -248,9 +248,7 @@ _mm_sqrt_ss(__m128 __a) /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the square roots of the /// values in the operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR -_mm_sqrt_ps(__m128 __a) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sqrt_ps(__m128 __a) { return __builtin_ia32_sqrtps((__v4sf)__a); } From 427920598f556102034af58f8e1712c2b69653df Mon Sep 17 00:00:00 2001 From: Shreeyash Pandey Date: Thu, 2 Oct 2025 18:08:31 +0530 Subject: [PATCH 07/12] [clang] remove debug messages Signed-off-by: Shreeyash Pandey --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 11 ----------- clang/lib/AST/ExprConstant.cpp | 8 -------- 2 files changed, 19 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index ad3443fb162a1..42492784b2e1b 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -19,7 +19,6 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SipHash.h" -#include "llvm/Support/raw_ostream.h" #include namespace clang { @@ -3015,14 +3014,6 @@ static llvm::APFloat apply_x86_sqrt(llvm::APFloat Val, static bool interp__builtin_x86_sqrt(InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned ID) { - llvm::errs() << "Entering x86 sqrtpd/ps interpretbuiltin\n"; - - llvm::errs() << "BI__builtin_ia32_sqrtpd512 " - << X86::BI__builtin_ia32_sqrtpd512 << '\n'; - llvm::errs() << "BI__builtin_ia32_sqrtps512 " - << X86::BI__builtin_ia32_sqrtps512 << '\n'; - llvm::errs() << "Current ID " << ID << '\n'; - llvm::errs() << "GetNumArgs " << Call->getNumArgs() << '\n'; unsigned NumArgs = Call->getNumArgs(); assert(NumArgs == 1 || NumArgs == 2); const Expr *ArgExpr = Call->getArg(0); @@ -3062,7 +3053,6 @@ static bool interp__builtin_x86_sqrt(InterpState &S, CodePtr OpPC, // Vector case assert(ArgTy->isVectorType()); - llvm::errs() << "Considering this as a vector\n"; const auto *VT = ArgTy->castAs(); const Pointer &Arg = S.Stk.pop(); @@ -3088,7 +3078,6 @@ static bool interp__builtin_x86_sqrt(InterpState &S, CodePtr OpPC, bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, uint32_t BuiltinID) { - llvm::errs() << "Inside Interpretbuiltin for " << Call << "\n"; if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID)) return Invalid(S, OpPC); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 9148c699b6b59..445edfb66b518 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12242,7 +12242,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case X86::BI__builtin_ia32_sqrtps256: case X86::BI__builtin_ia32_sqrtps512: case X86::BI__builtin_ia32_sqrtpd512: { - llvm::errs() << "We are inside sqrtpd/sqrtps\n"; APValue Source; if (!EvaluateAsRValue(Info, E->getArg(0), Source)) return false; @@ -12254,10 +12253,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { SmallVector ResultElements; ResultElements.reserve(SourceLen); - llvm::errs() << "SourceLen " << SourceLen << '\n'; - for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) { - llvm::errs() << "We are inside for loop\n"; APValue CurrentEle = Source.getVectorElt(EltNum); if (DestEltTy->isFloatingType()) { llvm::APFloat Value = CurrentEle.getFloat(); @@ -12271,16 +12267,12 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { auto RetStatus = TempValue.convert( Semantics, llvm::RoundingMode::NearestTiesToEven, &LosesInfo); Value = TempValue; - // llvm::errs() << "Pushing " << SqrtValue << ' ' << Value2 << " to - // resultelements\n"; } ResultElements.push_back(APValue(Value)); } else { return false; } } - llvm::errs() << "Outside the loop, about to exit " << "res size " - << ResultElements.size() << "\n"; return Success(APValue(ResultElements.data(), ResultElements.size()), E); } } From 67efeb82808b3b274296e0b481ff7cf0acc8de5e Mon Sep 17 00:00:00 2001 From: Shreeyash Pandey Date: Thu, 2 Oct 2025 18:17:13 +0530 Subject: [PATCH 08/12] [clang] remove redundant comment Signed-off-by: Shreeyash Pandey --- clang/lib/AST/ExprConstant.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 445edfb66b518..761648486d3ac 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12248,7 +12248,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { QualType DestEltTy = E->getType()->castAs()->getElementType(); const llvm::fltSemantics &Semantics = - Info.Ctx.getFloatTypeSemantics(DestEltTy); // Retrieve correct semantics + Info.Ctx.getFloatTypeSemantics(DestEltTy); unsigned SourceLen = Source.getVectorLength(); SmallVector ResultElements; ResultElements.reserve(SourceLen); From d33f881c011131c0e4071395b5277be49c7a5ac9 Mon Sep 17 00:00:00 2001 From: Shreeyash Pandey Date: Thu, 2 Oct 2025 18:32:08 +0530 Subject: [PATCH 09/12] [clang] remove unused variables Signed-off-by: Shreeyash Pandey --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 -- clang/lib/AST/ExprConstant.cpp | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 42492784b2e1b..63da40f621bd6 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3018,7 +3018,6 @@ static bool interp__builtin_x86_sqrt(InterpState &S, CodePtr OpPC, assert(NumArgs == 1 || NumArgs == 2); const Expr *ArgExpr = Call->getArg(0); QualType ArgTy = ArgExpr->getType(); - QualType ResultTy = Call->getType(); if (!(ArgTy->isRealFloatingType() || (ArgTy->isVectorType() && @@ -3063,7 +3062,6 @@ static bool interp__builtin_x86_sqrt(InterpState &S, CodePtr OpPC, assert(Arg.getFieldDesc()->getNumElems() == Dst.getFieldDesc()->getNumElems()); - PrimType ElemT = *S.getContext().classify(VT->getElementType()); unsigned NumElems = VT->getNumElements(); for (unsigned I = 0; I != NumElems; ++I) { diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 761648486d3ac..242ccfeb5b4e2 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12264,8 +12264,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { double SqrtValue = sqrt(DoubleValue); llvm::APFloat TempValue(SqrtValue); bool LosesInfo; - auto RetStatus = TempValue.convert( - Semantics, llvm::RoundingMode::NearestTiesToEven, &LosesInfo); + TempValue.convert(Semantics, llvm::RoundingMode::NearestTiesToEven, + &LosesInfo); Value = TempValue; } ResultElements.push_back(APValue(Value)); From 7ace3ac7c9f5943011bd3bd4b1ab5c7ddb3bdcdb Mon Sep 17 00:00:00 2001 From: Shreeyash Pandey Date: Thu, 2 Oct 2025 20:01:30 +0530 Subject: [PATCH 10/12] [clang] add masked sqrt 512 Signed-off-by: Shreeyash Pandey --- clang/lib/Headers/avx512fintrin.h | 20 ++++++++------------ clang/test/CodeGen/X86/avx512f-builtins.c | 10 ++++++++++ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 4ec4ab05ab468..3055911b970d8 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -1464,17 +1464,15 @@ _mm512_sqrt_pd(__m512d __A) { _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A) { return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_sqrt_pd(__A), (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_sqrt_pd(__mmask8 __U, __m512d __A) { return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_sqrt_pd(__A), (__v8df)_mm512_setzero_pd()); @@ -1499,17 +1497,15 @@ _mm512_sqrt_ps(__m512 __A) { _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_sqrt_ps(__A), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_sqrt_ps(__mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_sqrt_ps(__A), (__v16sf)_mm512_setzero_ps()); diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index c45139489a047..3fa6872633c87 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -29,6 +29,12 @@ __m512d test_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A) return _mm512_mask_sqrt_pd (__W,__U,__A); } +TEST_CONSTEXPR(match_m512d(_mm512_mask_sqrt_pd( + _mm512_set_pd(800.0, 700.0, 600.0, 500.0, 400.0, 300.0, 200.0, 100.0), + 0b11000011, + _mm512_set_pd(64.0, 49.0, 36.0, 25.0, 16.0, 9.0, 4.0, 1.0)), + 1.0, 2.0, 300.0, 400.0, 500.0, 600.0, 7.0, 8.0)); + __m512d test_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A) { // CHECK-LABEL: test_mm512_maskz_sqrt_pd @@ -38,6 +44,10 @@ __m512d test_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A) return _mm512_maskz_sqrt_pd (__U,__A); } +TEST_CONSTEXPR(match_m512d(_mm512_maskz_sqrt_pd(0b00001111, + _mm512_set_pd(64.0, 49.0, 36.0, 25.0, 16.0, 9.0, 4.0, 1.0)), + 1.0, 2.0, 3.0, 4.0, 0.0, 0.0, 0.0, 0.0)); + __m512d test_mm512_mask_sqrt_round_pd(__m512d __W,__mmask8 __U,__m512d __A) { // CHECK-LABEL: test_mm512_mask_sqrt_round_pd From f8d93f3a8aaeeb6fb46b4019d3539df03d35be24 Mon Sep 17 00:00:00 2001 From: Shreeyash Pandey Date: Thu, 2 Oct 2025 20:21:55 +0530 Subject: [PATCH 11/12] [clang] fix formatting Signed-off-by: Shreeyash Pandey --- clang/lib/Headers/avx512fintrin.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 3055911b970d8..ad8de700aa351 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -1458,8 +1458,7 @@ _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { (__v8df)_mm512_sqrt_round_pd((A), (R)), \ (__v8df)_mm512_setzero_pd())) -static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_sqrt_pd(__m512d __A) { +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sqrt_pd(__m512d __A) { return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A, _MM_FROUND_CUR_DIRECTION); } @@ -1491,8 +1490,7 @@ _mm512_maskz_sqrt_pd(__mmask8 __U, __m512d __A) { (__v16sf)_mm512_sqrt_round_ps((A), (R)), \ (__v16sf)_mm512_setzero_ps())) -static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_sqrt_ps(__m512 __A) { +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sqrt_ps(__m512 __A) { return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A, _MM_FROUND_CUR_DIRECTION); } From 91f3f5a7127cb95d8df57ff028365ef3660a1762 Mon Sep 17 00:00:00 2001 From: Shreeyash Pandey Date: Thu, 2 Oct 2025 20:22:35 +0530 Subject: [PATCH 12/12] [clang] fix formatting Signed-off-by: Shreeyash Pandey --- clang/lib/Headers/avx512fintrin.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index ad8de700aa351..3055911b970d8 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -1458,7 +1458,8 @@ _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { (__v8df)_mm512_sqrt_round_pd((A), (R)), \ (__v8df)_mm512_setzero_pd())) -static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sqrt_pd(__m512d __A) { +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_sqrt_pd(__m512d __A) { return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A, _MM_FROUND_CUR_DIRECTION); } @@ -1490,7 +1491,8 @@ _mm512_maskz_sqrt_pd(__mmask8 __U, __m512d __A) { (__v16sf)_mm512_sqrt_round_ps((A), (R)), \ (__v16sf)_mm512_setzero_ps())) -static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sqrt_ps(__m512 __A) { +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_sqrt_ps(__m512 __A) { return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A, _MM_FROUND_CUR_DIRECTION); }