From c7198dede7b1cf016dba9eae812aefe93df05e91 Mon Sep 17 00:00:00 2001
From: mohxen
Date: Sun, 23 Nov 2025 18:09:42 -0500
Subject: [PATCH] First draft: [Clang] Add constant evaluation support for x86 psadbw/psadbw128 intrinsic

---
NOTE(review): the new test passes -fexperimental-new-constant-interpreter,
while this patch only changes ExprConstant.cpp. Please confirm that the
bytecode interpreter also handles __builtin_ia32_psadbw128 and that the
builtin is marked as constant-evaluable; otherwise the static_asserts in the
test cannot be evaluated.

 clang/lib/AST/ExprConstant.cpp                | 48 +++++++++++++++++
 .../AST/ByteCode/x86-psadbw-psadbw128.cpp     | 22 ++++++++
 2 files changed, 70 insertions(+)
 create mode 100644 clang/test/AST/ByteCode/x86-psadbw-psadbw128.cpp

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 29357eec2eeb6..40bed5a974d79 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12078,6 +12078,51 @@ static bool evalPackBuiltin(const CallExpr *E, EvalInfo &Info, APValue &Result,
   return true;
 }
 
+/// Evaluate a call to an x86 PSADBW builtin. For every group of eight bytes
+/// the absolute differences of the corresponding unsigned bytes of the two
+/// operands are summed, and each sum is stored in one 64-bit result element.
+static bool EvaluatePSADBW128(const CallExpr *E, EvalInfo &Info,
+                              APValue &Result) {
+  APValue A, B;
+  if (!Evaluate(A, Info, E->getArg(0)) || !Evaluate(B, Info, E->getArg(1)))
+    return false;
+
+  if (!A.isVector() || !B.isVector())
+    return false;
+
+  // Every result element consumes eight source bytes; reject anything that
+  // does not have that shape instead of reading out of bounds.
+  constexpr unsigned BytesPerElt = 8;
+  const auto *ResultTy = E->getType()->castAs<VectorType>();
+  unsigned NumResultElts = ResultTy->getNumElements();
+  unsigned NumBytes = A.getVectorLength();
+  if (B.getVectorLength() != NumBytes ||
+      NumBytes != NumResultElts * BytesPerElt)
+    return false;
+
+  QualType EltTy = ResultTy->getElementType();
+  bool EltIsUnsigned = EltTy->isUnsignedIntegerType();
+  unsigned EltWidth = Info.Ctx.getIntWidth(EltTy);
+
+  SmallVector<APValue, 8> ResultElts;
+  ResultElts.reserve(NumResultElts);
+  for (unsigned Elt = 0; Elt != NumResultElts; ++Elt) {
+    uint64_t Sum = 0;
+    for (unsigned I = 0; I != BytesPerElt; ++I) {
+      // The bytes are compared as unsigned values regardless of the
+      // signedness of the vector's element type, hence the zero-extension.
+      unsigned Idx = Elt * BytesPerElt + I;
+      uint64_t LHS = A.getVectorElt(Idx).getInt().getZExtValue();
+      uint64_t RHS = B.getVectorElt(Idx).getInt().getZExtValue();
+      Sum += LHS > RHS ? LHS - RHS : RHS - LHS;
+    }
+    ResultElts.emplace_back(APSInt(APInt(EltWidth, Sum), EltIsUnsigned));
+  }
+
+  Result = APValue(ResultElts.data(), ResultElts.size());
+  return true;
+}
+
 static bool evalShuffleGeneric(
     EvalInfo &Info, const CallExpr *Call, APValue &Out,
     llvm::function_ref<std::pair<unsigned, int>(unsigned, unsigned)>
@@ -12342,6 +12387,9 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
   case clang::X86::BI__builtin_ia32_pavgw512:
     return EvaluateBinOpExpr(llvm::APIntOps::avgCeilU);
 
+  case clang::X86::BI__builtin_ia32_psadbw128:
+    return EvaluatePSADBW128(E, Info, Result);
+
   case clang::X86::BI__builtin_ia32_pmulhrsw128:
   case clang::X86::BI__builtin_ia32_pmulhrsw256:
   case clang::X86::BI__builtin_ia32_pmulhrsw512:
diff --git a/clang/test/AST/ByteCode/x86-psadbw-psadbw128.cpp b/clang/test/AST/ByteCode/x86-psadbw-psadbw128.cpp
new file mode 100644
index 0000000000000..86b297ce442d8
--- /dev/null
+++ b/clang/test/AST/ByteCode/x86-psadbw-psadbw128.cpp
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -O0 \
+// RUN:   -fexperimental-new-constant-interpreter -verify %s
+// expected-no-diagnostics
+
+// No headers allowed. Use Clang's vector types directly.
+
+typedef char v16qi __attribute__((vector_size(16)));
+typedef long long v2di __attribute__((vector_size(16)));
+
+// Bytes with the top bit set (written as negative chars) check that the
+// differences are computed on unsigned values, as PSADBW requires.
+constexpr v2di test_psadbw128() {
+  v16qi a = {10, 20, 30, 40, 50, 60, 70, 80,
+             -1, 0, -128, 127, 5, 6, 7, 8};
+  v16qi b = {5, 15, 25, 45, 55, 55, 75, 85,
+             0, -1, 127, -128, 1, 10, 2, 8};
+  return __builtin_ia32_psadbw128(a, b);
+}
+
+static_assert(test_psadbw128()[0] == 40, "block0 mismatch");
+// |255-0| + |0-255| + |128-127| + |127-128| + 4 + 4 + 5 + 0
+static_assert(test_psadbw128()[1] == 525, "block1 mismatch");