llvm · Mohxen · Nov 23, 2025 · RKSimon · Nov 25, 2025 · RKSimon
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
@@ -1901,6 +1901,8 @@ static bool EvaluateFixedPointOrInteger(const Expr *E, APFixedPoint &Result,
 static bool EvaluateFixedPoint(const Expr *E, APFixedPoint &Result,
                                EvalInfo &Info);
 
+static bool EvaluatePSADBW128(const CallExpr *E, EvalInfo &Info, APValue &Result);
+
 //===----------------------------------------------------------------------===//
 // Misc utilities
 //===----------------------------------------------------------------------===//
@@ -12078,6 +12080,54 @@ static bool evalPackBuiltin(const CallExpr *E, EvalInfo &Info, APValue &Result,
   return true;
 }
 
+/// Constant-evaluate __builtin_ia32_psadbw128.
+///
+/// Both operands are read as 16 unsigned bytes. Each 8-byte half yields the
+/// sum of the absolute byte differences, which becomes one element of the
+/// two-element result vector.
+///
+/// \param E      The builtin call; its two arguments are the byte vectors.
+/// \param Info   Evaluation state used to evaluate the arguments.
+/// \param Result Set to the result vector on success.
+/// \returns false if an argument cannot be evaluated or does not have the
+/// expected 16-element vector shape; true otherwise.
+static bool EvaluatePSADBW128(const CallExpr *E, EvalInfo &Info,
+                              APValue &Result) {
+  constexpr unsigned BytesPerLane = 8;
+  constexpr unsigned NumBytes = 16;
+
+  APValue LHS, RHS;
+  if (!Evaluate(LHS, Info, E->getArg(0)) ||
+      !Evaluate(RHS, Info, E->getArg(1)))
+    return false;
+
+  // Check both operands so the element accesses below stay in bounds.
+  if (!LHS.isVector() || !RHS.isVector() ||
+      LHS.getVectorLength() != NumBytes || RHS.getVectorLength() != NumBytes)
+    return false;
+
+  QualType ElemTy = E->getType()->castAs<VectorType>()->getElementType();
+  const bool IsUnsigned = ElemTy->isUnsignedIntegerType();
+  const unsigned BitWidth = Info.Ctx.getIntWidth(ElemTy);
+
+  SmallVector<APValue, NumBytes / BytesPerLane> Lanes;
+  for (unsigned Base = 0; Base != NumBytes; Base += BytesPerLane) {
+    // At most 8 * 255, so the sum cannot overflow.
+    uint64_t Sum = 0;
+    for (unsigned I = Base; I != Base + BytesPerLane; ++I) {
+      // Compare the bytes as unsigned values, whatever the element signedness.
+      const APSInt &L = LHS.getVectorElt(I).getInt();
+      const APSInt &R = RHS.getVectorElt(I).getInt();
+      Sum += llvm::APIntOps::abdu(L, R).getZExtValue();
+    }
+    Lanes.push_back(APValue(APSInt(APInt(BitWidth, Sum), IsUnsigned)));
+  }
+
+  Result = APValue(Lanes.data(), Lanes.size());
+  return true;
+}
+
+
 static bool evalShuffleGeneric(
     EvalInfo &Info, const CallExpr *Call, APValue &Out,
     llvm::function_ref<std::pair<unsigned, int>(unsigned, unsigned)>
@@ -12342,6 +12392,9 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
   case clang::X86::BI__builtin_ia32_pavgw512:
     return EvaluateBinOpExpr(llvm::APIntOps::avgCeilU);
 
+  case X86::BI__builtin_ia32_psadbw128:
+  return EvaluatePSADBW128(E, Info, Result);
+
   case clang::X86::BI__builtin_ia32_pmulhrsw128:
   case clang::X86::BI__builtin_ia32_pmulhrsw256:
   case clang::X86::BI__builtin_ia32_pmulhrsw512:

diff --git a/clang/test/AST/ByteCode/x86-psadbw-psadbw128.cpp b/clang/test/AST/ByteCode/x86-psadbw-psadbw128.cpp
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -O0 -verify %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -O0 \
+// RUN:   -fexperimental-new-constant-interpreter -verify %s
+// expected-no-diagnostics
+
+// No headers allowed. Use Clang's vector types directly.
+typedef char v16qi __attribute__((vector_size(16)));
+typedef long long v2di __attribute__((vector_size(16)));
+
+constexpr v2di test_psadbw128() {
+  v16qi a = {10,20,30,40,50,60,70,80,
+             1,2,3,4,5,6,7,8};
+
+  v16qi b = {5,15,25,45,55,55,75,85,
+             10,0,3,9,1,10,2,8};
+  // Call the builtin directly
+  return __builtin_ia32_psadbw128(a, b);
+}
+
+static_assert(test_psadbw128()[0] == 40, "block0 mismatch");
+static_assert(test_psadbw128()[1] == 29, "block1 mismatch");