[X86] Finally handle target of __builtin_ia32_cmp[p|s][s|d] from avx into sse/sse2/avx #84136

FreddyLeaf · 2024-03-06T08:13:37Z

This patch relands #67410 and fixes the cmpfail below:
#include <immintrin.h>
attribute((target("avx"))) void test(__m128 a, __m128 b) {
_mm_cmp_ps(a, b, 14);
}

According to Intel SDM, SSE/SSE2 instructions cmp[p|s][s|d] are supported when imm8 is in range of [0, 7]

…/sse2

llvmbot · 2024-03-06T08:14:08Z

@llvm/pr-subscribers-backend-x86
@llvm/pr-subscribers-clang

@llvm/pr-subscribers-clang-codegen

Author: Freddy Ye (FreddyLeaf)

Changes

[X86] Change target of __builtin_ia32_cmp[p|s][s|d] from avx into sse/sse2
Handle attribute/target

Patch is 38.00 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/84136.diff

11 Files Affected:

(modified) clang/include/clang/Basic/BuiltinsX86.def (+4-4)
(modified) clang/lib/CodeGen/CodeGenFunction.cpp (+19)
(modified) clang/lib/Headers/avxintrin.h (-247)
(modified) clang/lib/Headers/emmintrin.h (+121)
(modified) clang/lib/Headers/xmmintrin.h (+128)
(added) clang/test/CodeGen/X86/attribute-cmpsd-no-error.c (+11)
(modified) clang/test/CodeGen/X86/avx-builtins.c (-96)
(added) clang/test/CodeGen/X86/cmp-avx-builtins-error.c (+22)
(modified) clang/test/CodeGen/X86/sse-builtins.c (+54)
(modified) clang/test/CodeGen/X86/sse2-builtins.c (+54)
(modified) clang/test/CodeGen/target-features-error-2.c (+1-1)

diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 207cde0414b54e..d141d51a509e8d 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -462,12 +462,12 @@ TARGET_BUILTIN(__builtin_ia32_blendvps256, "V8fV8fV8fV8f", "ncV:256:", "avx")
 TARGET_BUILTIN(__builtin_ia32_shufpd256, "V4dV4dV4dIi", "ncV:256:", "avx")
 TARGET_BUILTIN(__builtin_ia32_shufps256, "V8fV8fV8fIi", "ncV:256:", "avx")
 TARGET_BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fIc", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "ncV:128:", "avx")
+TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "ncV:128:", "avx|sse2")
 TARGET_BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dIc", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_cmpps, "V4fV4fV4fIc", "ncV:128:", "avx")
+TARGET_BUILTIN(__builtin_ia32_cmpps, "V4fV4fV4fIc", "ncV:128:", "avx|sse")
 TARGET_BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fIc", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "ncV:128:", "avx")
-TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "ncV:128:", "avx")
+TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "ncV:128:", "avx|sse2")
+TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "ncV:128:", "avx|sse")
 TARGET_BUILTIN(__builtin_ia32_vextractf128_pd256, "V2dV4dIi", "ncV:256:", "avx")
 TARGET_BUILTIN(__builtin_ia32_vextractf128_ps256, "V4fV8fIi", "ncV:256:", "avx")
 TARGET_BUILTIN(__builtin_ia32_vextractf128_si256, "V4iV8iIi", "ncV:256:", "avx")
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index b87fc86f4e635f..f6544452ffb7c1 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -31,6 +31,7 @@
 #include "clang/AST/StmtObjC.h"
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/CodeGenOptions.h"
+#include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
 #include "clang/Frontend/FrontendDiagnostic.h"
@@ -2613,6 +2614,24 @@ void CGBuilderInserter::InsertHelper(
 // called function.
 void CodeGenFunction::checkTargetFeatures(const CallExpr *E,
                                           const FunctionDecl *TargetDecl) {
+  // SemaCheking cannot handle below x86 builtins because they have different
+  // parameter ranges with different TargetAttribute of caller.
+  if (CGM.getContext().getTargetInfo().getTriple().isX86()) {
+    unsigned BuiltinID = TargetDecl->getBuiltinID();
+    if (BuiltinID == X86::BI__builtin_ia32_cmpps ||
+        BuiltinID == X86::BI__builtin_ia32_cmpss ||
+        BuiltinID == X86::BI__builtin_ia32_cmppd ||
+        BuiltinID == X86::BI__builtin_ia32_cmpsd) {
+      const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurCodeDecl);
+      llvm::StringMap<bool> TargetFetureMap;
+      CGM.getContext().getFunctionFeatureMap(TargetFetureMap, FD);
+      llvm::APSInt Result =
+          *(E->getArg(2)->getIntegerConstantExpr(CGM.getContext()));
+      if (Result.getSExtValue() > 7 && !TargetFetureMap.lookup("avx"))
+        CGM.getDiags().Report(E->getBeginLoc(), diag::err_builtin_needs_feature)
+            << TargetDecl->getDeclName() << "avx";
+    }
+  }
   return checkTargetFeatures(E->getBeginLoc(), TargetDecl);
 }
 
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index f116d8bc3a94c7..d6192518ea24ba 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -1573,15 +1573,6 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
   ((__m256d)__builtin_ia32_shufpd256((__v4df)(__m256d)(a), \
                                      (__v4df)(__m256d)(b), (int)(mask)))
 
-/* Compare */
-#define _CMP_EQ_OQ    0x00 /* Equal (ordered, non-signaling)  */
-#define _CMP_LT_OS    0x01 /* Less-than (ordered, signaling)  */
-#define _CMP_LE_OS    0x02 /* Less-than-or-equal (ordered, signaling)  */
-#define _CMP_UNORD_Q  0x03 /* Unordered (non-signaling)  */
-#define _CMP_NEQ_UQ   0x04 /* Not-equal (unordered, non-signaling)  */
-#define _CMP_NLT_US   0x05 /* Not-less-than (unordered, signaling)  */
-#define _CMP_NLE_US   0x06 /* Not-less-than-or-equal (unordered, signaling)  */
-#define _CMP_ORD_Q    0x07 /* Ordered (non-signaling)   */
 #define _CMP_EQ_UQ    0x08 /* Equal (unordered, non-signaling)  */
 #define _CMP_NGE_US   0x09 /* Not-greater-than-or-equal (unordered, signaling)  */
 #define _CMP_NGT_US   0x0a /* Not-greater-than (unordered, signaling)  */
@@ -1607,126 +1598,6 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
 #define _CMP_GT_OQ    0x1e /* Greater-than (ordered, non-signaling)  */
 #define _CMP_TRUE_US  0x1f /* True (unordered, signaling)  */
 
-/// Compares each of the corresponding double-precision values of two
-///    128-bit vectors of [2 x double], using the operation specified by the
-///    immediate integer operand.
-///
-///    Returns a [2 x double] vector consisting of two doubles corresponding to
-///    the two comparison results: zero if the comparison is false, and all 1's
-///    if the comparison is true.
-///
-/// \headerfile <x86intrin.h>
-///
-/// \code
-/// __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c);
-/// \endcode
-///
-/// This intrinsic corresponds to the <c> VCMPPD </c> instruction.
-///
-/// \param a
-///    A 128-bit vector of [2 x double].
-/// \param b
-///    A 128-bit vector of [2 x double].
-/// \param c
-///    An immediate integer operand, with bits [4:0] specifying which comparison
-///    operation to use: \n
-///    0x00: Equal (ordered, non-signaling) \n
-///    0x01: Less-than (ordered, signaling) \n
-///    0x02: Less-than-or-equal (ordered, signaling) \n
-///    0x03: Unordered (non-signaling) \n
-///    0x04: Not-equal (unordered, non-signaling) \n
-///    0x05: Not-less-than (unordered, signaling) \n
-///    0x06: Not-less-than-or-equal (unordered, signaling) \n
-///    0x07: Ordered (non-signaling) \n
-///    0x08: Equal (unordered, non-signaling) \n
-///    0x09: Not-greater-than-or-equal (unordered, signaling) \n
-///    0x0A: Not-greater-than (unordered, signaling) \n
-///    0x0B: False (ordered, non-signaling) \n
-///    0x0C: Not-equal (ordered, non-signaling) \n
-///    0x0D: Greater-than-or-equal (ordered, signaling) \n
-///    0x0E: Greater-than (ordered, signaling) \n
-///    0x0F: True (unordered, non-signaling) \n
-///    0x10: Equal (ordered, signaling) \n
-///    0x11: Less-than (ordered, non-signaling) \n
-///    0x12: Less-than-or-equal (ordered, non-signaling) \n
-///    0x13: Unordered (signaling) \n
-///    0x14: Not-equal (unordered, signaling) \n
-///    0x15: Not-less-than (unordered, non-signaling) \n
-///    0x16: Not-less-than-or-equal (unordered, non-signaling) \n
-///    0x17: Ordered (signaling) \n
-///    0x18: Equal (unordered, signaling) \n
-///    0x19: Not-greater-than-or-equal (unordered, non-signaling) \n
-///    0x1A: Not-greater-than (unordered, non-signaling) \n
-///    0x1B: False (ordered, signaling) \n
-///    0x1C: Not-equal (ordered, signaling) \n
-///    0x1D: Greater-than-or-equal (ordered, non-signaling) \n
-///    0x1E: Greater-than (ordered, non-signaling) \n
-///    0x1F: True (unordered, signaling)
-/// \returns A 128-bit vector of [2 x double] containing the comparison results.
-#define _mm_cmp_pd(a, b, c) \
-  ((__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \
-                                 (__v2df)(__m128d)(b), (c)))
-
-/// Compares each of the corresponding values of two 128-bit vectors of
-///    [4 x float], using the operation specified by the immediate integer
-///    operand.
-///
-///    Returns a [4 x float] vector consisting of four floats corresponding to
-///    the four comparison results: zero if the comparison is false, and all 1's
-///    if the comparison is true.
-///
-/// \headerfile <x86intrin.h>
-///
-/// \code
-/// __m128 _mm_cmp_ps(__m128 a, __m128 b, const int c);
-/// \endcode
-///
-/// This intrinsic corresponds to the <c> VCMPPS </c> instruction.
-///
-/// \param a
-///    A 128-bit vector of [4 x float].
-/// \param b
-///    A 128-bit vector of [4 x float].
-/// \param c
-///    An immediate integer operand, with bits [4:0] specifying which comparison
-///    operation to use: \n
-///    0x00: Equal (ordered, non-signaling) \n
-///    0x01: Less-than (ordered, signaling) \n
-///    0x02: Less-than-or-equal (ordered, signaling) \n
-///    0x03: Unordered (non-signaling) \n
-///    0x04: Not-equal (unordered, non-signaling) \n
-///    0x05: Not-less-than (unordered, signaling) \n
-///    0x06: Not-less-than-or-equal (unordered, signaling) \n
-///    0x07: Ordered (non-signaling) \n
-///    0x08: Equal (unordered, non-signaling) \n
-///    0x09: Not-greater-than-or-equal (unordered, signaling) \n
-///    0x0A: Not-greater-than (unordered, signaling) \n
-///    0x0B: False (ordered, non-signaling) \n
-///    0x0C: Not-equal (ordered, non-signaling) \n
-///    0x0D: Greater-than-or-equal (ordered, signaling) \n
-///    0x0E: Greater-than (ordered, signaling) \n
-///    0x0F: True (unordered, non-signaling) \n
-///    0x10: Equal (ordered, signaling) \n
-///    0x11: Less-than (ordered, non-signaling) \n
-///    0x12: Less-than-or-equal (ordered, non-signaling) \n
-///    0x13: Unordered (signaling) \n
-///    0x14: Not-equal (unordered, signaling) \n
-///    0x15: Not-less-than (unordered, non-signaling) \n
-///    0x16: Not-less-than-or-equal (unordered, non-signaling) \n
-///    0x17: Ordered (signaling) \n
-///    0x18: Equal (unordered, signaling) \n
-///    0x19: Not-greater-than-or-equal (unordered, non-signaling) \n
-///    0x1A: Not-greater-than (unordered, non-signaling) \n
-///    0x1B: False (ordered, signaling) \n
-///    0x1C: Not-equal (ordered, signaling) \n
-///    0x1D: Greater-than-or-equal (ordered, non-signaling) \n
-///    0x1E: Greater-than (ordered, non-signaling) \n
-///    0x1F: True (unordered, signaling)
-/// \returns A 128-bit vector of [4 x float] containing the comparison results.
-#define _mm_cmp_ps(a, b, c) \
-  ((__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \
-                                (__v4sf)(__m128)(b), (c)))
-
 /// Compares each of the corresponding double-precision values of two
 ///    256-bit vectors of [4 x double], using the operation specified by the
 ///    immediate integer operand.
@@ -1847,124 +1718,6 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
   ((__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \
                                    (__v8sf)(__m256)(b), (c)))
 
-/// Compares each of the corresponding scalar double-precision values of
-///    two 128-bit vectors of [2 x double], using the operation specified by the
-///    immediate integer operand.
-///
-///    If the result is true, all 64 bits of the destination vector are set;
-///    otherwise they are cleared.
-///
-/// \headerfile <x86intrin.h>
-///
-/// \code
-/// __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c);
-/// \endcode
-///
-/// This intrinsic corresponds to the <c> VCMPSD </c> instruction.
-///
-/// \param a
-///    A 128-bit vector of [2 x double].
-/// \param b
-///    A 128-bit vector of [2 x double].
-/// \param c
-///    An immediate integer operand, with bits [4:0] specifying which comparison
-///    operation to use: \n
-///    0x00: Equal (ordered, non-signaling) \n
-///    0x01: Less-than (ordered, signaling) \n
-///    0x02: Less-than-or-equal (ordered, signaling) \n
-///    0x03: Unordered (non-signaling) \n
-///    0x04: Not-equal (unordered, non-signaling) \n
-///    0x05: Not-less-than (unordered, signaling) \n
-///    0x06: Not-less-than-or-equal (unordered, signaling) \n
-///    0x07: Ordered (non-signaling) \n
-///    0x08: Equal (unordered, non-signaling) \n
-///    0x09: Not-greater-than-or-equal (unordered, signaling) \n
-///    0x0A: Not-greater-than (unordered, signaling) \n
-///    0x0B: False (ordered, non-signaling) \n
-///    0x0C: Not-equal (ordered, non-signaling) \n
-///    0x0D: Greater-than-or-equal (ordered, signaling) \n
-///    0x0E: Greater-than (ordered, signaling) \n
-///    0x0F: True (unordered, non-signaling) \n
-///    0x10: Equal (ordered, signaling) \n
-///    0x11: Less-than (ordered, non-signaling) \n
-///    0x12: Less-than-or-equal (ordered, non-signaling) \n
-///    0x13: Unordered (signaling) \n
-///    0x14: Not-equal (unordered, signaling) \n
-///    0x15: Not-less-than (unordered, non-signaling) \n
-///    0x16: Not-less-than-or-equal (unordered, non-signaling) \n
-///    0x17: Ordered (signaling) \n
-///    0x18: Equal (unordered, signaling) \n
-///    0x19: Not-greater-than-or-equal (unordered, non-signaling) \n
-///    0x1A: Not-greater-than (unordered, non-signaling) \n
-///    0x1B: False (ordered, signaling) \n
-///    0x1C: Not-equal (ordered, signaling) \n
-///    0x1D: Greater-than-or-equal (ordered, non-signaling) \n
-///    0x1E: Greater-than (ordered, non-signaling) \n
-///    0x1F: True (unordered, signaling)
-/// \returns A 128-bit vector of [2 x double] containing the comparison results.
-#define _mm_cmp_sd(a, b, c) \
-  ((__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \
-                                 (__v2df)(__m128d)(b), (c)))
-
-/// Compares each of the corresponding scalar values of two 128-bit
-///    vectors of [4 x float], using the operation specified by the immediate
-///    integer operand.
-///
-///    If the result is true, all 32 bits of the destination vector are set;
-///    otherwise they are cleared.
-///
-/// \headerfile <x86intrin.h>
-///
-/// \code
-/// __m128 _mm_cmp_ss(__m128 a, __m128 b, const int c);
-/// \endcode
-///
-/// This intrinsic corresponds to the <c> VCMPSS </c> instruction.
-///
-/// \param a
-///    A 128-bit vector of [4 x float].
-/// \param b
-///    A 128-bit vector of [4 x float].
-/// \param c
-///    An immediate integer operand, with bits [4:0] specifying which comparison
-///    operation to use: \n
-///    0x00: Equal (ordered, non-signaling) \n
-///    0x01: Less-than (ordered, signaling) \n
-///    0x02: Less-than-or-equal (ordered, signaling) \n
-///    0x03: Unordered (non-signaling) \n
-///    0x04: Not-equal (unordered, non-signaling) \n
-///    0x05: Not-less-than (unordered, signaling) \n
-///    0x06: Not-less-than-or-equal (unordered, signaling) \n
-///    0x07: Ordered (non-signaling) \n
-///    0x08: Equal (unordered, non-signaling) \n
-///    0x09: Not-greater-than-or-equal (unordered, signaling) \n
-///    0x0A: Not-greater-than (unordered, signaling) \n
-///    0x0B: False (ordered, non-signaling) \n
-///    0x0C: Not-equal (ordered, non-signaling) \n
-///    0x0D: Greater-than-or-equal (ordered, signaling) \n
-///    0x0E: Greater-than (ordered, signaling) \n
-///    0x0F: True (unordered, non-signaling) \n
-///    0x10: Equal (ordered, signaling) \n
-///    0x11: Less-than (ordered, non-signaling) \n
-///    0x12: Less-than-or-equal (ordered, non-signaling) \n
-///    0x13: Unordered (signaling) \n
-///    0x14: Not-equal (unordered, signaling) \n
-///    0x15: Not-less-than (unordered, non-signaling) \n
-///    0x16: Not-less-than-or-equal (unordered, non-signaling) \n
-///    0x17: Ordered (signaling) \n
-///    0x18: Equal (unordered, signaling) \n
-///    0x19: Not-greater-than-or-equal (unordered, non-signaling) \n
-///    0x1A: Not-greater-than (unordered, non-signaling) \n
-///    0x1B: False (ordered, signaling) \n
-///    0x1C: Not-equal (ordered, signaling) \n
-///    0x1D: Greater-than-or-equal (ordered, non-signaling) \n
-///    0x1E: Greater-than (ordered, non-signaling) \n
-///    0x1F: True (unordered, signaling)
-/// \returns A 128-bit vector of [4 x float] containing the comparison results.
-#define _mm_cmp_ss(a, b, c) \
-  ((__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), \
-                                (__v4sf)(__m128)(b), (c)))
-
 /// Takes a [8 x i32] vector and returns the vector element value
 ///    indexed by the immediate constant operand.
 ///
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 1d451b5f5b25de..db6d43ea4ad6fe 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -4745,6 +4745,127 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castsi128_pd(__m128i __a) {
   return (__m128d)__a;
 }
 
+/// Compares each of the corresponding double-precision values of two
+///    128-bit vectors of [2 x double], using the operation specified by the
+///    immediate integer operand.
+///
+///    Returns a [2 x double] vector consisting of two doubles corresponding to
+///    the two comparison results: zero if the comparison is false, and all 1's
+///    if the comparison is true.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c);
+/// \endcode
+///
+/// This intrinsic corresponds to the <c> (V)CMPPD </c> instruction.
+///
+/// \param a
+///    A 128-bit vector of [2 x double].
+/// \param b
+///    A 128-bit vector of [2 x double].
+/// \param c
+///    An immediate integer operand, with bits [4:0] specifying which comparison
+///    operation to use: \n
+///    (Note that without avx enabled, only bits [2:0] are supported) \n
+///    0x00: Equal (ordered, non-signaling) \n
+///    0x01: Less-than (ordered, signaling) \n
+///    0x02: Less-than-or-equal (ordered, signaling) \n
+///    0x03: Unordered (non-signaling) \n
+///    0x04: Not-equal (unordered, non-signaling) \n
+///    0x05: Not-less-than (unordered, signaling) \n
+///    0x06: Not-less-than-or-equal (unordered, signaling) \n
+///    0x07: Ordered (non-signaling) \n
+///    0x08: Equal (unordered, non-signaling) \n
+///    0x09: Not-greater-than-or-equal (unordered, signaling) \n
+///    0x0A: Not-greater-than (unordered, signaling) \n
+///    0x0B: False (ordered, non-signaling) \n
+///    0x0C: Not-equal (ordered, non-signaling) \n
+///    0x0D: Greater-than-or-equal (ordered, signaling) \n
+///    0x0E: Greater-than (ordered, signaling) \n
+///    0x0F: True (unordered, non-signaling) \n
+///    0x10: Equal (ordered, signaling) \n
+///    0x11: Less-than (ordered, non-signaling) \n
+///    0x12: Less-than-or-equal (ordered, non-signaling) \n
+///    0x13: Unordered (signaling) \n
+///    0x14: Not-equal (unordered, signaling) \n
+///    0x15: Not-less-than (unordered, non-signaling) \n
+///    0x16: Not-less-than-or-equal (unordered, non-signaling) \n
+///    0x17: Ordered (signaling) \n
+///    0x18: Equal (unordered, signaling) \n
+///    0x19: Not-greater-than-or-equal (unordered, non-signaling) \n
+///    0x1A: Not-greater-than (unordered, non-signaling) \n
+///    0x1B: False (ordered, signaling) \n
+///    0x1C: Not-equal (ordered, signaling) \n
+///    0x1D: Greater-than-or-equal (ordered, non-signaling) \n
+///    0x1E: Greater-than (ordered, non-signaling) \n
+///    0x1F: True (unordered, signaling)
+/// \returns A 128-bit vector of [2 x double] containing the comparison results.
+#define _mm_cmp_pd(a, b, c)                                                    \
+  ((__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b),   \
+                                 (c)))
+
+/// Compares each of the corresponding scalar double-precision values of
+///    two 128-bit vectors of [2 x double], using the operation specified by the
+///    immediate integer operand.
+///
+///    If the result is true, all 64 bits of the destination vector are set;
+///    otherwise they are cleared.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c);
+/// \endcode
+///
+/// This intrinsic corresponds to the <c> (V)CMPSD </c> instruction.
+///
+/// \param a
+///    A 128-bit vector of [2 x double].
+/// \param b
+///    A 128-bit vector of [2 x double].
+/// \param c
+///    An immediate integer operand, with bits [4:0] specifying which comparison
+///    operation to use: \n
+///    (Note that without avx enabled, only bits [2:0] are supported) \n
+///    0x00: Equal (ordered, non-signaling) \n
+///    0x01: Less-than (ordered, signaling) \n
+///    0x02: Less-than-or-equal (ordered, signaling) \n
+///    0x03: Unordered (non-signaling) \n
+///    0x04: Not-equal (unordered, non-signaling) \n
+///    0x05: Not-less-than (unordered, signaling) \n
+///    0x06: Not-less-than-or-equal (unordered, signaling) \n
+///    0x07: Ordered (non-signaling) \n
+///    0x08: Equal (unordered, n...
[truncated]

github-actions · 2024-03-06T08:16:07Z

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:

git-clang-format --diff 9a894e7d84892489826375f94c08ab16ccacc4bb 71641346ebadf8d21a109813fcf66f78e0932955 -- clang/test/CodeGen/X86/attribute-cmpsd-no-error.c clang/test/CodeGen/X86/cmp-avx-builtins-error.c clang/lib/CodeGen/CodeGenFunction.cpp clang/lib/Headers/avxintrin.h clang/lib/Headers/emmintrin.h clang/lib/Headers/xmmintrin.h clang/test/CodeGen/X86/avx-builtins.c clang/test/CodeGen/X86/sse-builtins.c clang/test/CodeGen/X86/sse2-builtins.c clang/test/CodeGen/target-features-error-2.c

View the diff from clang-format here.

diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h
index 1f5993e0c3..a994475c6e 100644
--- a/clang/lib/Headers/xmmintrin.h
+++ b/clang/lib/Headers/xmmintrin.h
@@ -2941,14 +2941,14 @@ _mm_movemask_ps(__m128 __a)
 }
 
 /* Compare */
-#define _CMP_EQ_OQ    0x00 /* Equal (ordered, non-signaling)  */
-#define _CMP_LT_OS    0x01 /* Less-than (ordered, signaling)  */
-#define _CMP_LE_OS    0x02 /* Less-than-or-equal (ordered, signaling)  */
-#define _CMP_UNORD_Q  0x03 /* Unordered (non-signaling)  */
-#define _CMP_NEQ_UQ   0x04 /* Not-equal (unordered, non-signaling)  */
-#define _CMP_NLT_US   0x05 /* Not-less-than (unordered, signaling)  */
-#define _CMP_NLE_US   0x06 /* Not-less-than-or-equal (unordered, signaling)  */
-#define _CMP_ORD_Q    0x07 /* Ordered (non-signaling)   */
+#define _CMP_EQ_OQ 0x00   /* Equal (ordered, non-signaling)  */
+#define _CMP_LT_OS 0x01   /* Less-than (ordered, signaling)  */
+#define _CMP_LE_OS 0x02   /* Less-than-or-equal (ordered, signaling)  */
+#define _CMP_UNORD_Q 0x03 /* Unordered (non-signaling)  */
+#define _CMP_NEQ_UQ 0x04  /* Not-equal (unordered, non-signaling)  */
+#define _CMP_NLT_US 0x05  /* Not-less-than (unordered, signaling)  */
+#define _CMP_NLE_US 0x06  /* Not-less-than-or-equal (unordered, signaling)  */
+#define _CMP_ORD_Q 0x07   /* Ordered (non-signaling)   */
 
 /// Compares each of the corresponding values of two 128-bit vectors of
 ///    [4 x float], using the operation specified by the immediate integer

FreddyLeaf · 2024-03-06T08:17:53Z

clang/test/CodeGen/target-features-error-2.c

@@ -15,7 +15,7 @@ int baz(__m256i a) {

 #if NEED_AVX_2
 __m128 need_avx(__m128 a, __m128 b) {
-  return _mm_cmp_ps(a, b, 0); // expected-error {{'__builtin_ia32_cmpps' needs target feature avx}}
+  return _mm_cmp_ps(a, b, 8); // expected-error {{'__builtin_ia32_cmpps' needs target feature avx}}


@phoebewang @RKSimon @KanRobert
This change shows the behavior of compiling these intrinsics with parameter of [8,31] after this PR.

FreddyLeaf · 2024-03-06T08:18:35Z

The difference with #67410 is here: 54c78b5

RKSimon

Should we update the existing SSE/SSE2 cmpeq/cmplt intinsics to use __builtin_ia32_cmp* instead of __builtin_ia32_cmpeq??/__builtin_ia32_cmplt?? etc?

clang/include/clang/Basic/BuiltinsX86.def

clang/lib/CodeGen/CodeGenFunction.cpp

clang/test/CodeGen/X86/sse-builtins.c

clang/test/CodeGen/X86/sse2-builtins.c

clang/include/clang/Basic/BuiltinsX86.def

KanRobert · 2024-03-06T11:33:40Z

My understanding is that you change the required feature of __builtin_ia32_cmp[p|s][s|d] from avx to sse for some condition codes. The title is not accurate.

clang/lib/CodeGen/CodeGenFunction.cpp

FreddyLeaf · 2024-03-06T11:44:08Z

Should we update the existing SSE/SSE2 cmpeq/cmplt intinsics to use __builtin_ia32_cmp* instead of __builtin_ia32_cmpeq??/__builtin_ia32_cmplt?? etc?

Good question! Didn't notice this issue before.

FreddyLeaf · 2024-03-06T11:47:43Z

My understanding is that you change the required feature of __builtin_ia32_cmp[p|s][s|d] from avx to sse for some condition codes. The title is not accurate.

You are right. Updated.

FreddyLeaf · 2024-03-06T12:27:22Z

Should we update the existing SSE/SSE2 cmpeq/cmplt intinsics to use __builtin_ia32_cmp* instead of __builtin_ia32_cmpeq??/__builtin_ia32_cmplt?? etc?

Good question! Didn't notice this issue before.

Seems doable. I'll prepare a following PR after this.

clang/lib/Headers/emmintrin.h

clang/lib/Headers/xmmintrin.h

clang/test/CodeGen/X86/sse-builtins.c

KanRobert · 2024-03-07T01:53:45Z

Still not understand (code in clang/lib/CodeGen/CodeGenFunction.cpp). Shouldn't the target feature of the caller be passed to the intrinsic when checking the argument of the intrinsic?

FreddyLeaf · 2024-03-07T01:59:35Z

Still not understand. Shouldn't the target feature of the caller be passed to the intrinsic when checking the argument of the intrinsic?

The target features passing from option can be passed to Semachecking. It's special for attribute_target, which is handled in handleTargetAttr, which is after Sema::CheckBuiltinFunctionCall.

KanRobert · 2024-03-07T03:06:37Z

Still not understand. Shouldn't the target feature of the caller be passed to the intrinsic when checking the argument of the intrinsic?

The target features passing from option can be passed to Semachecking. It's special for attribute_target, which is handled in handleTargetAttr, which is after Sema::CheckBuiltinFunctionCall.

Then why not move the call handleTargetAttr before Sema::CheckBuiltinFunctionCall?

FreddyLeaf · 2024-03-07T03:14:53Z

Still not understand. Shouldn't the target feature of the caller be passed to the intrinsic when checking the argument of the intrinsic?

The target features passing from option can be passed to Semachecking. It's special for attribute_target, which is handled in handleTargetAttr, which is after Sema::CheckBuiltinFunctionCall.

Then why not move the call handleTargetAttr before Sema::CheckBuiltinFunctionCall?

Mainly because all Sema::CheckBuiltinFunctionCall before doesn't dependent on target feature, right? We don't have any other builtins have different range limit of same const parameter. But clang::CodegenFunction has the information of the whole function, which I think is a more reasonable place to do the check.

FreddyLeaf · 2024-03-08T02:08:23Z

ping for review

phoebewang

LGTM.

KanRobert

LGTM

RKSimon

LGTM - but @pogo59 needs a headup as it will affect some ongoing documentation cleanup work

pogo59

Thanks for the heads-up @RKSimon I've made suggestions for consistency with existing practice in emmintrin.h which I've adopted in the latest revision to #83316.

I'm unsure how replacing a function declaration with an \fn directive will play with our documentation tooling. But we can figure that out later.

clang/lib/Headers/emmintrin.h

clang/lib/Headers/xmmintrin.h

pogo59

The example for \fn in the doxygen documentation puts the directive at the top of the comment block, not at the end. Do we know whether putting it at the end will produce the expected result?

FreddyLeaf · 2024-03-09T05:49:02Z

Thanks for the heads-up @RKSimon I've made suggestions for consistency with existing practice in emmintrin.h which I've adopted in the latest revision to #83316.

I'm unsure how replacing a function declaration with an \fn directive will play with our documentation tooling. But we can figure that out later.

I've committed your suggestions. FYI \fn usage was referring

llvm-project/clang/lib/Headers/avxvnniintrin.h

Line 32 in c58c827

/// \fn __m256i _mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B)

pogo59 · 2024-03-11T16:42:12Z

Thanks!

FreddyLeaf added 2 commits March 5, 2024 16:30

[X86] Change target of __builtin_ia32_cmp[p|s][s|d] from avx into sse…

2ba698b

…/sse2

Handle attribute/target

54c78b5

llvmbot added clang Clang issues not falling into any other category backend:X86 clang:frontend Language frontend issues, e.g. anything involving "Sema" clang:headers Headers provided by Clang, e.g. for intrinsics clang:codegen labels Mar 6, 2024

FreddyLeaf requested review from phoebewang, KanRobert and RKSimon March 6, 2024 08:15

FreddyLeaf commented Mar 6, 2024

View reviewed changes

RKSimon requested a review from pogo59 March 6, 2024 09:55

RKSimon reviewed Mar 6, 2024

View reviewed changes

KanRobert reviewed Mar 6, 2024

View reviewed changes

clang/include/clang/Basic/BuiltinsX86.def Outdated Show resolved Hide resolved

KanRobert reviewed Mar 6, 2024

View reviewed changes

clang/lib/CodeGen/CodeGenFunction.cpp Outdated Show resolved Hide resolved

FreddyLeaf changed the title ~~[X86] Change target of __builtin_ia32_cmp[p|s][s|d] from avx into sse/sse2~~ [X86] Finely handle target of __builtin_ia32_cmp[p|s][s|d] from avx into sse/sse2/avx Mar 6, 2024

FreddyLeaf added 2 commits March 6, 2024 20:12

Address comments.

d8979e2

typo

c9a3d89

RKSimon reviewed Mar 6, 2024

View reviewed changes

clang/lib/Headers/emmintrin.h Show resolved Hide resolved

phoebewang reviewed Mar 6, 2024

View reviewed changes

clang/lib/Headers/xmmintrin.h Outdated Show resolved Hide resolved

phoebewang reviewed Mar 6, 2024

View reviewed changes

clang/test/CodeGen/X86/sse-builtins.c Show resolved Hide resolved

Address comments.

51eda06

RKSimon changed the title ~~[X86] Finely handle target of __builtin_ia32_cmp[p|s][s|d] from avx into sse/sse2/avx~~ [X86] Finally handle target of __builtin_ia32_cmp[p|s][s|d] from avx into sse/sse2/avx Mar 7, 2024

phoebewang approved these changes Mar 8, 2024

View reviewed changes

KanRobert approved these changes Mar 8, 2024

View reviewed changes

RKSimon approved these changes Mar 8, 2024

View reviewed changes

pogo59 reviewed Mar 8, 2024

View reviewed changes

clang/lib/Headers/emmintrin.h Outdated Show resolved Hide resolved

clang/lib/Headers/emmintrin.h Outdated Show resolved Hide resolved

clang/lib/Headers/xmmintrin.h Outdated Show resolved Hide resolved

clang/lib/Headers/xmmintrin.h Outdated Show resolved Hide resolved

pogo59 reviewed Mar 8, 2024

View reviewed changes

Address comments.

7164134

FreddyLeaf merged commit fc0fc76 into llvm:main Mar 9, 2024
2 of 4 checks passed

FreddyLeaf deleted the cmp_sd branch March 9, 2024 05:49

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[X86] Finally handle target of __builtin_ia32_cmp[p|s][s|d] from avx into sse/sse2/avx #84136

[X86] Finally handle target of __builtin_ia32_cmp[p|s][s|d] from avx into sse/sse2/avx #84136

FreddyLeaf commented Mar 6, 2024 •

edited

Loading

llvmbot commented Mar 6, 2024 •

edited

Loading

github-actions bot commented Mar 6, 2024 •

edited

Loading

FreddyLeaf Mar 6, 2024

FreddyLeaf commented Mar 6, 2024

RKSimon left a comment

KanRobert commented Mar 6, 2024

FreddyLeaf commented Mar 6, 2024

FreddyLeaf commented Mar 6, 2024

FreddyLeaf commented Mar 6, 2024

KanRobert commented Mar 7, 2024 •

edited

Loading

FreddyLeaf commented Mar 7, 2024 •

edited

Loading

KanRobert commented Mar 7, 2024

FreddyLeaf commented Mar 7, 2024

FreddyLeaf commented Mar 8, 2024

phoebewang left a comment

KanRobert left a comment

RKSimon left a comment

pogo59 left a comment

pogo59 left a comment

FreddyLeaf commented Mar 9, 2024

pogo59 commented Mar 11, 2024

[X86] Finally handle target of __builtin_ia32_cmp[p|s][s|d] from avx into sse/sse2/avx #84136

[X86] Finally handle target of __builtin_ia32_cmp[p|s][s|d] from avx into sse/sse2/avx #84136

Conversation

FreddyLeaf commented Mar 6, 2024 • edited Loading

llvmbot commented Mar 6, 2024 • edited Loading

github-actions bot commented Mar 6, 2024 • edited Loading

FreddyLeaf Mar 6, 2024

Choose a reason for hiding this comment

FreddyLeaf commented Mar 6, 2024

RKSimon left a comment

Choose a reason for hiding this comment

KanRobert commented Mar 6, 2024

FreddyLeaf commented Mar 6, 2024

FreddyLeaf commented Mar 6, 2024

FreddyLeaf commented Mar 6, 2024

KanRobert commented Mar 7, 2024 • edited Loading

FreddyLeaf commented Mar 7, 2024 • edited Loading

KanRobert commented Mar 7, 2024

FreddyLeaf commented Mar 7, 2024

FreddyLeaf commented Mar 8, 2024

phoebewang left a comment

Choose a reason for hiding this comment

KanRobert left a comment

Choose a reason for hiding this comment

RKSimon left a comment

Choose a reason for hiding this comment

pogo59 left a comment

Choose a reason for hiding this comment

pogo59 left a comment

Choose a reason for hiding this comment

FreddyLeaf commented Mar 9, 2024

pogo59 commented Mar 11, 2024

FreddyLeaf commented Mar 6, 2024 •

edited

Loading

llvmbot commented Mar 6, 2024 •

edited

Loading

github-actions bot commented Mar 6, 2024 •

edited

Loading

KanRobert commented Mar 7, 2024 •

edited

Loading

FreddyLeaf commented Mar 7, 2024 •

edited

Loading