[VP] Add vp.fcmp comparison intrinsic and docs

This patch adds the first support for vector-predicated comparison intrinsics, starting with vp.fcmp. It uses metadata to encode its condition code, like the llvm.experimental.constrained.fcmp intrinsic. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D121292
llvm · Mar 30, 2022 · da6131f · da6131f
1 parent e18cc52
commit da6131f
Show file tree

Hide file tree

Showing 9 changed files with 175 additions and 5 deletions.
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
@@ -20283,6 +20283,64 @@ Examples:
       %also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> undef
 
 
+.. _int_vp_fcmp:
+
+'``llvm.vp.fcmp.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <16 x i1> @llvm.vp.fcmp.v16f32(<16 x float> <left_op>, <16 x float> <right_op>, metadata <condition code>, <16 x i1> <mask>, i32 <vector_length>)
+      declare <vscale x 4 x i1> @llvm.vp.fcmp.nxv4f32(<vscale x 4 x float> <left_op>, <vscale x 4 x float> <right_op>, metadata <condition code>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+      declare <256 x i1> @llvm.vp.fcmp.v256f64(<256 x double> <left_op>, <256 x double> <right_op>, metadata <condition code>, <256 x i1> <mask>, i32 <vector_length>)
+
+Overview:
+"""""""""
+
+The '``llvm.vp.fcmp``' intrinsic returns a vector of boolean values based on
+the comparison of its operands. The operation has a mask and an explicit vector
+length parameter.
+
+
+Arguments:
+""""""""""
+
+The '``llvm.vp.fcmp``' intrinsic takes the two values to compare as its first
+and second operands. These two values must be vectors of :ref:`floating-point
+<t_floating>` types.
+The return value is the result of the comparison. The return type must be a
+vector of :ref:`i1 <t_integer>` type. The fourth operand is the vector mask.
+The return type, the values to compare, and the vector mask have the same
+number of elements. The third operand is the condition code indicating the kind
+of comparison to perform. It must be a metadata string with :ref:`one of the
+supported floating-point condition code values <fcmp_md_cc>`. The fifth operand
+is the explicit vector length of the operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.fcmp``' compares its first two operands according to the
+condition code given as the third operand. The operands are compared element by
+element on each enabled lane, where the semantics of the comparison are
+defined :ref:`according to the condition code <fcmp_md_cc_sem>`. Masked-off
+lanes are undefined.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %r = call <4 x i1> @llvm.vp.fcmp.v4f32(<4 x float> %a, <4 x float> %b, metadata !"oeq", <4 x i1> %mask, i32 %evl)
+      ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+      %t = fcmp oeq <4 x float> %a, %b
+      %also.r = select <4 x i1> %mask, <4 x i1> %t, <4 x i1> undef
+
+
 .. _int_mload_mstore:
 
 Masked Vector Load and Store Intrinsics
@@ -21423,6 +21481,8 @@ of floating-point values. Both arguments must have identical types.
 The third argument is the condition code indicating the kind of comparison
 to perform. It must be a metadata string with one of the following values:
 
+.. _fcmp_md_cc:
+
 - "``oeq``": ordered and equal
 - "``ogt``": ordered and greater than
 - "``oge``": ordered and greater than or equal
@@ -21451,6 +21511,8 @@ as the third argument. If the operands are vectors, then the
 vectors are compared element by element. Each comparison performed
 always yields an :ref:`i1 <t_integer>` result, as follows:
 
+.. _fcmp_md_cc_sem:
+
 - "``oeq``": yields ``true`` if both operands are not a NAN and ``op1``
   is equal to ``op2``.
 - "``ogt``": yields ``true`` if both operands are not a NAN and ``op1``

diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -488,6 +488,23 @@ class VPCastIntrinsic : public VPIntrinsic {
   /// @}
 };
 
+class VPCmpIntrinsic : public VPIntrinsic {
+public:
+  static bool isVPCmp(Intrinsic::ID ID);
+
+  CmpInst::Predicate getPredicate() const;
+
+  /// Methods for support type inquiry through isa, cast, and dyn_cast:
+  /// @{
+  static bool classof(const IntrinsicInst *I) {
+    return VPCmpIntrinsic::isVPCmp(I->getIntrinsicID());
+  }
+  static bool classof(const Value *V) {
+    return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+  }
+  /// @}
+};
+
 /// This is the common base class for constrained floating point intrinsics.
 class ConstrainedFPIntrinsic : public IntrinsicInst {
 public:

diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
@@ -1567,6 +1567,16 @@ def int_vp_merge : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
                                 LLVMMatchType<0>,
                                 llvm_i32_ty]>;
 
+// Comparisons.
+let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
+  def int_vp_fcmp : DefaultAttrsIntrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ],
+                                [ llvm_anyvector_ty,
+                                  LLVMMatchType<0>,
+                                  llvm_metadata_ty,
+                                  LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                                  llvm_i32_ty]>;
+}
+
 // Reductions
 let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in {
   def int_vp_reduce_fadd : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],

diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -126,6 +126,13 @@
 #define VP_PROPERTY_CASTOP
 #endif
 
+// This VP Intrinsic is a comparison operation
+// The condition code arg is at CCPOS and accepts floating-point condition
+// codes if ISFP is set, else it accepts integer condition codes.
+#ifndef VP_PROPERTY_CMP
+#define VP_PROPERTY_CMP(CCPOS, ISFP)
+#endif
+
 /// } Property Macros
 
 ///// Integer Arithmetic {
@@ -252,6 +259,16 @@ HELPER_REGISTER_CAST_VP(sitofp, VP_SITOFP, SIToFP, 1)
 
 ///// } Type Casts
 
+///// Comparisons {
+// llvm.vp.fcmp(x,y,cc,mask,vlen)
+BEGIN_REGISTER_VP(vp_fcmp, 3, 4, VP_FCMP, -1)
+VP_PROPERTY_FUNCTIONAL_OPC(FCmp)
+VP_PROPERTY_CMP(2, true)
+VP_PROPERTY_CONSTRAINEDFP(0, 1, experimental_constrained_fcmp)
+END_REGISTER_VP(vp_fcmp, VP_FCMP)
+
+///// } Comparisons
+
 ///// Memory Operations {
 // llvm.vp.store(val,ptr,mask,vlen)
 BEGIN_REGISTER_VP_INTRINSIC(vp_store, 2, 3)
@@ -424,6 +441,7 @@ END_REGISTER_VP(experimental_vp_splice, EXPERIMENTAL_VP_SPLICE)
 #undef END_REGISTER_VP_SDNODE
 #undef VP_PROPERTY_BINARYOP
 #undef VP_PROPERTY_CASTOP
+#undef VP_PROPERTY_CMP
 #undef VP_PROPERTY_CONSTRAINEDFP
 #undef VP_PROPERTY_FUNCTIONAL_INTRINSIC
 #undef VP_PROPERTY_FUNCTIONAL_OPC

diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
@@ -236,8 +236,8 @@ bool ConstrainedFPIntrinsic::isDefaultFPEnvironment() const {
   return true;
 }
 
-FCmpInst::Predicate ConstrainedFPCmpIntrinsic::getPredicate() const {
-  Metadata *MD = cast<MetadataAsValue>(getArgOperand(2))->getMetadata();
+static FCmpInst::Predicate getFPPredicateFromMD(const Value *Op) {
+  Metadata *MD = cast<MetadataAsValue>(Op)->getMetadata();
   if (!MD || !isa<MDString>(MD))
     return FCmpInst::BAD_FCMP_PREDICATE;
   return StringSwitch<FCmpInst::Predicate>(cast<MDString>(MD)->getString())
@@ -258,6 +258,10 @@ FCmpInst::Predicate ConstrainedFPCmpIntrinsic::getPredicate() const {
       .Default(FCmpInst::BAD_FCMP_PREDICATE);
 }
 
+FCmpInst::Predicate ConstrainedFPCmpIntrinsic::getPredicate() const {
+  return getFPPredicateFromMD(getArgOperand(2));
+}
+
 bool ConstrainedFPIntrinsic::isUnaryOp() const {
   switch (getIntrinsicID()) {
   default:
@@ -560,6 +564,37 @@ bool VPCastIntrinsic::isVPCast(Intrinsic::ID ID) {
   return false;
 }
 
+bool VPCmpIntrinsic::isVPCmp(Intrinsic::ID ID) {
+  switch (ID) {
+  default:
+    break;
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
+#define VP_PROPERTY_CMP(CCPOS, ...) return true;
+#define END_REGISTER_VP_INTRINSIC(VPID) break;
+#include "llvm/IR/VPIntrinsics.def"
+  }
+  return false;
+}
+
+CmpInst::Predicate VPCmpIntrinsic::getPredicate() const {
+  bool IsFP = true;
+  Optional<unsigned> CCArgIdx;
+  switch (getIntrinsicID()) {
+  default:
+    break;
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
+#define VP_PROPERTY_CMP(CCPOS, ISFP)                                           \
+  CCArgIdx = CCPOS;                                                            \
+  IsFP = ISFP;                                                                 \
+  break;
+#define END_REGISTER_VP_INTRINSIC(VPID) break;
+#include "llvm/IR/VPIntrinsics.def"
+  }
+  assert(CCArgIdx.hasValue() && IsFP &&
+         "Unexpected vector-predicated comparison");
+  return getFPPredicateFromMD(getArgOperand(*CCArgIdx));
+}
+
 unsigned VPReductionIntrinsic::getVectorParamPos() const {
   return *VPReductionIntrinsic::getVectorParamPos(getIntrinsicID());
 }

diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
@@ -5601,6 +5601,11 @@ void Verifier::visitVPIntrinsic(VPIntrinsic &VPI) {
            "equal",
            *VPCast);
   }
+  if (VPI.getIntrinsicID() == Intrinsic::vp_fcmp) {
+    auto Pred = cast<VPCmpIntrinsic>(&VPI)->getPredicate();
+    Assert(CmpInst::isFPPredicate(Pred),
+           "invalid predicate for VP FP comparison intrinsic", &VPI);
+  }
 }
 
 void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {

diff --git a/llvm/test/Verifier/invalid-vp-intrinsics.ll b/llvm/test/Verifier/invalid-vp-intrinsics.ll
@@ -1,6 +1,7 @@
 ; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
 
-declare <4 x i32> @llvm.vp.fptosi.v4i32.v8f32(<8 x float>, <4 x i1>, i32);
+declare <4 x i32> @llvm.vp.fptosi.v4i32.v8f32(<8 x float>, <4 x i1>, i32)
+declare <4 x i1> @llvm.vp.fcmp.v4f32(<4 x float>, <4 x float>, metadata, <4 x i1>, i32)
 
 ; CHECK: VP cast intrinsic first argument and result vector lengths must be equal
 ; CHECK-NEXT: %r0 = call <4 x i32>
@@ -9,3 +10,14 @@ define void @test_vp_fptosi(<8 x float> %src, <4 x i1> %m, i32 %n) {
   %r0 = call <4 x i32> @llvm.vp.fptosi.v4i32.v8f32(<8 x float> %src, <4 x i1> %m, i32 %n)
   ret void
 }
+
+; CHECK: invalid predicate for VP FP comparison intrinsic
+; CHECK-NEXT: %r0 = call <4 x i1> @llvm.vp.fcmp.v4f32
+; CHECK: invalid predicate for VP FP comparison intrinsic
+; CHECK-NEXT: %r1 = call <4 x i1> @llvm.vp.fcmp.v4f32
+
+define void @test_vp_fcmp(<4 x float> %a, <4 x float> %b, <4 x i1> %m, i32 %n) {
+  %r0 = call <4 x i1> @llvm.vp.fcmp.v4f32(<4 x float> %a, <4 x float> %b, metadata !"bad", <4 x i1> %m, i32 %n)
+  %r1 = call <4 x i1> @llvm.vp.fcmp.v4f32(<4 x float> %a, <4 x float> %b, metadata !"eq", <4 x i1> %m, i32 %n)
+  ret void
+}
diff --git a/llvm/test/Verifier/vp-intrinsics.ll b/llvm/test/Verifier/vp-intrinsics.ll
@@ -63,6 +63,11 @@ define void @test_vp_int_fp_conversions(<8 x i32> %i0, <8 x float> %f0, <8 x i1>
   ret void
 }
 
+define void @test_vp_comparisons(<8 x float> %f0, <8 x float> %f1, <8 x i1> %mask, i32 %evl) {
+  %r0 = call <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float> %f0, <8 x float> %f1, metadata !"oeq", <8 x i1> %mask, i32 %evl)
+  ret void
+}
+
 ; integer arith
 declare <8 x i32> @llvm.vp.add.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
 declare <8 x i32> @llvm.vp.sub.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
@@ -101,6 +106,8 @@ declare float @llvm.vp.reduce.fmul.v8f32(float, <8 x float>, <8 x i1>, i32)
 ; casts
 declare <8 x float> @llvm.vp.sitofp.v8f32.v8i32(<8 x i32>, <8 x i1>, i32)
 declare <8 x i32> @llvm.vp.fptosi.v8i32.v8f32(<8 x float>, <8 x i1>, i32)
+; compares
+declare <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float>, <8 x float>, metadata, <8 x i1>, i32)
 ; shuffles
 declare <8 x i32> @llvm.experimental.vp.splice.v8i32(<8 x i32>, <8 x i32>, i32, <8 x i1>, i32, i32)
 declare <vscale x 8 x i32> @llvm.experimental.vp.splice.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, i32, <vscale x 8 x i1>, i32, i32)
diff --git a/llvm/unittests/IR/VPIntrinsicTest.cpp b/llvm/unittests/IR/VPIntrinsicTest.cpp
@@ -97,6 +97,9 @@ class VPIntrinsicTest : public testing::Test {
     Str << " declare <8 x float> @llvm.vp.sitofp.v8f32"
         << ".v8i32(<8 x i32>, <8 x i1>, i32) ";
 
+    Str << " declare <8 x i1> @llvm.vp.fcmp.v8f32"
+        << "(<8 x float>, <8 x float>, metadata, <8 x i1>, i32) ";
+
     return parseAssemblyString(Str.str(), Err, C);
   }
 };
@@ -314,7 +317,7 @@ TEST_F(VPIntrinsicTest, VPIntrinsicDeclarationForParams) {
 }
 
 /// Check that the HANDLE_VP_TO_CONSTRAINEDFP maps to an existing intrinsic with
-/// the right amount of metadata args.
+/// the right amount of constrained-fp metadata args.
 TEST_F(VPIntrinsicTest, HandleToConstrainedFP) {
 #define VP_PROPERTY_CONSTRAINEDFP(HASROUND, HASEXCEPT, CFPID)                  \
   {                                                                            \
@@ -323,7 +326,8 @@ TEST_F(VPIntrinsicTest, HandleToConstrainedFP) {
     unsigned NumMetadataArgs = 0;                                              \
     for (auto TD : T)                                                          \
       NumMetadataArgs += (TD.Kind == Intrinsic::IITDescriptor::Metadata);      \
-    ASSERT_EQ(NumMetadataArgs, (unsigned)(HASROUND + HASEXCEPT));              \
+    bool IsCmp = Intrinsic::CFPID == Intrinsic::experimental_constrained_fcmp; \
+    ASSERT_EQ(NumMetadataArgs, (unsigned)(IsCmp + HASROUND + HASEXCEPT));      \
   }
 #include "llvm/IR/VPIntrinsics.def"
 }