Skip to content

Commit

Permalink
[VP] Add vp.fcmp comparison intrinsic and docs
Browse files Browse the repository at this point in the history
This patch adds the first support for vector-predicated comparison
intrinsics, starting with vp.fcmp. It uses metadata to encode its
condition code, like the llvm.experimental.constrained.fcmp intrinsic.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D121292
  • Loading branch information
frasercrmck committed Mar 30, 2022
1 parent e18cc52 commit da6131f
Show file tree
Hide file tree
Showing 9 changed files with 175 additions and 5 deletions.
62 changes: 62 additions & 0 deletions llvm/docs/LangRef.rst
Expand Up @@ -20283,6 +20283,64 @@ Examples:
%also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> undef


.. _int_vp_fcmp:

'``llvm.vp.fcmp.*``' Intrinsics
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Syntax:
"""""""
This is an overloaded intrinsic.

::

declare <16 x i1> @llvm.vp.fcmp.v16f32(<16 x float> <left_op>, <16 x float> <right_op>, metadata <condition code>, <16 x i1> <mask>, i32 <vector_length>)
declare <vscale x 4 x i1> @llvm.vp.fcmp.nxv4f32(<vscale x 4 x float> <left_op>, <vscale x 4 x float> <right_op>, metadata <condition code>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
declare <256 x i1> @llvm.vp.fcmp.v256f64(<256 x double> <left_op>, <256 x double> <right_op>, metadata <condition code>, <256 x i1> <mask>, i32 <vector_length>)

Overview:
"""""""""

The '``llvm.vp.fcmp``' intrinsic returns a vector of boolean values based on
the comparison of its operands. The operation has a mask and an explicit vector
length parameter.


Arguments:
""""""""""

The '``llvm.vp.fcmp``' intrinsic takes the two values to compare as its first
and second operands. These two values must be vectors of :ref:`floating-point
<t_floating>` types.
The return value is the result of the comparison. The return type must be a
vector of :ref:`i1 <t_integer>` type. The fourth operand is the vector mask.
The return type, the values to compare, and the vector mask have the same
number of elements. The third operand is the condition code indicating the kind
of comparison to perform. It must be a metadata string with :ref:`one of the
supported floating-point condition code values <fcmp_md_cc>`. The fifth operand
is the explicit vector length of the operation.

Semantics:
""""""""""

The '``llvm.vp.fcmp``' compares its first two operands according to the
condition code given as the third operand. The operands are compared element by
element on each enabled lane, where the semantics of the comparison are
defined :ref:`according to the condition code <fcmp_md_cc_sem>`. Masked-off
lanes are undefined.

Examples:
"""""""""

.. code-block:: llvm

%r = call <4 x i1> @llvm.vp.fcmp.v4f32(<4 x float> %a, <4 x float> %b, metadata !"oeq", <4 x i1> %mask, i32 %evl)
;; For all lanes below %evl, %r is lane-wise equivalent to %also.r

%t = fcmp oeq <4 x float> %a, %b
%also.r = select <4 x i1> %mask, <4 x i1> %t, <4 x i1> undef


.. _int_mload_mstore:

Masked Vector Load and Store Intrinsics
Expand Down Expand Up @@ -21423,6 +21481,8 @@ of floating-point values. Both arguments must have identical types.
The third argument is the condition code indicating the kind of comparison
to perform. It must be a metadata string with one of the following values:

.. _fcmp_md_cc:

- "``oeq``": ordered and equal
- "``ogt``": ordered and greater than
- "``oge``": ordered and greater than or equal
Expand Down Expand Up @@ -21451,6 +21511,8 @@ as the third argument. If the operands are vectors, then the
vectors are compared element by element. Each comparison performed
always yields an :ref:`i1 <t_integer>` result, as follows:

.. _fcmp_md_cc_sem:

- "``oeq``": yields ``true`` if both operands are not a NAN and ``op1``
is equal to ``op2``.
- "``ogt``": yields ``true`` if both operands are not a NAN and ``op1``
Expand Down
17 changes: 17 additions & 0 deletions llvm/include/llvm/IR/IntrinsicInst.h
Expand Up @@ -488,6 +488,23 @@ class VPCastIntrinsic : public VPIntrinsic {
/// @}
};

class VPCmpIntrinsic : public VPIntrinsic {
public:
static bool isVPCmp(Intrinsic::ID ID);

CmpInst::Predicate getPredicate() const;

/// Methods for support type inquiry through isa, cast, and dyn_cast:
/// @{
static bool classof(const IntrinsicInst *I) {
return VPCmpIntrinsic::isVPCmp(I->getIntrinsicID());
}
static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
/// @}
};

/// This is the common base class for constrained floating point intrinsics.
class ConstrainedFPIntrinsic : public IntrinsicInst {
public:
Expand Down
10 changes: 10 additions & 0 deletions llvm/include/llvm/IR/Intrinsics.td
Expand Up @@ -1567,6 +1567,16 @@ def int_vp_merge : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
LLVMMatchType<0>,
llvm_i32_ty]>;

// Comparisons.
// llvm.vp.fcmp takes, in order: the two vectors to compare (the second must
// match the type of the first), the condition code as a metadata operand, an
// i1 mask with the same vector width as the compared vectors, and an i32
// vector-length operand. The result is an i1 vector with the same width as
// the first operand.
let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
def int_vp_fcmp : DefaultAttrsIntrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ],
[ llvm_anyvector_ty,
LLVMMatchType<0>,
llvm_metadata_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
}

// Reductions
let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in {
def int_vp_reduce_fadd : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
Expand Down
18 changes: 18 additions & 0 deletions llvm/include/llvm/IR/VPIntrinsics.def
Expand Up @@ -126,6 +126,13 @@
#define VP_PROPERTY_CASTOP
#endif

// This VP Intrinsic is a comparison operation
// The condition code arg is at CCPOS and accepts floating-point condition
// codes if ISFP is set, else it accepts integer condition codes.
#ifndef VP_PROPERTY_CMP
#define VP_PROPERTY_CMP(CCPOS, ISFP)
#endif

/// } Property Macros

///// Integer Arithmetic {
Expand Down Expand Up @@ -252,6 +259,16 @@ HELPER_REGISTER_CAST_VP(sitofp, VP_SITOFP, SIToFP, 1)

///// } Type Casts

///// Comparisons {
// llvm.vp.fcmp(x,y,cc,mask,vlen)
// NOTE(review): the 3 and 4 below presumably give the mask and vector-length
// operand positions (they match the signature above) - confirm against the
// BEGIN_REGISTER_VP definition.
BEGIN_REGISTER_VP(vp_fcmp, 3, 4, VP_FCMP, -1)
VP_PROPERTY_FUNCTIONAL_OPC(FCmp)
// The condition code is operand 2 and accepts floating-point condition codes.
VP_PROPERTY_CMP(2, true)
// NOTE(review): arguments appear to be (HASROUND, HASEXCEPT, CFPID), i.e. no
// rounding-mode metadata but an exception-behavior metadata operand - confirm
// against the VP_PROPERTY_CONSTRAINEDFP definition.
VP_PROPERTY_CONSTRAINEDFP(0, 1, experimental_constrained_fcmp)
END_REGISTER_VP(vp_fcmp, VP_FCMP)

///// } Comparisons

///// Memory Operations {
// llvm.vp.store(val,ptr,mask,vlen)
BEGIN_REGISTER_VP_INTRINSIC(vp_store, 2, 3)
Expand Down Expand Up @@ -424,6 +441,7 @@ END_REGISTER_VP(experimental_vp_splice, EXPERIMENTAL_VP_SPLICE)
#undef END_REGISTER_VP_SDNODE
#undef VP_PROPERTY_BINARYOP
#undef VP_PROPERTY_CASTOP
#undef VP_PROPERTY_CMP
#undef VP_PROPERTY_CONSTRAINEDFP
#undef VP_PROPERTY_FUNCTIONAL_INTRINSIC
#undef VP_PROPERTY_FUNCTIONAL_OPC
Expand Down
39 changes: 37 additions & 2 deletions llvm/lib/IR/IntrinsicInst.cpp
Expand Up @@ -236,8 +236,8 @@ bool ConstrainedFPIntrinsic::isDefaultFPEnvironment() const {
return true;
}

FCmpInst::Predicate ConstrainedFPCmpIntrinsic::getPredicate() const {
Metadata *MD = cast<MetadataAsValue>(getArgOperand(2))->getMetadata();
static FCmpInst::Predicate getFPPredicateFromMD(const Value *Op) {
Metadata *MD = cast<MetadataAsValue>(Op)->getMetadata();
if (!MD || !isa<MDString>(MD))
return FCmpInst::BAD_FCMP_PREDICATE;
return StringSwitch<FCmpInst::Predicate>(cast<MDString>(MD)->getString())
Expand All @@ -258,6 +258,10 @@ FCmpInst::Predicate ConstrainedFPCmpIntrinsic::getPredicate() const {
.Default(FCmpInst::BAD_FCMP_PREDICATE);
}

/// Returns the floating-point predicate decoded from the condition-code
/// metadata operand of this constrained comparison.
FCmpInst::Predicate ConstrainedFPCmpIntrinsic::getPredicate() const {
  // The condition code is passed as the third argument (index 2).
  const Value *CondCodeArg = getArgOperand(2);
  return getFPPredicateFromMD(CondCodeArg);
}

bool ConstrainedFPIntrinsic::isUnaryOp() const {
switch (getIntrinsicID()) {
default:
Expand Down Expand Up @@ -560,6 +564,37 @@ bool VPCastIntrinsic::isVPCast(Intrinsic::ID ID) {
return false;
}

/// Returns true if \p ID is an intrinsic that llvm/IR/VPIntrinsics.def tags
/// with VP_PROPERTY_CMP, and false for every other ID.
bool VPCmpIntrinsic::isVPCmp(Intrinsic::ID ID) {
switch (ID) {
default:
break;
// The case labels are generated by including VPIntrinsics.def with the macros
// below: each intrinsic registration opens a case label, a VP_PROPERTY_CMP
// entry returns true from inside that case, and the end of the registration
// breaks out so that untagged intrinsics reach the final `return false`.
#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
#define VP_PROPERTY_CMP(CCPOS, ...) return true;
#define END_REGISTER_VP_INTRINSIC(VPID) break;
#include "llvm/IR/VPIntrinsics.def"
}
return false;
}

/// Returns the comparison predicate encoded in this intrinsic's condition-code
/// metadata operand.
///
/// The operand position and the floating-point/integer flag are taken from the
/// intrinsic's VP_PROPERTY_CMP entry in llvm/IR/VPIntrinsics.def. Only
/// floating-point comparisons are handled here: the assertion below rejects
/// any intrinsic without a VP_PROPERTY_CMP entry or with ISFP unset.
CmpInst::Predicate VPCmpIntrinsic::getPredicate() const {
bool IsFP = true;
// Stays empty unless the .def file provides a VP_PROPERTY_CMP entry for this
// intrinsic ID.
Optional<unsigned> CCArgIdx;
switch (getIntrinsicID()) {
default:
break;
// Generated switch: each intrinsic registration opens a case label, and a
// VP_PROPERTY_CMP entry records the condition-code operand index and whether
// it is a floating-point condition code before leaving the switch.
#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
#define VP_PROPERTY_CMP(CCPOS, ISFP) \
CCArgIdx = CCPOS; \
IsFP = ISFP; \
break;
#define END_REGISTER_VP_INTRINSIC(VPID) break;
#include "llvm/IR/VPIntrinsics.def"
}
assert(CCArgIdx.hasValue() && IsFP &&
"Unexpected vector-predicated comparison");
// Decode the metadata string operand into an FCmpInst predicate.
return getFPPredicateFromMD(getArgOperand(*CCArgIdx));
}

unsigned VPReductionIntrinsic::getVectorParamPos() const {
return *VPReductionIntrinsic::getVectorParamPos(getIntrinsicID());
}
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/IR/Verifier.cpp
Expand Up @@ -5601,6 +5601,11 @@ void Verifier::visitVPIntrinsic(VPIntrinsic &VPI) {
"equal",
*VPCast);
}
if (VPI.getIntrinsicID() == Intrinsic::vp_fcmp) {
auto Pred = cast<VPCmpIntrinsic>(&VPI)->getPredicate();
Assert(CmpInst::isFPPredicate(Pred),
"invalid predicate for VP FP comparison intrinsic", &VPI);
}
}

void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
Expand Down
14 changes: 13 additions & 1 deletion llvm/test/Verifier/invalid-vp-intrinsics.ll
@@ -1,6 +1,7 @@
; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s

declare <4 x i32> @llvm.vp.fptosi.v4i32.v8f32(<8 x float>, <4 x i1>, i32);
declare <4 x i32> @llvm.vp.fptosi.v4i32.v8f32(<8 x float>, <4 x i1>, i32)
declare <4 x i1> @llvm.vp.fcmp.v4f32(<4 x float>, <4 x float>, metadata, <4 x i1>, i32)

; CHECK: VP cast intrinsic first argument and result vector lengths must be equal
; CHECK-NEXT: %r0 = call <4 x i32>
Expand All @@ -9,3 +10,14 @@ define void @test_vp_fptosi(<8 x float> %src, <4 x i1> %m, i32 %n) {
%r0 = call <4 x i32> @llvm.vp.fptosi.v4i32.v8f32(<8 x float> %src, <4 x i1> %m, i32 %n)
ret void
}

; CHECK: invalid predicate for VP FP comparison intrinsic
; CHECK-NEXT: %r0 = call <4 x i1> @llvm.vp.fcmp.v4f32
; CHECK: invalid predicate for VP FP comparison intrinsic
; CHECK-NEXT: %r1 = call <4 x i1> @llvm.vp.fcmp.v4f32

define void @test_vp_fcmp(<4 x float> %a, <4 x float> %b, <4 x i1> %m, i32 %n) {
%r0 = call <4 x i1> @llvm.vp.fcmp.v4f32(<4 x float> %a, <4 x float> %b, metadata !"bad", <4 x i1> %m, i32 %n)
%r1 = call <4 x i1> @llvm.vp.fcmp.v4f32(<4 x float> %a, <4 x float> %b, metadata !"eq", <4 x i1> %m, i32 %n)
ret void
}
7 changes: 7 additions & 0 deletions llvm/test/Verifier/vp-intrinsics.ll
Expand Up @@ -63,6 +63,11 @@ define void @test_vp_int_fp_conversions(<8 x i32> %i0, <8 x float> %f0, <8 x i1>
ret void
}

define void @test_vp_comparisons(<8 x float> %f0, <8 x float> %f1, <8 x i1> %mask, i32 %evl) {
%r0 = call <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float> %f0, <8 x float> %f1, metadata !"oeq", <8 x i1> %mask, i32 %evl)
ret void
}

; integer arith
declare <8 x i32> @llvm.vp.add.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.sub.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
Expand Down Expand Up @@ -101,6 +106,8 @@ declare float @llvm.vp.reduce.fmul.v8f32(float, <8 x float>, <8 x i1>, i32)
; casts
declare <8 x float> @llvm.vp.sitofp.v8f32.v8i32(<8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.fptosi.v8i32.v8f32(<8 x float>, <8 x i1>, i32)
; compares
declare <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float>, <8 x float>, metadata, <8 x i1>, i32)
; shuffles
declare <8 x i32> @llvm.experimental.vp.splice.v8i32(<8 x i32>, <8 x i32>, i32, <8 x i1>, i32, i32)
declare <vscale x 8 x i32> @llvm.experimental.vp.splice.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, i32, <vscale x 8 x i1>, i32, i32)
8 changes: 6 additions & 2 deletions llvm/unittests/IR/VPIntrinsicTest.cpp
Expand Up @@ -97,6 +97,9 @@ class VPIntrinsicTest : public testing::Test {
Str << " declare <8 x float> @llvm.vp.sitofp.v8f32"
<< ".v8i32(<8 x i32>, <8 x i1>, i32) ";

Str << " declare <8 x i1> @llvm.vp.fcmp.v8f32"
<< "(<8 x float>, <8 x float>, metadata, <8 x i1>, i32) ";

return parseAssemblyString(Str.str(), Err, C);
}
};
Expand Down Expand Up @@ -314,7 +317,7 @@ TEST_F(VPIntrinsicTest, VPIntrinsicDeclarationForParams) {
}

/// Check that the HANDLE_VP_TO_CONSTRAINEDFP maps to an existing intrinsic with
/// the right amount of metadata args.
/// the right amount of constrained-fp metadata args.
TEST_F(VPIntrinsicTest, HandleToConstrainedFP) {
#define VP_PROPERTY_CONSTRAINEDFP(HASROUND, HASEXCEPT, CFPID) \
{ \
Expand All @@ -323,7 +326,8 @@ TEST_F(VPIntrinsicTest, HandleToConstrainedFP) {
unsigned NumMetadataArgs = 0; \
for (auto TD : T) \
NumMetadataArgs += (TD.Kind == Intrinsic::IITDescriptor::Metadata); \
ASSERT_EQ(NumMetadataArgs, (unsigned)(HASROUND + HASEXCEPT)); \
bool IsCmp = Intrinsic::CFPID == Intrinsic::experimental_constrained_fcmp; \
ASSERT_EQ(NumMetadataArgs, (unsigned)(IsCmp + HASROUND + HASEXCEPT)); \
}
#include "llvm/IR/VPIntrinsics.def"
}
Expand Down

0 comments on commit da6131f

Please sign in to comment.