Skip to content

Commit

Permalink
[VP,Integer,#1] Vector-predicated integer intrinsics
Browse files Browse the repository at this point in the history
Summary:
This patch adds IR intrinsics for vector-predicated integer arithmetic.

It is subpatch #1 of the [integer
slice](https://reviews.llvm.org/D57504#1732277) of
[LLVM-VP](https://reviews.llvm.org/D57504).  LLVM-VP is a larger effort to bring
native vector predication to LLVM.

Reviewed By: andrew.w.kaylor

Differential Revision: https://reviews.llvm.org/D69891
  • Loading branch information
simoll committed Mar 19, 2020
1 parent bb7d2b1 commit 733b319
Show file tree
Hide file tree
Showing 13 changed files with 1,258 additions and 4 deletions.
697 changes: 697 additions & 0 deletions llvm/docs/LangRef.rst

Large diffs are not rendered by default.

14 changes: 14 additions & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfo.h
Expand Up @@ -1167,6 +1167,15 @@ class TargetTransformInfo {
/// to a stack reload.
unsigned getGISelRematGlobalCost() const;

/// \name Vector Predication Information
/// @{
/// \return Whether the target supports the %evl parameter of VP intrinsics
/// efficiently in hardware
/// (see LLVM Language Reference - "Vector Predication Intrinsics").
/// Use of %evl is discouraged when that is not the case.
bool hasActiveVectorLength() const;

/// @}

/// @}

private:
Expand Down Expand Up @@ -1420,6 +1429,7 @@ class TargetTransformInfo::Concept {
ReductionFlags) const = 0;
// Pure-virtual entries of the Concept interface. Where a Model override is
// visible in this file (e.g. hasActiveVectorLength, getInstructionLatency),
// it forwards the call to the wrapped Impl object.
virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
virtual unsigned getGISelRematGlobalCost() const = 0;
// Concept-side counterpart of TargetTransformInfo::hasActiveVectorLength().
virtual bool hasActiveVectorLength() const = 0;
virtual int getInstructionLatency(const Instruction *I) = 0;
};

Expand Down Expand Up @@ -1913,6 +1923,10 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
return Impl.getGISelRematGlobalCost();
}

/// Forwards the active-vector-length query to the wrapped implementation and
/// returns its answer unchanged.
bool hasActiveVectorLength() const override {
  const bool SupportsActiveVL = Impl.hasActiveVectorLength();
  return SupportsActiveVL;
}

/// Forwards the latency query for \p I to the wrapped implementation and
/// returns its result converted to int.
int getInstructionLatency(const Instruction *I) override {
  const int Latency = Impl.getInstructionLatency(I);
  return Latency;
}
Expand Down
4 changes: 4 additions & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
Expand Up @@ -628,6 +628,10 @@ class TargetTransformInfoImplBase {
return 1;
}

/// Base implementation of the active-vector-length query: unconditionally
/// reports false.
bool hasActiveVectorLength() const { return false; }

protected:
// Obtain the minimum required size to hold the value (without the sign)
// In case of a vector it returns the min required size for one element.
Expand Down
42 changes: 42 additions & 0 deletions llvm/include/llvm/IR/IntrinsicInst.h
Expand Up @@ -206,6 +206,48 @@ namespace llvm {
/// @}
};

/// This is the common base class for vector predication intrinsics.
class VPIntrinsic : public IntrinsicInst {
public:
static Optional<int> GetMaskParamPos(Intrinsic::ID IntrinsicID);
static Optional<int> GetVectorLengthParamPos(Intrinsic::ID IntrinsicID);

/// The llvm.vp.* intrinsics for this instruction Opcode
static Intrinsic::ID GetForOpcode(unsigned OC);

// Whether \p ID is a VP intrinsic ID.
static bool IsVPIntrinsic(Intrinsic::ID);

/// \return the mask parameter or nullptr.
Value *getMaskParam() const;

/// \return the vector length parameter or nullptr.
Value *getVectorLengthParam() const;

/// \return whether the vector length param can be ignored.
bool canIgnoreVectorLengthParam() const;

/// \return the static element count (vector number of elements) the vector
/// length parameter applies to.
ElementCount getStaticVectorLength() const;

// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const IntrinsicInst *I) {
return IsVPIntrinsic(I->getIntrinsicID());
}
static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}

// Equivalent non-predicated opcode
unsigned getFunctionalOpcode() const {
return GetFunctionalOpcodeForVP(getIntrinsicID());
}

// Equivalent non-predicated opcode
static unsigned GetFunctionalOpcodeForVP(Intrinsic::ID ID);
};

/// This is the common base class for constrained floating point intrinsics.
class ConstrainedFPIntrinsic : public IntrinsicInst {
public:
Expand Down
77 changes: 77 additions & 0 deletions llvm/include/llvm/IR/Intrinsics.td
Expand Up @@ -27,6 +27,10 @@ class IntrinsicProperty;
// effects. It may be CSE'd, deleted if dead, etc.
def IntrNoMem : IntrinsicProperty;

// IntrNoSync - Threads executing the intrinsic will not synchronize using
// memory or other means.
// This property is part of the IntrProperties list applied to the vector
// predication (int_vp_*) intrinsics defined later in this file.
def IntrNoSync : IntrinsicProperty;

// IntrReadMem - This intrinsic only reads from memory. It does not write to
// memory and has no other side effects. Therefore, it cannot be moved across
// potentially aliasing stores. However, it can be reordered otherwise and can
Expand Down Expand Up @@ -1153,6 +1157,79 @@ def int_is_constant : Intrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem, IntrWil
// int_ptrmask - Overloaded on a pointer result type, a pointer operand type
// and an integer operand type. Declared as not accessing memory, speculatable
// and always returning.
def int_ptrmask: Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty, llvm_anyint_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

//===---------------- Vector Predication Intrinsics --------------===//

// Binary operators
//
// Every intrinsic in this block has the same signature: it is overloaded on
// its vector result type (type 0) and takes four operands:
//   - two operands of the result type (LLVMMatchType<0>),
//   - a mask of i1 typed via LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
//   - an i32 vector length.
// This matches the "(x,y,mask,vlen)" operand order used for llvm.vp.*.
//
// The enclosing 'let' applies IntrNoMem, IntrNoSync and IntrWillReturn to
// each def.
let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
def int_vp_add : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_sub : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_mul : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_sdiv : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_udiv : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_srem : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_urem : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_ashr : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_lshr : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_shl : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_or : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_and : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_xor : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;

}


//===-------------------------- Masked Intrinsics -------------------------===//
//
def int_masked_store : Intrinsic<[], [llvm_anyvector_ty,
Expand Down
84 changes: 84 additions & 0 deletions llvm/include/llvm/IR/VPIntrinsics.def
@@ -0,0 +1,84 @@
//===-- IR/VPIntrinsics.def - Describes llvm.vp.* Intrinsics -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains descriptions of the various Vector Predication intrinsics.
// This is used as a central place for enumerating the different instructions
// and should eventually be the place to put comments about the instructions.
//
//===----------------------------------------------------------------------===//

// NOTE: NO INCLUDE GUARD DESIRED!

// Provide definitions of macros so that users of this file do not have to
// define everything to use it...
//
// REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS) registers one VP intrinsic:
//   VPID    - the intrinsic's name as written in this file (e.g. vp_add).
//   MASKPOS - zero-based position of the mask parameter.
//   VLENPOS - zero-based position of the vector length parameter.
// The default below expands to nothing; it only takes effect when the
// including file has not defined the macro itself.
#ifndef REGISTER_VP_INTRINSIC
#define REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS)
#endif

// Map this VP intrinsic to its functional Opcode
//
// HANDLE_VP_TO_OC(VPID, OC) pairs the VP intrinsic VPID with the opcode name
// OC (e.g. vp_add with Add). The default below expands to nothing.
#ifndef HANDLE_VP_TO_OC
#define HANDLE_VP_TO_OC(VPID, OC)
#endif

///// Integer Arithmetic /////
//
// All entries in this section are binary operations with the operand order
// (x, y, mask, vlen), so the mask is parameter 2 and the vector length is
// parameter 3 in every registration. Entries are listed alphabetically by
// intrinsic name; each registration is followed by its opcode mapping.

// llvm.vp.add(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_add, 2, 3)
HANDLE_VP_TO_OC(vp_add, Add)

// llvm.vp.and(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_and, 2, 3)
HANDLE_VP_TO_OC(vp_and, And)

// llvm.vp.ashr(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_ashr, 2, 3)
HANDLE_VP_TO_OC(vp_ashr, AShr)

// llvm.vp.lshr(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_lshr, 2, 3)
HANDLE_VP_TO_OC(vp_lshr, LShr)

// llvm.vp.mul(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_mul, 2, 3)
HANDLE_VP_TO_OC(vp_mul, Mul)

// llvm.vp.or(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_or, 2, 3)
HANDLE_VP_TO_OC(vp_or, Or)

// llvm.vp.sdiv(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_sdiv, 2, 3)
HANDLE_VP_TO_OC(vp_sdiv, SDiv)

// llvm.vp.shl(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_shl, 2, 3)
HANDLE_VP_TO_OC(vp_shl, Shl)

// llvm.vp.srem(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_srem, 2, 3)
HANDLE_VP_TO_OC(vp_srem, SRem)

// llvm.vp.sub(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_sub, 2, 3)
HANDLE_VP_TO_OC(vp_sub, Sub)

// llvm.vp.udiv(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_udiv, 2, 3)
HANDLE_VP_TO_OC(vp_udiv, UDiv)

// llvm.vp.urem(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_urem, 2, 3)
HANDLE_VP_TO_OC(vp_urem, URem)

// llvm.vp.xor(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_xor, 2, 3)
HANDLE_VP_TO_OC(vp_xor, Xor)

// Undefine both macros at the end of the file. The file deliberately has no
// include guard, so each inclusion can supply its own definitions.
#undef REGISTER_VP_INTRINSIC
#undef HANDLE_VP_TO_OC

0 comments on commit 733b319

Please sign in to comment.