Skip to content

Commit

Permalink
[VP][RISCV] Add vp.bswap and RISC-V support.
Browse files Browse the repository at this point in the history
The patch also added function expandVPBSWAP to expand ISD::VP_BSWAP nodes.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D137928
  • Loading branch information
yetingk committed Nov 16, 2022
1 parent 8ef1cc9 commit 5c3ca10
Show file tree
Hide file tree
Showing 13 changed files with 3,365 additions and 0 deletions.
49 changes: 49 additions & 0 deletions llvm/docs/LangRef.rst
Expand Up @@ -15233,6 +15233,8 @@ The ``llvm.bitreverse.iN`` intrinsic returns an iN value that has bit
intrinsics, such as ``llvm.bitreverse.v4i32``, operate on a per-element
basis and the element order is not affected.

.. _int_bswap:

'``llvm.bswap.*``' Intrinsics
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Expand Down Expand Up @@ -21999,6 +22001,53 @@ Examples:
%t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %a)
%also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> poison

.. _int_vp_bswap:

'``llvm.vp.bswap.*``' Intrinsics
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Syntax:
"""""""
This is an overloaded intrinsic.

::

declare <16 x i32> @llvm.vp.bswap.v16i32 (<16 x i32> <op>, <16 x i1> <mask>, i32 <vector_length>)
declare <vscale x 4 x i32> @llvm.vp.bswap.nxv4i32 (<vscale x 4 x i32> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
declare <256 x i64> @llvm.vp.bswap.v256i64 (<256 x i64> <op>, <256 x i1> <mask>, i32 <vector_length>)

Overview:
"""""""""

Predicated bswap of a vector of integers.


Arguments:
""""""""""

The first operand and the result have the same vector of integer type. The
second operand is the vector mask and has the same number of elements as the
result vector type. The third operand is the explicit vector length of the
operation.

Semantics:
""""""""""

The '``llvm.vp.bswap``' intrinsic performs bswap (:ref:`bswap <int_bswap>`) of the first operand on each
enabled lane. The result on disabled lanes is a :ref:`poison value <poisonvalues>`.

Examples:
"""""""""

.. code-block:: llvm

%r = call <4 x i32> @llvm.vp.bswap.v4i32(<4 x i32> %a, <4 x i1> %mask, i32 %evl)
;; For all lanes below %evl, %r is lane-wise equivalent to %also.r

%t = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a)
%also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> poison


.. _int_mload_mstore:

Masked Vector Load and Store Intrinsics
Expand Down
5 changes: 5 additions & 0 deletions llvm/include/llvm/CodeGen/TargetLowering.h
Expand Up @@ -4918,6 +4918,11 @@ class TargetLowering : public TargetLoweringBase {
/// \returns The expansion result or SDValue() if it fails.
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const;

/// Expand VP_BSWAP nodes. Expands VP_BSWAP nodes whose vector type has
/// i16, i32 or i64 elements.
/// Returns SDValue() if the expansion fails, i.e. when the result type is not
/// a simple value type or its element type is not one of the above.
/// \param N Node to expand
/// \returns The expansion result or SDValue() if it fails.
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const;

/// Expand BITREVERSE nodes. Expands scalar/vector BITREVERSE nodes.
/// Returns SDValue() if expand fails.
/// \param N Node to expand
Expand Down
4 changes: 4 additions & 0 deletions llvm/include/llvm/IR/Intrinsics.td
Expand Up @@ -1558,6 +1558,10 @@ let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_bswap : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;

// Floating-point arithmetic
def int_vp_fadd : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
Expand Down
4 changes: 4 additions & 0 deletions llvm/include/llvm/IR/VPIntrinsics.def
Expand Up @@ -215,6 +215,10 @@ END_REGISTER_VP(vp_umin, VP_UMIN)
BEGIN_REGISTER_VP(vp_umax, 2, 3, VP_UMAX, -1)
VP_PROPERTY_BINARYOP
END_REGISTER_VP(vp_umax, VP_UMAX)

// llvm.vp.bswap(x,mask,vlen)
BEGIN_REGISTER_VP(vp_bswap, 1, 2, VP_BSWAP, -1)
END_REGISTER_VP(vp_bswap, VP_BSWAP)
///// } Integer Arithmetic

///// Floating-Point Arithmetic {
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
Expand Up @@ -730,6 +730,9 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
case ISD::BSWAP:
Results.push_back(ExpandBSWAP(Node));
return;
case ISD::VP_BSWAP:
Results.push_back(TLI.expandVPBSWAP(Node, DAG));
return;
case ISD::VSELECT:
Results.push_back(ExpandVSELECT(Node));
return;
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
Expand Up @@ -1013,6 +1013,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::ABS:
case ISD::BITREVERSE:
case ISD::BSWAP:
case ISD::VP_BSWAP:
case ISD::CTLZ:
case ISD::CTTZ:
case ISD::CTLZ_ZERO_UNDEF:
Expand Down Expand Up @@ -4084,6 +4085,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::ABS:
case ISD::BITREVERSE:
case ISD::BSWAP:
case ISD::VP_BSWAP:
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTPOP:
Expand Down
76 changes: 76 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Expand Up @@ -8388,6 +8388,82 @@ SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
}
}

/// Expand a VP_BSWAP node into a sequence of VP_SHL / VP_LSHR / VP_AND /
/// VP_OR nodes that moves every byte of each element to its mirrored
/// position. Every node created here is given the mask (operand 1) and the
/// explicit vector length (operand 2) of \p N.
///
/// Only simple value types with i16, i32 or i64 elements are handled.
///
/// \param N   The VP_BSWAP node to expand.
/// \param DAG The SelectionDAG in which the replacement nodes are created.
/// \returns The root of the expansion, or SDValue() when the type is not
///          handled.
SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue Src = N->getOperand(0);
  SDValue Mask = N->getOperand(1);
  SDValue EVL = N->getOperand(2);

  if (!VT.isSimple())
    return SDValue();

  EVT ShAmtVT = getShiftAmountTy(VT, DAG.getDataLayout());

  // Small builders for the predicated nodes used below. Each one materializes
  // its constant operand first and then the VP node itself.
  auto Shl = [&](SDValue V, uint64_t Amt) {
    return DAG.getNode(ISD::VP_SHL, DL, VT, V,
                       DAG.getConstant(Amt, DL, ShAmtVT), Mask, EVL);
  };
  auto Lshr = [&](SDValue V, uint64_t Amt) {
    return DAG.getNode(ISD::VP_LSHR, DL, VT, V,
                       DAG.getConstant(Amt, DL, ShAmtVT), Mask, EVL);
  };
  auto And = [&](SDValue V, uint64_t Bits) {
    return DAG.getNode(ISD::VP_AND, DL, VT, V, DAG.getConstant(Bits, DL, VT),
                       Mask, EVL);
  };
  auto Or = [&](SDValue LHS, SDValue RHS) {
    return DAG.getNode(ISD::VP_OR, DL, VT, LHS, RHS, Mask, EVL);
  };

  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
  case MVT::i16: {
    // Two bytes: swap them with one shift in each direction.
    SDValue LowToHigh = Shl(Src, 8);
    SDValue HighToLow = Lshr(Src, 8);
    return Or(LowToHigh, HighToLow);
  }
  case MVT::i32: {
    // Bytes travelling towards the most significant end.
    SDValue Byte0Up = Shl(Src, 24);
    SDValue Byte1Up = Shl(And(Src, 0xFF00), 8);
    // Bytes travelling towards the least significant end.
    SDValue Byte2Down = And(Lshr(Src, 8), 0xFF00);
    SDValue Byte3Down = Lshr(Src, 24);
    // Merge pairwise, then merge the two halves.
    SDValue UpperHalf = Or(Byte0Up, Byte1Up);
    SDValue LowerHalf = Or(Byte2Down, Byte3Down);
    return Or(UpperHalf, LowerHalf);
  }
  case MVT::i64: {
    // Bytes 0..3 are masked in place and then shifted up.
    SDValue Byte0Up = Shl(Src, 56);
    SDValue Byte1Up = Shl(And(Src, 255ULL << 8), 40);
    SDValue Byte2Up = Shl(And(Src, 255ULL << 16), 24);
    SDValue Byte3Up = Shl(And(Src, 255ULL << 24), 8);
    // Bytes 4..7 are shifted down and then masked at their destination.
    SDValue Byte4Down = And(Lshr(Src, 8), 255ULL << 24);
    SDValue Byte5Down = And(Lshr(Src, 24), 255ULL << 16);
    SDValue Byte6Down = And(Lshr(Src, 40), 255ULL << 8);
    SDValue Byte7Down = Lshr(Src, 56);
    // Balanced OR tree over the eight partial results.
    SDValue Up01 = Or(Byte0Up, Byte1Up);
    SDValue Up23 = Or(Byte2Up, Byte3Up);
    SDValue Down45 = Or(Byte4Down, Byte5Down);
    SDValue Down67 = Or(Byte6Down, Byte7Down);
    SDValue UpperHalf = Or(Up01, Up23);
    SDValue LowerHalf = Or(Down45, Down67);
    return Or(UpperHalf, LowerHalf);
  }
  default:
    // Any other element type is left for the caller to handle.
    return SDValue();
  }
}

SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
SDLoc dl(N);
EVT VT = N->getValueType(0);
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Expand Up @@ -591,6 +591,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
Expand);

setOperationAction(ISD::BSWAP, VT, Expand);
setOperationAction(ISD::VP_BSWAP, VT, Expand);

// Custom-lower extensions and truncations from/to mask types.
setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND},
Expand Down
26 changes: 26 additions & 0 deletions llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
Expand Up @@ -415,6 +415,32 @@ static const CostTblEntry VectorIntrinsicCostTable[]{
{Intrinsic::bswap, MVT::nxv2i64, 31},
{Intrinsic::bswap, MVT::nxv4i64, 31},
{Intrinsic::bswap, MVT::nxv8i64, 31},
{Intrinsic::vp_bswap, MVT::v2i16, 3},
{Intrinsic::vp_bswap, MVT::v4i16, 3},
{Intrinsic::vp_bswap, MVT::v8i16, 3},
{Intrinsic::vp_bswap, MVT::v16i16, 3},
{Intrinsic::vp_bswap, MVT::nxv1i16, 3},
{Intrinsic::vp_bswap, MVT::nxv2i16, 3},
{Intrinsic::vp_bswap, MVT::nxv4i16, 3},
{Intrinsic::vp_bswap, MVT::nxv8i16, 3},
{Intrinsic::vp_bswap, MVT::nxv16i16, 3},
{Intrinsic::vp_bswap, MVT::v2i32, 12},
{Intrinsic::vp_bswap, MVT::v4i32, 12},
{Intrinsic::vp_bswap, MVT::v8i32, 12},
{Intrinsic::vp_bswap, MVT::v16i32, 12},
{Intrinsic::vp_bswap, MVT::nxv1i32, 12},
{Intrinsic::vp_bswap, MVT::nxv2i32, 12},
{Intrinsic::vp_bswap, MVT::nxv4i32, 12},
{Intrinsic::vp_bswap, MVT::nxv8i32, 12},
{Intrinsic::vp_bswap, MVT::nxv16i32, 12},
{Intrinsic::vp_bswap, MVT::v2i64, 31},
{Intrinsic::vp_bswap, MVT::v4i64, 31},
{Intrinsic::vp_bswap, MVT::v8i64, 31},
{Intrinsic::vp_bswap, MVT::v16i64, 31},
{Intrinsic::vp_bswap, MVT::nxv1i64, 31},
{Intrinsic::vp_bswap, MVT::nxv2i64, 31},
{Intrinsic::vp_bswap, MVT::nxv4i64, 31},
{Intrinsic::vp_bswap, MVT::nxv8i64, 31},
{Intrinsic::bitreverse, MVT::v2i8, 17},
{Intrinsic::bitreverse, MVT::v4i8, 17},
{Intrinsic::bitreverse, MVT::v8i8, 17},
Expand Down

0 comments on commit 5c3ca10

Please sign in to comment.