[RISCV] Optimize INSERT_VECTOR_ELT sequences
This patch optimizes the codegen for INSERT_VECTOR_ELT in various ways.
Primarily, it removes the use of vslidedown during lowering: the element
is now inserted entirely using vslideup, with a custom VL and slide
index.

Additionally, lowering of i64-element vectors on RV32 has been optimized
in several ways. When the 64-bit value to insert is the same as the
sign-extension of its lower 32 bits, the codegen can follow the regular
path. When that is not possible, a new sequence of two i32 vslide1up
instructions is used to get the element into a vector. This sequence was
suggested by @craig.topper. From there, the value is slid into its final
position, for more consistent lowering across RV32 and RV64.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D98250
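To make the intent concrete, below is a rough sketch of the kind of sequence the new lowering aims for in the common (XLEN >= SEW) case, e.g. inserting an i32 value in a0 at index a1 of an LMUL=1 vector in v8. It is hand-written for illustration; the registers, vtype operands and exact scheduling are assumptions, not output copied from the patch's updated tests.

    # illustrative only -- not taken from the updated tests
    vsetvli a2, zero, e32,m1,ta,mu    # assumed SEW=32, LMUL=1
    vmv.s.x v25, a0                   # move the scalar into element 0 of a scratch vector
    addi    a2, a1, 1                 # VL = insert index + 1
    vsetvli zero, a2, e32,m1,tu,mu    # tail-undisturbed: elements >= VL keep v8's values
    vslideup.vx v8, v25, a1           # slide element 0 of v25 up into element a1 of v8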
frasercrmck committed Mar 12, 2021
1 parent 4d2d585 commit 641f570
Showing 8 changed files with 539 additions and 523 deletions.
101 changes: 62 additions & 39 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2218,6 +2218,12 @@ SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
   return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
 }
 
+// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
+// first position of a vector, and that vector is slid up to the insert index.
+// By limiting the active vector length to index+1 and merging with the
+// original vector (with an undisturbed tail policy for elements >= VL), we
+// achieve the desired result of leaving all elements untouched except the one
+// at VL-1, which is replaced with the desired value.
 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                     SelectionDAG &DAG) const {
   SDLoc DL(Op);
@@ -2233,51 +2239,67 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
   }
 
+  MVT XLenVT = Subtarget.getXLenVT();
+
+  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
+  bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
+  // Even i64-element vectors on RV32 can be lowered without scalar
+  // legalization if the most-significant 32 bits of the value are not affected
+  // by the sign-extension of the lower 32 bits.
+  // TODO: We could also catch sign extensions of a 32-bit value.
+  if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
+    const auto *CVal = cast<ConstantSDNode>(Val);
+    if (isInt<32>(CVal->getSExtValue())) {
+      IsLegalInsert = true;
+      Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
+    }
+  }
+
   SDValue Mask, VL;
   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
 
-  // Custom-legalize INSERT_VECTOR_ELT where XLEN>=SEW, so that the vector is
-  // first slid down into position, the value is inserted into the first
-  // position, and the vector is slid back up. We do this to simplify patterns.
-  //   (slideup vec, (insertelt (slidedown impdef, vec, idx), val, 0), idx),
-  if (Subtarget.is64Bit() || Val.getValueType() != MVT::i64) {
+  SDValue ValInVec;
+
+  if (IsLegalInsert) {
     if (isNullConstant(Idx))
       return DAG.getNode(RISCVISD::VMV_S_XF_VL, DL, ContainerVT, Vec, Val, VL);
-    SDValue Slidedown =
-        DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
-                    DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
-    SDValue InsertElt0 =
-        DAG.getNode(RISCVISD::VMV_S_XF_VL, DL, ContainerVT, Slidedown, Val, VL);
-    return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec, InsertElt0,
-                       Idx, Mask, VL);
-  }
-
-  // Custom-legalize INSERT_VECTOR_ELT where XLEN<SEW, as the SEW element type
-  // is illegal (currently only vXi64 RV32).
-  // Since there is no easy way of getting a single element into a vector when
-  // XLEN<SEW, we lower the operation to the following sequence:
-  //   splat      vVal, rVal
-  //   vid.v      vVid
-  //   vmseq.vx   mMask, vVid, rIdx
-  //   vmerge.vvm vDest, vSrc, vVal, mMask
-  // This essentially merges the original vector with the inserted element by
-  // using a mask whose only set bit is that corresponding to the insert
-  // index.
-  SDValue SplattedVal = DAG.getSplatVector(ContainerVT, DL, Val);
-  SDValue SplattedIdx =
-      DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, Idx, VL);
-
-  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
-  auto SetCCVT =
-      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ContainerVT);
-  SDValue SelectCond =
-      DAG.getNode(RISCVISD::SETCC_VL, DL, SetCCVT, VID, SplattedIdx,
-                  DAG.getCondCode(ISD::SETEQ), Mask, VL);
-  SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT,
-                               SelectCond, SplattedVal, Vec, VL);
+    ValInVec = DAG.getNode(RISCVISD::VMV_S_XF_VL, DL, ContainerVT,
+                           DAG.getUNDEF(ContainerVT), Val, VL);
+  } else {
+    // On RV32, i64-element vectors must be specially handled to place the
+    // value at element 0, by using two vslide1up instructions in sequence on
+    // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
+    // this.
+    SDValue One = DAG.getConstant(1, DL, XLenVT);
+    SDValue ValLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, Zero);
+    SDValue ValHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, One);
+    MVT I32ContainerVT =
+        MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
+    SDValue I32Mask =
+        getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
+    // Limit the active VL to two.
+    SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
+    // Note: We can't pass a UNDEF to the first VSLIDE1UP_VL since an untied
+    // undef doesn't obey the earlyclobber constraint. Just splat a zero value.
+    ValInVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, I32ContainerVT, Zero,
+                           InsertI64VL);
+    // First slide in the hi value, then the lo in underneath it.
+    ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
+                           ValHi, I32Mask, InsertI64VL);
+    ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
+                           ValLo, I32Mask, InsertI64VL);
+    // Bitcast back to the right container type.
+    ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
+  }
 
+  // Now that the value is in a vector, slide it into position.
+  SDValue InsertVL =
+      DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
+  SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
+                                ValInVec, Idx, Mask, InsertVL);
   if (!VecVT.isFixedLengthVector())
-    return Select;
-  return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
+    return Slideup;
+  return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
 }
 
 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
@@ -6100,6 +6122,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(VLEFF)
   NODE_NAME_CASE(VLEFF_MASK)
   NODE_NAME_CASE(VSLIDEUP_VL)
+  NODE_NAME_CASE(VSLIDE1UP_VL)
   NODE_NAME_CASE(VSLIDEDOWN_VL)
   NODE_NAME_CASE(VID_VL)
   NODE_NAME_CASE(VFNCVT_ROD_VL)
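The RV32 path for i64 elements in the hunk above first builds the 64-bit value in an i32 vector with two vslide1up steps at VL=2, then reuses the common vslideup step. A hedged sketch of the shape of code this is intended to produce (again hand-written; register choices and vtype operands are assumptions, not test output):

    # illustrative only: insert the i64 in a1:a0 at index a2 of an LMUL=1 vXi64 vector in v8 (RV32)
    addi    a3, zero, 2
    vsetvli zero, a3, e32,m1,ta,mu    # VL=2: the two i32 halves of one i64 element
    vmv.v.i v25, 0                    # start from a zero splat (an untied undef would
                                      # violate vslide1up's earlyclobber constraint)
    vslide1up.vx v26, v25, a1         # v26 = { hi, 0, ... }
    vslide1up.vx v25, v26, a0         # v25 = { lo, hi, ... } == the i64 value in element 0
    addi    a3, a2, 1                 # VL = insert index + 1
    vsetvli zero, a3, e64,m1,tu,mu    # back to SEW=64, tail-undisturbed
    vslideup.vx v8, v25, a2           # slide the value into element a2 of v8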
4 changes: 4 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -120,6 +120,10 @@ enum NodeType : unsigned {
   // and the fifth the VL.
   VSLIDEUP_VL,
   VSLIDEDOWN_VL,
+  // Matches the semantics of vslide1up. The first operand is the source
+  // vector, the second is the XLenVT scalar value. The third and fourth
+  // operands are the mask and VL operands.
+  VSLIDE1UP_VL,
   // Matches the semantics of the vid.v instruction, with a mask and VL
   // operand.
   VID_VL,
11 changes: 11 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -1129,8 +1129,13 @@ def SDTRVVSlide : SDTypeProfile<1, 5, [
   SDTCisVec<0>, SDTCisSameAs<1, 0>, SDTCisSameAs<2, 0>, SDTCisVT<3, XLenVT>,
   SDTCVecEltisVT<4, i1>, SDTCisSameNumEltsAs<0, 4>, SDTCisVT<5, XLenVT>
 ]>;
+def SDTRVVSlide1 : SDTypeProfile<1, 4, [
+  SDTCisVec<0>, SDTCisSameAs<1, 0>, SDTCisVT<2, XLenVT>,
+  SDTCVecEltisVT<3, i1>, SDTCisSameNumEltsAs<0, 3>, SDTCisVT<4, XLenVT>
+]>;
 
 def riscv_slideup_vl : SDNode<"RISCVISD::VSLIDEUP_VL", SDTRVVSlide, []>;
+def riscv_slide1up_vl : SDNode<"RISCVISD::VSLIDE1UP_VL", SDTRVVSlide1, []>;
 def riscv_slidedown_vl : SDNode<"RISCVISD::VSLIDEDOWN_VL", SDTRVVSlide, []>;
 
 let Predicates = [HasStdExtV] in {
@@ -1157,6 +1162,12 @@ foreach vti = !listconcat(AllIntegerVectors, AllFloatVectors) in {
                                        vti.RegClass:$rs3, vti.RegClass:$rs1, GPR:$rs2,
                                        GPR:$vl, vti.SEW)>;
 
+  def : Pat<(vti.Vector (riscv_slide1up_vl (vti.Vector vti.RegClass:$rs1),
+                                            GPR:$rs2, (vti.Mask true_mask),
+                                            (XLenVT (VLOp GPR:$vl)))),
+            (!cast<Instruction>("PseudoVSLIDE1UP_VX_"#vti.LMul.MX)
+                vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.SEW)>;
+
   def : Pat<(vti.Vector (riscv_slidedown_vl (vti.Vector vti.RegClass:$rs3),
                                             (vti.Vector vti.RegClass:$rs1),
                                             uimm5:$rs2, (vti.Mask true_mask),
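For reference, the vslide1up.vx instruction that the new pattern selects writes the scalar operand into element 0 and shifts each remaining element of the source vector up by one position. A minimal illustration with made-up register contents:

    # assume vl = 4, SEW=32, v8 = { 10, 20, 30, 40 }, a0 = 7
    vslide1up.vx v9, v8, a0           # v9[0] = a0, v9[i] = v8[i-1]  =>  v9 = { 7, 10, 20, 30 }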