Revert "[RISCV] Lower more BUILD_VECTOR sequences to RVV's VID"
This reverts commit a6ca88e.

More caution is required to avoid overflow/underflow. Thanks to the
sanitizers for catching this.
frasercrmck committed Jul 16, 2021
1 parent adee89f commit e3fa2b1
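As a rough illustration of the overflow/underflow concern mentioned in the message (a hypothetical case I am assuming, not one taken from the sanitizer report): the reverted matcher subtracted arbitrary i64 element constants directly in int64_t, and for extreme values that subtraction itself can overflow, which UBSan reports as signed-integer overflow. A minimal standalone C++ sketch, using the GCC/Clang checked-arithmetic builtin:

#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical two-element i64 build vector: <INT64_MIN, INT64_MAX>.
  int64_t Elts[2] = {INT64_MIN, INT64_MAX};

  // Mirrors "int64_t Diff = Val - PrevElt->first;" from the reverted matcher.
  // The plain subtraction would be undefined behaviour here; the checked
  // helper shows why, since the true difference does not fit in int64_t.
  int64_t Diff;
  bool Overflowed = __builtin_sub_overflow(Elts[1], Elts[0], &Diff);
  std::printf("overflowed: %d\n", Overflowed ? 1 : 0); // prints "overflowed: 1"
  return 0;
}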
Showing 7 changed files with 234 additions and 444 deletions.
109 changes: 11 additions & 98 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1384,70 +1384,6 @@ static SDValue lowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG,
return convertFromScalableVector(VT, Splat, DAG, Subtarget);
}

// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
// to the (non-zero) step S and start value X. This can be then lowered as the
// RVV sequence (VID * S) + X, for example.
// Note that this method will also match potentially unappealing index
// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
// determine whether this is worth generating code for.
static Optional<std::pair<int64_t, int64_t>> isSimpleVIDSequence(SDValue Op) {
unsigned NumElts = Op.getNumOperands();
assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
if (!Op.getValueType().isInteger())
return None;

Optional<int64_t> SeqStep, SeqAddend;
Optional<std::pair<int64_t, unsigned>> PrevElt;
unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
for (unsigned Idx = 0; Idx < NumElts; Idx++) {
// Assume undef elements match the sequence; we just have to be careful
// when interpolating across them.
if (Op.getOperand(Idx).isUndef())
continue;
// The BUILD_VECTOR must be all constants.
if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
return None;

int64_t Val = SignExtend64(Op.getConstantOperandVal(Idx), EltSizeInBits);

if (PrevElt) {
// Calculate the step since the last non-undef element, and ensure
// it's consistent across the entire sequence.
int64_t Diff = Val - PrevElt->first;
// The difference must cleanly divide the element span.
if (Diff % (Idx - PrevElt->second) != 0)
return None;
int64_t Step = Diff / (Idx - PrevElt->second);
// A zero step indicates we're either not an index sequence, or we
// have a fractional step. This must be handled by a more complex
// pattern recognition (undefs complicate things here).
if (Step == 0)
return None;
if (!SeqStep)
SeqStep = Step;
else if (Step != SeqStep)
return None;
}

// Record and/or check any addend.
if (SeqStep) {
int64_t Addend = Val - (Idx * *SeqStep);
if (!SeqAddend)
SeqAddend = Addend;
else if (SeqAddend != Addend)
return None;
}

// Record this non-undef element for later.
PrevElt = std::make_pair(Val, Idx);
}
// We need to have logged both a step and an addend for this to count as
// a legal index sequence.
if (!SeqStep || !SeqAddend)
return None;
return std::make_pair(*SeqStep, *SeqAddend);
}
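
To make the matching above concrete, here is a standalone sketch (not the LLVM code itself) of the same step/addend recovery applied to a hypothetical i32 sequence <1, undef, 5, 7>: the difference between 5 (index 2) and 1 (index 0) is 4 over a span of 2 elements, so the step is 2, and the addend is 1 - 0*2 = 1, i.e. the sequence is VID * 2 + 1.

#include <cstdint>
#include <cstdio>
#include <optional>
#include <utility>

int main() {
  // Hypothetical sequence <1, undef, 5, 7>; undef elements are skipped.
  const int64_t Elts[4] = {1, 0, 5, 7};
  const bool IsUndef[4] = {false, true, false, false};

  std::optional<int64_t> SeqStep, SeqAddend;
  std::optional<std::pair<int64_t, unsigned>> PrevElt;
  for (unsigned Idx = 0; Idx < 4; ++Idx) {
    if (IsUndef[Idx])
      continue;
    int64_t Val = Elts[Idx];
    if (PrevElt) {
      // Step since the last non-undef element; it must divide the span
      // cleanly, be non-zero, and agree with any previously seen step.
      int64_t Diff = Val - PrevElt->first; // assumes no overflow in this toy case
      unsigned Span = Idx - PrevElt->second;
      if (Diff % Span != 0 || Diff / Span == 0 ||
          (SeqStep && *SeqStep != Diff / Span))
        return 1;
      SeqStep = Diff / Span;
    }
    if (SeqStep) {
      int64_t Addend = Val - int64_t(Idx) * *SeqStep;
      if (SeqAddend && *SeqAddend != Addend)
        return 1;
      SeqAddend = Addend;
    }
    PrevElt = std::make_pair(Val, Idx);
  }
  if (SeqStep && SeqAddend)
    std::printf("step=%lld addend=%lld\n", (long long)*SeqStep,
                (long long)*SeqAddend); // prints "step=2 addend=1"
  return 0;
}

Only after this recovery would the caller below have checked that both values fit in imm5 before emitting vid-based code.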

static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
@@ -1575,41 +1511,18 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
return convertFromScalableVector(VT, Splat, DAG, Subtarget);
}

// Try and match index sequences, which we can lower to the vid instruction
// with optional modifications. An all-undef vector is matched by
// getSplatValue, above.
if (auto SimpleVID = isSimpleVIDSequence(Op)) {
// Only emit VIDs with suitably-small steps/addends. We use imm5 as a
// threshold since it's the immediate value many RVV instructions accept.
if (isInt<5>(SimpleVID->first) && isInt<5>(SimpleVID->second)) {
// Try and match an index sequence, which we can lower directly to the vid
// instruction. An all-undef vector is matched by getSplatValue, above.
if (VT.isInteger()) {
bool IsVID = true;
for (unsigned I = 0; I < NumElts && IsVID; I++)
IsVID &= Op.getOperand(I).isUndef() ||
(isa<ConstantSDNode>(Op.getOperand(I)) &&
Op.getConstantOperandVal(I) == I);

if (IsVID) {
SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
// Convert right out of the scalable type so we can use standard ISD
// nodes for the rest of the computation. If we used scalable types with
// these, we'd lose the fixed-length vector info and generate worse
// vsetvli code.
VID = convertFromScalableVector(VT, VID, DAG, Subtarget);
int64_t Step = SimpleVID->first;
int64_t Addend = SimpleVID->second;
assert(Step != 0 && "Invalid step");
bool Negate = false;
if (Step != 1) {
int64_t SplatStepVal = Step;
unsigned Opcode = ISD::MUL;
if (isPowerOf2_64(std::abs(Step))) {
Negate = Step < 0;
Opcode = ISD::SHL;
SplatStepVal = Log2_64(std::abs(Step));
}
SDValue SplatStep = DAG.getSplatVector(
VT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
VID = DAG.getNode(Opcode, DL, VT, VID, SplatStep);
}
if (Addend != 0 || Negate) {
SDValue SplatAddend =
DAG.getSplatVector(VT, DL, DAG.getConstant(Addend, DL, XLenVT));
VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VT, SplatAddend, VID);
}
return VID;
return convertFromScalableVector(VT, VID, DAG, Subtarget);
}
}

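For reference, a worked example of what the reverted lowering above would have handled (hypothetical values, chosen to exercise the SHL/Negate branch): the sequence <3, 1, -1, -3> has step -2 and addend 3, both of which fit in imm5. Since |-2| is a power of two, the reverted code would have emitted roughly splat(3) - (vid << 1) instead of a multiply, i.e. the Negate/ISD::SHL special case shown above. The post-revert code only matches the plain identity sequence <0, 1, 2, ...> and lowers it directly to vid.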
34 changes: 20 additions & 14 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
@@ -175,29 +175,34 @@ define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x double> %x) {
; RV32-NEXT: addi a0, zero, 12
; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; RV32-NEXT: vmv.s.x v0, a0
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; RV32-NEXT: vid.v v25
; RV32-NEXT: vrsub.vi v25, v25, 4
; RV32-NEXT: lui a0, %hi(.LCPI7_0)
; RV32-NEXT: addi a0, a0, %lo(.LCPI7_0)
; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v26, (a0), zero
; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, mu
; RV32-NEXT: lui a0, 16
; RV32-NEXT: addi a0, a0, 2
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; RV32-NEXT: vmv.v.x v25, a0
; RV32-NEXT: vsetivli zero, 4, e64, m2, tu, mu
; RV32-NEXT: vrgatherei16.vv v26, v8, v25, v0.t
; RV32-NEXT: vmv2r.v v8, v26
; RV32-NEXT: ret
;
; RV64-LABEL: vrgather_shuffle_xv_v4f64:
; RV64: # %bb.0:
; RV64-NEXT: addi a0, zero, 2
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; RV64-NEXT: vmv.s.x v26, a0
; RV64-NEXT: vmv.v.i v28, 1
; RV64-NEXT: vsetivli zero, 3, e64, m2, tu, mu
; RV64-NEXT: vslideup.vi v28, v26, 2
; RV64-NEXT: addi a0, zero, 12
; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; RV64-NEXT: vmv.s.x v0, a0
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; RV64-NEXT: lui a0, %hi(.LCPI7_0)
; RV64-NEXT: addi a0, a0, %lo(.LCPI7_0)
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; RV64-NEXT: vlse64.v v26, (a0), zero
; RV64-NEXT: vid.v v28
; RV64-NEXT: vrsub.vi v28, v28, 4
; RV64-NEXT: vsetvli zero, zero, e64, m2, tu, mu
; RV64-NEXT: vrgather.vv v26, v8, v28, v0.t
; RV64-NEXT: vmv2r.v v8, v26
@@ -209,27 +214,28 @@ define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x double> %x) {
define <4 x double> @vrgather_shuffle_vx_v4f64(<4 x double> %x) {
; RV32-LABEL: vrgather_shuffle_vx_v4f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; RV32-NEXT: vid.v v25
; RV32-NEXT: addi a0, zero, 3
; RV32-NEXT: vmul.vx v25, v25, a0
; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; RV32-NEXT: vmv.s.x v0, a0
; RV32-NEXT: lui a0, %hi(.LCPI8_0)
; RV32-NEXT: addi a0, a0, %lo(.LCPI8_0)
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v26, (a0), zero
; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, mu
; RV32-NEXT: lui a0, 48
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; RV32-NEXT: vmv.v.x v25, a0
; RV32-NEXT: vsetivli zero, 4, e64, m2, tu, mu
; RV32-NEXT: vrgatherei16.vv v26, v8, v25, v0.t
; RV32-NEXT: vmv2r.v v8, v26
; RV32-NEXT: ret
;
; RV64-LABEL: vrgather_shuffle_vx_v4f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; RV64-NEXT: vid.v v26
; RV64-NEXT: vmv.v.i v28, 3
; RV64-NEXT: vsetvli zero, zero, e64, m2, tu, mu
; RV64-NEXT: vmv.s.x v28, zero
; RV64-NEXT: addi a0, zero, 3
; RV64-NEXT: vmul.vx v28, v26, a0
; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; RV64-NEXT: vmv.s.x v0, a0
; RV64-NEXT: lui a0, %hi(.LCPI8_0)
