[RISCV] Move vmv_s_x and vfmv_s_f special casing to DAG combine
We'd discussed this in the original set of patches months ago, but decided against it. I think we should reverse ourselves here: the code is significantly more readable, and we pick up cases that were previously missed because not every lowering path called the appropriate helper routine.

Differential Revision: https://reviews.llvm.org/D158854
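
To make the effect concrete, here is one case from the insertelt-int-rv32.ll diff below (insertelt_nxv2i64_imm_c10). The before/after sequences are my reconstruction from the interleaved check lines, since the +/- diff markers were lost in this page capture; treat it as a sketch rather than authoritative compiler output:

  %r = insertelement <vscale x 2 x i64> %v, i64 10, i32 3

  ; Before: the constant is materialized in a GPR and inserted with vmv.s.x.
  li          a0, 10
  vsetivli    zero, 4, e64, m2, tu, ma
  vmv.s.x     v10, a0
  vslideup.vi v8, v10, 3

  ; After: the combine keeps the temporary at LMUL1 and folds the small
  ; immediate into vmv.v.i, so no scalar register is needed.
  vsetvli     a0, zero, e64, m1, ta, ma
  vmv.v.i     v10, 10
  vsetivli    zero, 4, e64, m2, tu, ma
  vslideup.vi v8, v10, 3

The extra vsetvli toggle is the price for dropping the li and for keeping the temporary at M1 rather than M2, consistent with the in-code comment about using M1 or smaller to avoid over-constraining register allocation.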
preames committed Aug 30, 2023
1 parent e015d38 commit fd465f3
Showing 7 changed files with 65 additions and 72 deletions.
67 changes: 33 additions & 34 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3698,20 +3698,10 @@ static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
DAG.getConstant(0, DL, XLenVT));
}

if (VT.isFloatingPoint()) {
// TODO: Use vmv.v.i for appropriate constants
// Use M1 or smaller to avoid over constraining register allocation
const MVT M1VT = getLMUL1VT(VT);
auto InnerVT = VT.bitsLE(M1VT) ? VT : M1VT;
SDValue Result = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, InnerVT,
DAG.getUNDEF(InnerVT), Scalar, VL);
if (VT != InnerVT)
Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
DAG.getUNDEF(VT),
Result, DAG.getConstant(0, DL, XLenVT));
return Result;
}

if (VT.isFloatingPoint())
return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
DAG.getUNDEF(VT), Scalar, VL);

// Avoid the tricky legalization cases by falling back to using the
// splat code which already handles it gracefully.
@@ -3727,24 +3717,8 @@ static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
unsigned ExtOpc =
isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
// We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
// higher would involve overly constraining the register allocator for
// no purpose.
if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar)) {
if (!isNullConstant(Scalar) && isInt<5>(Const->getSExtValue()) &&
VT.bitsLE(getLMUL1VT(VT)))
return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
}
// Use M1 or smaller to avoid over constraining register allocation
const MVT M1VT = getLMUL1VT(VT);
auto InnerVT = VT.bitsLE(M1VT) ? VT : M1VT;
SDValue Result = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, InnerVT,
DAG.getUNDEF(InnerVT), Scalar, VL);
if (VT != InnerVT)
Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
DAG.getUNDEF(VT),
Result, DAG.getConstant(0, DL, XLenVT));
return Result;
return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT,
DAG.getUNDEF(VT), Scalar, VL);
}

// Is this a shuffle extracts either the even or odd elements of a vector?
@@ -13386,6 +13360,8 @@ static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
const MVT XLenVT = Subtarget.getXLenVT();
SDLoc DL(N);

// Helper to call SimplifyDemandedBits on an operand of N where only some low
// bits are demanded. N will be added to the Worklist if it was not deleted.
Expand Down Expand Up @@ -13417,8 +13393,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return DCI.CombineTo(N, Lo, Hi);
}

SDLoc DL(N);

// It's cheaper to materialise two 32-bit integers than to load a double
// from the constant pool and transfer it to integer registers through the
// stack.
@@ -13752,7 +13726,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,

}
EVT IndexVT = Index.getValueType();
MVT XLenVT = Subtarget.getXLenVT();
// RISC-V indexed loads only support the "unsigned unscaled" addressing
// mode, so anything else must be manually legalized.
bool NeedsIdxLegalization =
@@ -14002,6 +13975,32 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return Src.getOperand(0);
// TODO: Use insert_subvector/extract_subvector to change widen/narrow?
}
[[fallthrough]];
}
case RISCVISD::VMV_S_X_VL: {
const MVT VT = N->getSimpleValueType(0);
SDValue Passthru = N->getOperand(0);
SDValue Scalar = N->getOperand(1);
SDValue VL = N->getOperand(2);

// Use M1 or smaller to avoid over constraining register allocation
const MVT M1VT = getLMUL1VT(VT);
if (M1VT.bitsLT(VT) && Passthru.isUndef()) {
SDValue Result =
DAG.getNode(N->getOpcode(), DL, M1VT, Passthru, Scalar, VL);
Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
Result, DAG.getConstant(0, DL, XLenVT));
return Result;
}

// We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
// higher would involve overly constraining the register allocator for
// no purpose.
if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);

break;
}
case ISD::INTRINSIC_VOID:
12 changes: 4 additions & 8 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -419,19 +419,17 @@ define void @insertelt_v8i64_0_store(ptr %x) {
define <8 x i64> @insertelt_v8i64(<8 x i64> %a, i32 %idx) {
; RV32-LABEL: insertelt_v8i64:
; RV32: # %bb.0:
; RV32-NEXT: li a1, -1
; RV32-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; RV32-NEXT: vmv.s.x v12, a1
; RV32-NEXT: vmv.v.i v12, -1
; RV32-NEXT: addi a1, a0, 1
; RV32-NEXT: vsetvli zero, a1, e64, m4, tu, ma
; RV32-NEXT: vslideup.vx v8, v12, a0
; RV32-NEXT: ret
;
; RV64-LABEL: insertelt_v8i64:
; RV64: # %bb.0:
; RV64-NEXT: li a1, -1
; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v12, a1
; RV64-NEXT: vmv.v.i v12, -1
; RV64-NEXT: slli a0, a0, 32
; RV64-NEXT: srli a0, a0, 32
; RV64-NEXT: addi a1, a0, 1
@@ -499,19 +497,17 @@ define void @insertelt_c6_v8i64_0_store(ptr %x) {
define <8 x i64> @insertelt_c6_v8i64(<8 x i64> %a, i32 %idx) {
; RV32-LABEL: insertelt_c6_v8i64:
; RV32: # %bb.0:
; RV32-NEXT: li a1, 6
; RV32-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; RV32-NEXT: vmv.s.x v12, a1
; RV32-NEXT: vmv.v.i v12, 6
; RV32-NEXT: addi a1, a0, 1
; RV32-NEXT: vsetvli zero, a1, e64, m4, tu, ma
; RV32-NEXT: vslideup.vx v8, v12, a0
; RV32-NEXT: ret
;
; RV64-LABEL: insertelt_c6_v8i64:
; RV64: # %bb.0:
; RV64-NEXT: li a1, 6
; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v12, a1
; RV64-NEXT: vmv.v.i v12, 6
; RV64-NEXT: slli a0, a0, 32
; RV64-NEXT: srli a0, a0, 32
; RV64-NEXT: addi a1, a0, 1
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -696,18 +696,18 @@ define void @buildvec_vid_step1o2_add3_v4i16(ptr %z0, ptr %z1, ptr %z2, ptr %z3,
; CHECK-NEXT: vsrl.vi v8, v8, 1
; CHECK-NEXT: vadd.vi v8, v8, 3
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: vmv.v.i v9, 3
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: vse16.v v8, (a2)
; CHECK-NEXT: vse16.v v8, (a3)
; CHECK-NEXT: vse16.v v8, (a4)
; CHECK-NEXT: vmv.v.i v8, 3
; CHECK-NEXT: vmv.v.i v9, 4
; CHECK-NEXT: vmv.v.i v8, 4
; CHECK-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v9, v8, 1
; CHECK-NEXT: vslideup.vi v8, v9, 1
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vse16.v v9, (a5)
; CHECK-NEXT: vse16.v v8, (a5)
; CHECK-NEXT: li a0, 4
; CHECK-NEXT: vslide1down.vx v8, v8, a0
; CHECK-NEXT: vslide1down.vx v8, v9, a0
; CHECK-NEXT: vse16.v v8, (a6)
; CHECK-NEXT: ret
store <4 x i16> <i16 3, i16 3, i16 4, i16 4>, ptr %z0
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -945,15 +945,15 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: li a1, 1
; RV64-NEXT: vmv.v.i v12, 7
; RV64-NEXT: csrr a2, vlenb
; RV64-NEXT: slli a3, a2, 4
; RV64-NEXT: add a2, a3, a2
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vs4r.v v12, (a2) # Unknown-size Folded Spill
; RV64-NEXT: vmv.s.x v16, a1
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a2, a1, 4
; RV64-NEXT: add a1, a2, a1
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v16, 1
; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: add a1, sp, a1
18 changes: 8 additions & 10 deletions llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll
@@ -781,9 +781,9 @@ define <vscale x 2 x i64> @insertelt_nxv2i64_0_c10(<vscale x 2 x i64> %v) {
define <vscale x 2 x i64> @insertelt_nxv2i64_imm_c10(<vscale x 2 x i64> %v) {
; CHECK-LABEL: insertelt_nxv2i64_imm_c10:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 10
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 10
; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vslideup.vi v8, v10, 3
; CHECK-NEXT: ret
%r = insertelement <vscale x 2 x i64> %v, i64 10, i32 3
@@ -793,9 +793,8 @@ define <vscale x 2 x i64> @insertelt_nxv2i64_imm_c10(<vscale x 2 x i64> %v) {
define <vscale x 2 x i64> @insertelt_nxv2i64_idx_c10(<vscale x 2 x i64> %v, i32 %idx) {
; CHECK-LABEL: insertelt_nxv2i64_idx_c10:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 10
; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, a1
; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 10
; CHECK-NEXT: addi a1, a0, 1
; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, ma
; CHECK-NEXT: vslideup.vx v8, v10, a0
@@ -818,9 +817,9 @@ define <vscale x 2 x i64> @insertelt_nxv2i64_0_cn1(<vscale x 2 x i64> %v) {
define <vscale x 2 x i64> @insertelt_nxv2i64_imm_cn1(<vscale x 2 x i64> %v) {
; CHECK-LABEL: insertelt_nxv2i64_imm_cn1:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, -1
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, -1
; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vslideup.vi v8, v10, 3
; CHECK-NEXT: ret
%r = insertelement <vscale x 2 x i64> %v, i64 -1, i32 3
@@ -830,9 +829,8 @@ define <vscale x 2 x i64> @insertelt_nxv2i64_idx_cn1(<vscale x 2 x i64> %v, i32 %idx) {
define <vscale x 2 x i64> @insertelt_nxv2i64_idx_cn1(<vscale x 2 x i64> %v, i32 %idx) {
; CHECK-LABEL: insertelt_nxv2i64_idx_cn1:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, -1
; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, a1
; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, -1
; CHECK-NEXT: addi a1, a0, 1
; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, ma
; CHECK-NEXT: vslideup.vx v8, v10, a0
@@ -8,9 +8,9 @@ define i32 @splat_vector_split_i64() {
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: li a0, 3
; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 3
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v10, 3
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -668,10 +668,10 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV32MV-NEXT: vmv.v.i v10, 1
; RV32MV-NEXT: vmerge.vim v10, v10, -1, v0
; RV32MV-NEXT: vand.vv v8, v8, v10
; RV32MV-NEXT: li a0, 2
; RV32MV-NEXT: vmv.s.x v10, a0
; RV32MV-NEXT: li a0, 1
; RV32MV-NEXT: vmv.s.x v12, a0
; RV32MV-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; RV32MV-NEXT: vmv.v.i v10, 2
; RV32MV-NEXT: vmv.v.i v12, 1
; RV32MV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32MV-NEXT: vmv.v.i v14, 0
; RV32MV-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV32MV-NEXT: vslideup.vi v14, v12, 2