[RISCV] Avoid generating large LMUL vmv.s.x or vfmv.s.f
This is a follow-up to the patch discussion on D139656. As noted there, the M2/M4/M8 versions of these instructions don't actually exist, and using them results in overly constrained register allocation.

In that review, we'd talked about moving towards a variant of the instructions which ignores LMUL. I decided to see what happened if we simply stopped generating the high-LMUL variants, and the results are surprisingly neutral. Among all the churn, I see only one minor change that looks like a real regression. I think this is worth doing now to loosen register allocation constraints, and to avoid digging the hole around these instructions any deeper while we think about the right model change.
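In concrete terms, the new lowering performs the single-element insert at LMUL <= 1 and, when a wider result type was requested, drops that LMUL1 value into element 0 of an undef vector of the original type via INSERT_SUBVECTOR. Below is a condensed sketch of the integer path (the floating-point path is identical with VFMV_S_F_VL); the helper name lowerScalarInsertM1 is illustrative only, and the snippet assumes the surrounding RISC-V backend context from RISCVISelLowering.cpp (getLMUL1VT, the RISCVISD node kinds).

  // Sketch: do the vmv.s.x at M1 (or smaller), then widen the result with an
  // INSERT_SUBVECTOR into an undef vector of the requested type.
  static SDValue lowerScalarInsertM1(SDValue Scalar, SDValue VL, MVT VT,
                                     SDLoc DL, SelectionDAG &DAG,
                                     const RISCVSubtarget &Subtarget) {
    const MVT XLenVT = Subtarget.getXLenVT();
    const MVT M1VT = getLMUL1VT(VT);
    MVT InnerVT = VT.bitsLE(M1VT) ? VT : M1VT;
    // The scalar insert only ever writes element 0, so an LMUL1 (or smaller)
    // destination is always sufficient for the vmv.s.x.
    SDValue Result = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, InnerVT,
                                 DAG.getUNDEF(InnerVT), Scalar, VL);
    if (VT != InnerVT)
      Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
                           Result, DAG.getConstant(0, DL, XLenVT));
    return Result;
  }

Since the subvector lands at index 0 of an undef vector, the INSERT_SUBVECTOR is a subregister insert that generates no code; the benefit is that an M1-typed vmv.s.x destination can be allocated to any vector register, whereas an M4 or M8 destination would have to be group-aligned.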

Differential Revision: https://reviews.llvm.org/D140027
preames committed Dec 14, 2022 · 1 parent 7b68411 · commit d860119
Showing 8 changed files with 278 additions and 255 deletions.
30 changes: 26 additions & 4 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2816,14 +2816,26 @@ static MVT getLMUL1VT(MVT VT) {
 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL,
                                  MVT VT, SDLoc DL, SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget) {
+  const MVT XLenVT = Subtarget.getXLenVT();
+
   SDValue Passthru = DAG.getUNDEF(VT);
-  if (VT.isFloatingPoint())
+  if (VT.isFloatingPoint()) {
     // TODO: Use vmv.v.i for appropriate constants
-    return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
+    // Use M1 or smaller to avoid over constraining register allocation
+    const MVT M1VT = getLMUL1VT(VT);
+    auto InnerVT = VT.bitsLE(M1VT) ? VT : M1VT;
+    SDValue Result = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, InnerVT,
+                                 DAG.getUNDEF(InnerVT), Scalar, VL);
+    if (VT != InnerVT)
+      Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
+                           DAG.getUNDEF(VT),
+                           Result, DAG.getConstant(0, DL, XLenVT));
+    return Result;
+  }
+
 
   // Avoid the tricky legalization cases by falling back to using the
   // splat code which already handles it gracefully.
-  const MVT XLenVT = Subtarget.getXLenVT();
   if (!Scalar.getValueType().bitsLE(XLenVT))
     return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
                             DAG.getConstant(1, DL, XLenVT),
@@ -2844,7 +2856,17 @@ static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL,
         VT.bitsLE(getLMUL1VT(VT)))
       return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
   }
-  return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
+  // Use M1 or smaller to avoid over constraining register allocation
+  const MVT M1VT = getLMUL1VT(VT);
+  auto InnerVT = VT.bitsLE(M1VT) ? VT : M1VT;
+  SDValue Result = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, InnerVT,
+                               DAG.getUNDEF(InnerVT), Scalar, VL);
+  if (VT != InnerVT)
+    Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
+                         DAG.getUNDEF(VT),
+                         Result, DAG.getConstant(0, DL, XLenVT));
+  return Result;
+
 }
 
 static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, bool &SwapSources,
20 changes: 10 additions & 10 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll
@@ -109,13 +109,13 @@ define <64 x i1> @insertelt_v64i1(<64 x i1> %x, i1 %elt) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a1, 64
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT:    vmv.s.x v8, a0
-; CHECK-NEXT:    vmv.v.i v12, 0
-; CHECK-NEXT:    vmerge.vim v12, v12, 1, v0
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT:    vmv.s.x v12, a0
 ; CHECK-NEXT:    vsetivli zero, 2, e8, m4, tu, ma
-; CHECK-NEXT:    vslideup.vi v12, v8, 1
+; CHECK-NEXT:    vslideup.vi v8, v12, 1
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT:    vand.vi v8, v12, 1
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %y = insertelement <64 x i1> %x, i1 %elt, i64 1
@@ -127,14 +127,14 @@ define <64 x i1> @insertelt_idx_v64i1(<64 x i1> %x, i1 %elt, i32 zeroext %idx) n
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 64
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
-; CHECK-NEXT:    vmv.s.x v8, a0
-; CHECK-NEXT:    vmv.v.i v12, 0
-; CHECK-NEXT:    vmerge.vim v12, v12, 1, v0
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT:    vmv.s.x v12, a0
 ; CHECK-NEXT:    addi a0, a1, 1
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, tu, ma
-; CHECK-NEXT:    vslideup.vx v12, v8, a1
+; CHECK-NEXT:    vslideup.vx v8, v12, a1
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
-; CHECK-NEXT:    vand.vi v8, v12, 1
+; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
   %y = insertelement <64 x i1> %x, i1 %elt, i32 %idx
3 changes: 2 additions & 1 deletion llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -202,8 +202,9 @@ define <8 x i64> @vrgather_shuffle_vv_v8i64(<8 x i64> %x, <8 x i64> %y) {
 ; RV64-LABEL: vrgather_shuffle_vv_v8i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    li a0, 5
-; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vsetivli zero, 8, e64, m1, ta, ma
 ; RV64-NEXT:    vmv.s.x v16, a0
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-NEXT:    vmv.v.i v20, 2
 ; RV64-NEXT:    vsetvli zero, zero, e64, m4, tu, ma
 ; RV64-NEXT:    vslideup.vi v20, v16, 7
