-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[DAGCombiner][VP] Add DAGCombine for VP_MUL #80105
Conversation
@llvm/pr-subscribers-llvm-selectiondag Author: Jianjian Guan (jacquesguan) ChangesUse visitMUL to combine VP_MUL, share most logic of MUL with VP_MUL. Migrate from https://reviews.llvm.org/D121187 Patch is 38.22 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/80105.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b17724cd07209..6268b6386fe4f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -439,7 +439,7 @@ namespace {
SDValue visitSUBE(SDNode *N);
SDValue visitUSUBO_CARRY(SDNode *N);
SDValue visitSSUBO_CARRY(SDNode *N);
- SDValue visitMUL(SDNode *N);
+ template <class MatchContextClass> SDValue visitMUL(SDNode *N);
SDValue visitMULFIX(SDNode *N);
SDValue useDivRem(SDNode *N);
SDValue visitSDIV(SDNode *N);
@@ -1948,7 +1948,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
case ISD::UMULFIXSAT: return visitMULFIX(N);
- case ISD::MUL: return visitMUL(N);
+ case ISD::MUL:
+ return visitMUL<EmptyMatchContext>(N);
case ISD::SDIV: return visitSDIV(N);
case ISD::UDIV: return visitUDIV(N);
case ISD::SREM:
@@ -4356,11 +4357,13 @@ SDValue DAGCombiner::visitMULFIX(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitMUL(SDNode *N) {
+template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
SDLoc DL(N);
+ bool IsVP = ISD::isVPOpcode(N->getOpcode());
+ MatchContextClass matcher(DAG, TLI, N);
// fold (mul x, undef) -> 0
if (N0.isUndef() || N1.isUndef())
@@ -4373,7 +4376,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// canonicalize constant to RHS (vector doesn't have to splat)
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(ISD::MUL, DL, VT, N1, N0);
+ return matcher.getNode(ISD::MUL, DL, VT, N1, N0);
bool N1IsConst = false;
bool N1IsOpaqueConst = false;
@@ -4381,8 +4384,10 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
- return FoldedVOp;
+ // TODO: Change this to use SimplifyVBinOp when it supports VP op.
+ if (!IsVP)
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
assert((!N1IsConst ||
@@ -4404,12 +4409,13 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
if (N1IsConst && ConstValue1.isOne())
return N0;
- if (SDValue NewSel = foldBinOpIntoSelect(N))
- return NewSel;
+ if (!IsVP)
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
// fold (mul x, -1) -> 0-x
if (N1IsConst && ConstValue1.isAllOnes())
- return DAG.getNegative(N0, DL, VT);
+ return matcher.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
// fold (mul x, (1 << c)) -> x << c
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
@@ -4417,7 +4423,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
EVT ShiftVT = getShiftAmountTy(N0.getValueType());
SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
- return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
+ return matcher.getNode(ISD::SHL, DL, VT, N0, Trunc);
}
}
@@ -4428,26 +4434,27 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// FIXME: If the input is something that is easily negated (e.g. a
// single-use add), we should put the negate there.
- return DAG.getNode(ISD::SUB, DL, VT,
- DAG.getConstant(0, DL, VT),
- DAG.getNode(ISD::SHL, DL, VT, N0,
- DAG.getConstant(Log2Val, DL, ShiftVT)));
+ return matcher.getNode(
+ ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ matcher.getNode(ISD::SHL, DL, VT, N0,
+ DAG.getConstant(Log2Val, DL, ShiftVT)));
}
// Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
// hi result is in use in case we hit this mid-legalization.
- for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
- if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
- SDVTList LoHiVT = DAG.getVTList(VT, VT);
- // TODO: Can we match commutable operands with getNodeIfExists?
- if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
- if (LoHi->hasAnyUseOfValue(1))
- return SDValue(LoHi, 0);
- if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
- if (LoHi->hasAnyUseOfValue(1))
- return SDValue(LoHi, 0);
+ if (!IsVP)
+ for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
+ if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
+ SDVTList LoHiVT = DAG.getVTList(VT, VT);
+ // TODO: Can we match commutable operands with getNodeIfExists?
+ if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
+ if (LoHi->hasAnyUseOfValue(1))
+ return SDValue(LoHi, 0);
+ if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
+ if (LoHi->hasAnyUseOfValue(1))
+ return SDValue(LoHi, 0);
+ }
}
- }
// Try to transform:
// (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
@@ -4464,7 +4471,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// x * 0xf800 --> (x << 16) - (x << 11)
// x * -0x8800 --> -((x << 15) + (x << 11))
// x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
- if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
+ if (!IsVP && N1IsConst &&
+ TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
// TODO: We could handle more general decomposition of any constant by
// having the target set a limit on number of ops and making a
// callback to determine that sequence (similar to sqrt expansion).
@@ -4498,7 +4506,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
}
// (mul (shl X, c1), c2) -> (mul X, c2 << c1)
- if (N0.getOpcode() == ISD::SHL) {
+ if (matcher.match(N0, ISD::SHL)) {
SDValue N01 = N0.getOperand(1);
if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
@@ -4510,34 +4518,34 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue Sh, Y;
// Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
- if (N0.getOpcode() == ISD::SHL &&
+ if (matcher.match(N0, ISD::SHL) &&
isConstantOrConstantVector(N0.getOperand(1)) && N0->hasOneUse()) {
Sh = N0; Y = N1;
- } else if (N1.getOpcode() == ISD::SHL &&
+ } else if (matcher.match(N1, ISD::SHL) &&
isConstantOrConstantVector(N1.getOperand(1)) &&
N1->hasOneUse()) {
Sh = N1; Y = N0;
}
if (Sh.getNode()) {
- SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
- return DAG.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
+ SDValue Mul = matcher.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
+ return matcher.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
}
}
// fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
- if (N0.getOpcode() == ISD::ADD &&
+ if (matcher.match(N0, ISD::ADD) &&
DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
isMulAddWithConstProfitable(N, N0, N1))
- return DAG.getNode(
+ return matcher.getNode(
ISD::ADD, DL, VT,
- DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
- DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
+ matcher.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
+ matcher.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
// Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
ConstantSDNode *NC1 = isConstOrConstSplat(N1);
- if (N0.getOpcode() == ISD::VSCALE && NC1) {
+ if (!IsVP && N0.getOpcode() == ISD::VSCALE && NC1) {
const APInt &C0 = N0.getConstantOperandAPInt(0);
const APInt &C1 = NC1->getAPIntValue();
return DAG.getVScale(DL, VT, C0 * C1);
@@ -4545,7 +4553,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
APInt MulVal;
- if (N0.getOpcode() == ISD::STEP_VECTOR &&
+ if (!IsVP && N0.getOpcode() == ISD::STEP_VECTOR &&
ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
const APInt &C0 = N0.getConstantOperandAPInt(0);
APInt NewStep = C0 * MulVal;
@@ -4583,13 +4591,17 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
}
// reassociate mul
- if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
- return RMUL;
+ // TODO: Change reassociateOps to support vp ops.
+ if (!IsVP)
+ if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
+ return RMUL;
// Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
- if (SDValue SD =
- reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
- return SD;
+ // TODO: Change reassociateReduction to support vp ops.
+ if (!IsVP)
+ if (SDValue SD =
+ reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
+ return SD;
// Simplify the operands using demanded-bits information.
if (SimplifyDemandedBits(SDValue(N, 0)))
@@ -26421,6 +26433,10 @@ SDValue DAGCombiner::visitVPOp(SDNode *N) {
return visitFMA<VPMatchContext>(N);
case ISD::VP_SELECT:
return visitVP_SELECT(N);
+ case ISD::VP_MUL:
+ return visitMUL<VPMatchContext>(N);
+ default:
+ break;
}
return SDValue();
}
@@ -27578,6 +27594,10 @@ static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
if (!VT.isVector())
return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
// We need to create a build vector
+ if (Op.getOpcode() == ISD::SPLAT_VECTOR)
+ return DAG.getSplat(VT, DL,
+ DAG.getConstant(Pow2Constants.back().logBase2(), DL,
+ VT.getScalarType()));
SmallVector<SDValue> Log2Ops;
for (const APInt &Pow2 : Pow2Constants)
Log2Ops.emplace_back(
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll
index fbb97d6bf3229..24555af611cd0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll
@@ -989,3 +989,285 @@ define <16 x i64> @vmul_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %
%v = call <16 x i64> @llvm.vp.mul.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
ret <16 x i64> %v
}
+
+
+define <8 x i64> @vmul_vv_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vmul_vv_undef_v8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vmul_vv_undef_v8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT: vmv.v.i v8, 0
+; RV64-NEXT: ret
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> undef, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; RV32-LABEL: vmul_vx_undef_v8i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vmul_vx_undef_v8i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT: vmv.v.i v8, 0
+; RV64-NEXT: ret
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> undef, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_zero_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vmul_vx_zero_v8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vmul_vx_zero_v8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT: vmv.v.i v8, 0
+; RV64-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 0, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_zero_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; RV32-LABEL: vmul_vx_zero_v8i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vmul_vx_zero_v8i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT: vmv.v.i v8, 0
+; RV64-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 0, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_one_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_one_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 1, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_one_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_one_v8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 1, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_negone_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_negone_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vrsub.vi v8, v8, 0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_negone_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_negone_v8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vrsub.vi v8, v8, 0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_pow2_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_pow2_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsll.vi v8, v8, 6, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 64, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_pow2_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_pow2_v8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsll.vi v8, v8, 6
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 64, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_negpow2_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_negpow2_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsll.vi v8, v8, 6, v0.t
+; CHECK-NEXT: vrsub.vi v8, v8, 0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 -64, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_negpow2_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_negpow2_v8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsll.vi v8, v8, 6
+; CHECK-NEXT: vrsub.vi v8, v8, 0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 -64, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+declare <8 x i64> @llvm.vp.shl.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32)
+
+define <8 x i64> @vmul_vshl_vx_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vshl_vx_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsll.vi v8, v8, 3, v0.t
+; CHECK-NEXT: li a0, 7
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head1 = insertelement <8 x i64> poison, i64 3, i32 0
+ %vb = shufflevector <8 x i64> %elt.head1, <8 x i64> poison, <8 x i32> zeroinitializer
+ %elt.head2 = insertelement <8 x i64> poison, i64 7, i32 0
+ %vc = shufflevector <8 x i64> %elt.head2, <8 x i64> poison, <8 x i32> zeroinitializer
+ %vshl = call <8 x i64> @llvm.vp.shl.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %vshl, <8 x i64> %vc, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vshl_vx_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vshl_vx_v8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 56
+; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %elt.head1 = insertelement <8 x i64> poison, i64 3, i32 0
+ %vb = shufflevector <8 x i64> %elt.head1, <8 x i64> poison, <8 x i32> zeroinitializer
+ %elt.head2 = insertelement <8 x i64> poison, i64 7, i32 0
+ %vc = shufflevector <8 x i64> %elt.head2, <8 x i64> poison, <8 x i32> zeroinitializer
+ %vshl = call <8 x i64> @llvm.vp.shl.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %vshl, <8 x i64> %vc, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vshl_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vshl_vv_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0,...
[truncated]
|
@@ -26421,6 +26433,10 @@ SDValue DAGCombiner::visitVPOp(SDNode *N) { | |||
return visitFMA<VPMatchContext>(N); | |||
case ISD::VP_SELECT: | |||
return visitVP_SELECT(N); | |||
case ISD::VP_MUL: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this is a great PR for Loop Vectorize. We have too few optimizations for ISD::VP_*
, and more optimizations akin to this one would be valuable. I tested optimizing VP_UDIV
to VP_MUL
on the RVV1.0 development board and indeed gained benefits.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Because this patch has already undergone thorough review, and a similar one (#79101) has already been merged. I think it can be merged if there are no objections from others.
if (SDValue NewSel = foldBinOpIntoSelect(N)) | ||
return NewSel; | ||
if (!IsVP) | ||
if (SDValue NewSel = foldBinOpIntoSelect(N)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure about the correctness, but this patch has to be rebased first.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is equivalent to no change because of !IsVP
here.
It looks like the majority of the comments were from before the switch to visitMUL. I'm still concerned about the maintainability by non-RISCV developers who don't care about VP. I'd like to hear from someone like @RKSimon. |
SDValue N0 = N->getOperand(0); | ||
SDValue N1 = N->getOperand(1); | ||
EVT VT = N0.getValueType(); | ||
SDLoc DL(N); | ||
bool IsVP = N->isVPOpcode(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Didn't we previously use bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Changed to this form.
|
||
// fold (mul x, -1) -> 0-x | ||
if (N1IsConst && ConstValue1.isAllOnes()) | ||
return DAG.getNegative(N0, DL, VT); | ||
return Matcher.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Losing helpers like this is going to get annoying very quickly :(
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need to add a parameter bool IsVP = false
to getNegative
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I tested it and it seems that we should add the getNegative
function for EmptyMatchContext
and VPMatchContext
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems there are lots of helper functions like getNegative
; maybe we need to find a way to reuse the same logic to avoid duplicated code.
@mshockwave Is working on #78654 which should provide pattern matching helpers like we have for IR transforms and GlobalISel - it'd make sense if we can decide now how we can make VP work with such an approach. |
ping.. |
Have you looked at making this work with SDPatternMatch? |
CC @mshockwave who might be able to advise on how we can add VP_* support |
@jacquesguan you can try to use |
Use visitMUL to combine VP_MUL, share most logic of MUL with VP_MUL. Differential Revision: https://reviews.llvm.org/D121187
Thanks, I changed |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Thanks
Use visitMUL to combine VP_MUL, share most logic of MUL with VP_MUL. Migrate from https://reviews.llvm.org/D121187
Use visitMUL to combine VP_MUL, share most logic of MUL with VP_MUL.
Migrate from https://reviews.llvm.org/D121187