[DAGCombiner][VP] Add DAGCombine for VP_MUL #80105

Merged: 1 commit merged into llvm:main on May 31, 2024

Conversation

jacquesguan (Contributor)

Use visitMUL to combine VP_MUL, sharing most of the MUL combining logic with VP_MUL.

Migrate from https://reviews.llvm.org/D121187
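
For readers unfamiliar with the match-context pattern, here is a minimal sketch of the dispatch this patch sets up, condensed from the diff below (EmptyMatchContext and VPMatchContext are the match contexts DAGCombiner already uses for the templated visitFMA):

// Condensed sketch of the dispatch in the diff below. One templated
// visitMUL handles both opcodes: EmptyMatchContext builds plain ISD nodes,
// while VPMatchContext builds the VP_* counterparts and threads the root
// node's mask and explicit vector length (EVL) through each node it creates.
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  // ...
  case ISD::MUL:
    return visitMUL<EmptyMatchContext>(N);
  // ...
  }
}

SDValue DAGCombiner::visitVPOp(SDNode *N) {
  switch (N->getOpcode()) {
  // ...
  case ISD::VP_MUL:
    return visitMUL<VPMatchContext>(N);
  // ...
  }
}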

@llvmbot added the llvm:SelectionDAG label on Jan 31, 2024

llvmbot (Collaborator) commented on Jan 31, 2024

@llvm/pr-subscribers-llvm-selectiondag

Author: Jianjian Guan (jacquesguan)

Changes

Use visitMUL to combine VP_MUL, sharing most of the MUL combining logic with VP_MUL.

Migrate from https://reviews.llvm.org/D121187


Patch is 38.22 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/80105.diff

3 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+62-42)
  • (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll (+282)
  • (modified) llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll (+257)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b17724cd07209..6268b6386fe4f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -439,7 +439,7 @@ namespace {
     SDValue visitSUBE(SDNode *N);
     SDValue visitUSUBO_CARRY(SDNode *N);
     SDValue visitSSUBO_CARRY(SDNode *N);
-    SDValue visitMUL(SDNode *N);
+    template <class MatchContextClass> SDValue visitMUL(SDNode *N);
     SDValue visitMULFIX(SDNode *N);
     SDValue useDivRem(SDNode *N);
     SDValue visitSDIV(SDNode *N);
@@ -1948,7 +1948,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
   case ISD::SMULFIXSAT:
   case ISD::UMULFIX:
   case ISD::UMULFIXSAT:         return visitMULFIX(N);
-  case ISD::MUL:                return visitMUL(N);
+  case ISD::MUL:
+    return visitMUL<EmptyMatchContext>(N);
   case ISD::SDIV:               return visitSDIV(N);
   case ISD::UDIV:               return visitUDIV(N);
   case ISD::SREM:
@@ -4356,11 +4357,13 @@ SDValue DAGCombiner::visitMULFIX(SDNode *N) {
   return SDValue();
 }
 
-SDValue DAGCombiner::visitMUL(SDNode *N) {
+template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   EVT VT = N0.getValueType();
   SDLoc DL(N);
+  bool IsVP = ISD::isVPOpcode(N->getOpcode());
+  MatchContextClass matcher(DAG, TLI, N);
 
   // fold (mul x, undef) -> 0
   if (N0.isUndef() || N1.isUndef())
@@ -4373,7 +4376,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
   // canonicalize constant to RHS (vector doesn't have to splat)
   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
-    return DAG.getNode(ISD::MUL, DL, VT, N1, N0);
+    return matcher.getNode(ISD::MUL, DL, VT, N1, N0);
 
   bool N1IsConst = false;
   bool N1IsOpaqueConst = false;
@@ -4381,8 +4384,10 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
 
   // fold vector ops
   if (VT.isVector()) {
-    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
-      return FoldedVOp;
+    // TODO: Change this to use SimplifyVBinOp when it supports VP op.
+    if (!IsVP)
+      if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+        return FoldedVOp;
 
     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
     assert((!N1IsConst ||
@@ -4404,12 +4409,13 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
   if (N1IsConst && ConstValue1.isOne())
     return N0;
 
-  if (SDValue NewSel = foldBinOpIntoSelect(N))
-    return NewSel;
+  if (!IsVP)
+    if (SDValue NewSel = foldBinOpIntoSelect(N))
+      return NewSel;
 
   // fold (mul x, -1) -> 0-x
   if (N1IsConst && ConstValue1.isAllOnes())
-    return DAG.getNegative(N0, DL, VT);
+    return matcher.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
 
   // fold (mul x, (1 << c)) -> x << c
   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
@@ -4417,7 +4423,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
     if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
       EVT ShiftVT = getShiftAmountTy(N0.getValueType());
       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
-      return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
+      return matcher.getNode(ISD::SHL, DL, VT, N0, Trunc);
     }
   }
 
@@ -4428,26 +4434,27 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
 
     // FIXME: If the input is something that is easily negated (e.g. a
     // single-use add), we should put the negate there.
-    return DAG.getNode(ISD::SUB, DL, VT,
-                       DAG.getConstant(0, DL, VT),
-                       DAG.getNode(ISD::SHL, DL, VT, N0,
-                            DAG.getConstant(Log2Val, DL, ShiftVT)));
+    return matcher.getNode(
+        ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+        matcher.getNode(ISD::SHL, DL, VT, N0,
+                        DAG.getConstant(Log2Val, DL, ShiftVT)));
   }
 
   // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
   // hi result is in use in case we hit this mid-legalization.
-  for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
-    if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
-      SDVTList LoHiVT = DAG.getVTList(VT, VT);
-      // TODO: Can we match commutable operands with getNodeIfExists?
-      if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
-        if (LoHi->hasAnyUseOfValue(1))
-          return SDValue(LoHi, 0);
-      if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
-        if (LoHi->hasAnyUseOfValue(1))
-          return SDValue(LoHi, 0);
+  if (!IsVP)
+    for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
+      if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
+        SDVTList LoHiVT = DAG.getVTList(VT, VT);
+        // TODO: Can we match commutable operands with getNodeIfExists?
+        if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
+          if (LoHi->hasAnyUseOfValue(1))
+            return SDValue(LoHi, 0);
+        if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
+          if (LoHi->hasAnyUseOfValue(1))
+            return SDValue(LoHi, 0);
+      }
     }
-  }
 
   // Try to transform:
   // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
@@ -4464,7 +4471,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
   //           x * 0xf800 --> (x << 16) - (x << 11)
   //           x * -0x8800 --> -((x << 15) + (x << 11))
   //           x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
-  if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
+  if (!IsVP && N1IsConst &&
+      TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
     // TODO: We could handle more general decomposition of any constant by
     //       having the target set a limit on number of ops and making a
     //       callback to determine that sequence (similar to sqrt expansion).
@@ -4498,7 +4506,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
   }
 
   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
-  if (N0.getOpcode() == ISD::SHL) {
+  if (matcher.match(N0, ISD::SHL)) {
     SDValue N01 = N0.getOperand(1);
     if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
       return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
@@ -4510,34 +4518,34 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
     SDValue Sh, Y;
 
     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
-    if (N0.getOpcode() == ISD::SHL &&
+    if (matcher.match(N0, ISD::SHL) &&
         isConstantOrConstantVector(N0.getOperand(1)) && N0->hasOneUse()) {
       Sh = N0; Y = N1;
-    } else if (N1.getOpcode() == ISD::SHL &&
+    } else if (matcher.match(N1, ISD::SHL) &&
                isConstantOrConstantVector(N1.getOperand(1)) &&
                N1->hasOneUse()) {
       Sh = N1; Y = N0;
     }
 
     if (Sh.getNode()) {
-      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
-      return DAG.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
+      SDValue Mul = matcher.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
+      return matcher.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
     }
   }
 
   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
-  if (N0.getOpcode() == ISD::ADD &&
+  if (matcher.match(N0, ISD::ADD) &&
       DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
       isMulAddWithConstProfitable(N, N0, N1))
-    return DAG.getNode(
+    return matcher.getNode(
         ISD::ADD, DL, VT,
-        DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
-        DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
+        matcher.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
+        matcher.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
 
   // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
   ConstantSDNode *NC1 = isConstOrConstSplat(N1);
-  if (N0.getOpcode() == ISD::VSCALE && NC1) {
+  if (!IsVP && N0.getOpcode() == ISD::VSCALE && NC1) {
     const APInt &C0 = N0.getConstantOperandAPInt(0);
     const APInt &C1 = NC1->getAPIntValue();
     return DAG.getVScale(DL, VT, C0 * C1);
@@ -4545,7 +4553,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
 
   // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
   APInt MulVal;
-  if (N0.getOpcode() == ISD::STEP_VECTOR &&
+  if (!IsVP && N0.getOpcode() == ISD::STEP_VECTOR &&
       ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
     const APInt &C0 = N0.getConstantOperandAPInt(0);
     APInt NewStep = C0 * MulVal;
@@ -4583,13 +4591,17 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
   }
 
   // reassociate mul
-  if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
-    return RMUL;
+  // TODO: Change reassociateOps to support vp ops.
+  if (!IsVP)
+    if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
+      return RMUL;
 
   // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
-  if (SDValue SD =
-          reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
-    return SD;
+  // TODO: Change reassociateReduction to support vp ops.
+  if (!IsVP)
+    if (SDValue SD =
+            reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
+      return SD;
 
   // Simplify the operands using demanded-bits information.
   if (SimplifyDemandedBits(SDValue(N, 0)))
@@ -26421,6 +26433,10 @@ SDValue DAGCombiner::visitVPOp(SDNode *N) {
       return visitFMA<VPMatchContext>(N);
     case ISD::VP_SELECT:
       return visitVP_SELECT(N);
+    case ISD::VP_MUL:
+      return visitMUL<VPMatchContext>(N);
+    default:
+      break;
     }
     return SDValue();
   }
@@ -27578,6 +27594,10 @@ static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
     if (!VT.isVector())
       return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
     // We need to create a build vector
+    if (Op.getOpcode() == ISD::SPLAT_VECTOR)
+      return DAG.getSplat(VT, DL,
+                          DAG.getConstant(Pow2Constants.back().logBase2(), DL,
+                                          VT.getScalarType()));
     SmallVector<SDValue> Log2Ops;
     for (const APInt &Pow2 : Pow2Constants)
       Log2Ops.emplace_back(
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll
index fbb97d6bf3229..24555af611cd0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll
@@ -989,3 +989,285 @@ define <16 x i64> @vmul_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %
   %v = call <16 x i64> @llvm.vp.mul.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
   ret <16 x i64> %v
 }
+
+
+define <8 x i64> @vmul_vv_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vmul_vv_undef_v8i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT:    vmv.v.i v8, 0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vmul_vv_undef_v8i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vmv.v.i v8, 0
+; RV64-NEXT:    ret
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> undef, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; RV32-LABEL: vmul_vx_undef_v8i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT:    vmv.v.i v8, 0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vmul_vx_undef_v8i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vmv.v.i v8, 0
+; RV64-NEXT:    ret
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> undef, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_zero_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vmul_vx_zero_v8i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT:    vmv.v.i v8, 0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vmul_vx_zero_v8i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vmv.v.i v8, 0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 0, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_zero_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; RV32-LABEL: vmul_vx_zero_v8i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT:    vmv.v.i v8, 0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vmul_vx_zero_v8i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vmv.v.i v8, 0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 0, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_one_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_one_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 1, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_one_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_one_v8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 1, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_negone_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_negone_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vrsub.vi v8, v8, 0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_negone_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_negone_v8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vrsub.vi v8, v8, 0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_pow2_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_pow2_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vsll.vi v8, v8, 6, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 64, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_pow2_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_pow2_v8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vsll.vi v8, v8, 6
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 64, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_negpow2_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_negpow2_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vsll.vi v8, v8, 6, v0.t
+; CHECK-NEXT:    vrsub.vi v8, v8, 0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 -64, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_negpow2_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_negpow2_v8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vsll.vi v8, v8, 6
+; CHECK-NEXT:    vrsub.vi v8, v8, 0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 -64, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+declare <8 x i64> @llvm.vp.shl.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32)
+
+define <8 x i64> @vmul_vshl_vx_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vshl_vx_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vsll.vi v8, v8, 3, v0.t
+; CHECK-NEXT:    li a0, 7
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head1 = insertelement <8 x i64> poison, i64 3, i32 0
+  %vb = shufflevector <8 x i64> %elt.head1, <8 x i64> poison, <8 x i32> zeroinitializer
+  %elt.head2 = insertelement <8 x i64> poison, i64 7, i32 0
+  %vc = shufflevector <8 x i64> %elt.head2, <8 x i64> poison, <8 x i32> zeroinitializer
+  %vshl = call <8 x i64> @llvm.vp.shl.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %vshl, <8 x i64> %vc, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vshl_vx_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vshl_vx_v8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 56
+; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %elt.head1 = insertelement <8 x i64> poison, i64 3, i32 0
+  %vb = shufflevector <8 x i64> %elt.head1, <8 x i64> poison, <8 x i32> zeroinitializer
+  %elt.head2 = insertelement <8 x i64> poison, i64 7, i32 0
+  %vc = shufflevector <8 x i64> %elt.head2, <8 x i64> poison, <8 x i32> zeroinitializer
+  %vshl = call <8 x i64> @llvm.vp.shl.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %vshl, <8 x i64> %vc, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vshl_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vshl_vv_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0,...
[truncated]

@@ -26421,6 +26433,10 @@ SDValue DAGCombiner::visitVPOp(SDNode *N) {
return visitFMA<VPMatchContext>(N);
case ISD::VP_SELECT:
return visitVP_SELECT(N);
case ISD::VP_MUL:
sunshaoce (Contributor) commented on Feb 1, 2024

I think this is a great PR for Loop Vectorize. We have too few optimizations for ISD::VP_*, and similar optimizations would follow the same pattern as this one. I tested optimizing VP_UDIV into VP_MUL on an RVV 1.0 development board and it indeed gained benefits.

sunshaoce (Contributor) left a comment

LGTM. This patch has already undergone thorough review, and a similar one (#79101) has already been merged, so I think it can be merged if there are no objections from others.

if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
if (!IsVP)
if (SDValue NewSel = foldBinOpIntoSelect(N))
Member

I'm not sure about the correctness, but this patch has to be rebased first.

Contributor

This is equivalent to no change, because of the !IsVP guard here.

topperc (Collaborator) commented on Feb 2, 2024

> LGTM. Because this patch has already undergone thorough review, and a similar one (#79101) has already been merged. I think it can be merged if there are no objections from others.

It looks like the majority of the comments were from before the switch to visitMUL.

I'm still concerned about maintainability for non-RISCV developers who don't care about VP. I'd like to hear from someone like @RKSimon.

SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
SDLoc DL(N);
bool IsVP = N->isVPOpcode();
Collaborator

Didn't we previously use bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;?

Contributor Author

Changed to this form.


// fold (mul x, -1) -> 0-x
if (N1IsConst && ConstValue1.isAllOnes())
return DAG.getNegative(N0, DL, VT);
return Matcher.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
Collaborator
Losing helpers like this is going to get annoying very quickly :(

Contributor
Do we need to add a parameter bool IsVP = false to getNegative?

Contributor

I tested it, and it seems we should add a getNegative function to EmptyMatchContext and VPMatchContext.

Contributor Author

There are lots of helper functions like getNegative; we may need to find a way to reuse the same logic to avoid duplicated code.
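
A hedged sketch of one way such a helper could be shared across contexts; the wrapper's name and placement here are hypothetical, not what the patch actually committed:

// Hypothetical illustration only: a negate helper that works through any
// match context. Matcher.getNode and DAG.getConstant are the real APIs the
// patch uses; the wrapper itself is an assumption for illustration.
template <class MatchContextClass>
static SDValue getNegatedValue(MatchContextClass &Matcher, SelectionDAG &DAG,
                               SDValue Op, const SDLoc &DL, EVT VT) {
  // Expands to (sub 0, Op); with VPMatchContext this becomes
  // (vp_sub 0, Op, mask, evl) using the root node's mask and EVL.
  return Matcher.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
}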

RKSimon (Collaborator) commented on Feb 4, 2024

@mshockwave is working on #78654, which should provide pattern-matching helpers like we have for IR transforms and GlobalISel; it'd make sense to decide now how we can make VP work with such an approach.

jacquesguan (Contributor Author)

ping..

RKSimon (Collaborator) commented on May 29, 2024

Have you looked at making this work with SDPatternMatch?

RKSimon (Collaborator) commented on May 29, 2024

CC @mshockwave who might be able to advise on how we can add VP_* support

mshockwave (Member)

> CC @mshockwave who might be able to advise on how we can add VP_* support

@jacquesguan you can try to use SDPatternMatch::sd_context_match along with a VPMatchContext object to replace the explicit calls to VPMatchContext::match in your code. I believe it will be pretty straightforward.
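
A minimal sketch of the suggested usage, assuming the SDPatternMatch helpers from #78654 (sd_context_match and m_Opc); the exact call sites in the final revision may differ:

#include "llvm/CodeGen/SDPatternMatch.h"
using namespace llvm::SDPatternMatch;

// Inside the templated visitMUL, instead of calling the context's match()
// directly (matcher.match(N0, ISD::SHL)), the pattern match can carry the
// context, so one check covers ISD::SHL and ISD::VP_SHL alike:
MatchContextClass matcher(DAG, TLI, N);
if (sd_context_match(N0, matcher, m_Opc(ISD::SHL))) {
  // N0 is a (VP_)SHL whose mask and EVL are compatible with the root node N.
}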

Commit: Use visitMUL to combine VP_MUL, sharing most of the MUL combining logic with VP_MUL.

Differential Revision: https://reviews.llvm.org/D121187
jacquesguan (Contributor Author)

> @jacquesguan you can try to use SDPatternMatch::sd_context_match along with a VPMatchContext object to replace the explicit calls to VPMatchContext::match in your code. I believe it will be pretty straightforward.

Thanks, I changed the explicit match calls to sd_context_match.

RKSimon requested a review from mshockwave on May 30, 2024
mshockwave (Member) left a comment

LGTM. Thanks

jacquesguan merged commit db6de1a into llvm:main on May 31, 2024 (5 of 7 checks passed)
HendrikHuebner pushed a commit to HendrikHuebner/llvm-project referencing this pull request on Jun 2, 2024:

Use visitMUL to combine VP_MUL, sharing most of the MUL combining logic with VP_MUL.

Migrate from https://reviews.llvm.org/D121187
Labels: llvm:SelectionDAG (SelectionDAGISel as well)

7 participants