[RISCV][ISel] Remove redundant vmerge for vwsub(u).wv. #80523

sun-jacobi · 2024-02-03T01:36:45Z

Extend #78403 and #80079 to support vwsub.wv and vwsubu.wv.

Code

define <8 x i64> @vwsub_wv_mask_v8i32(<8 x i32> %x, <8 x i64> %y) {
    %mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
    %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
    %sa = sext <8 x i32> %a to <8 x i64>
    %ret = sub <8 x i64> %y, %sa
    ret <8 x i64> %ret
}

Before this patch

Compiler Explorer

vwsub_wv_mask_v8i32:
        li      a0, 42
        vsetivli        zero, 8, e32, m2, ta, ma
        vmslt.vx        v0, v8, a0
        vmv.v.i v10, 0
        vmerge.vvm      v16, v10, v8, v0
        vwsub.wv        v8, v12, v16
        ret

After this patch

vwsub_wv_mask_v8i32:
        li a0, 42
        vsetivli zero, 8, e32, m2, ta, ma
        vmslt.vx v0, v8, a0
        vsetvli zero, zero, e32, m2, tu, mu
        vwsub.wv v12, v12, v8, v0.t
        vmv4r.v v8, v12
        ret

llvmbot · 2024-02-03T01:37:16Z

@llvm/pr-subscribers-backend-risc-v

Author: Chia (sun-jacobi)

Changes

Extend #78403 and #80079 to support vwsub.wv and vwsubu.wv.

Code

define <8 x i64> @vwsub_wv_mask_v8i32(<8 x i32> %x, <8 x i64> %y) {
    %mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
    %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
    %sa = sext <8 x i32> %a to <8 x i64>
    %ret = sub <8 x i64> %y, %sa
    ret <8 x i64> %ret
}

Before this patch

Compiler Explorer

vwsub_wv_mask_v8i32:
        li      a0, 42
        vsetivli        zero, 8, e32, m2, ta, ma
        vmslt.vx        v0, v8, a0
        vmv.v.i v10, 0
        vmerge.vvm      v16, v10, v8, v0
        vwsub.wv        v8, v12, v16
        ret

After this patch

vwsub_wv_mask_v8i32:
        li a0, 42
        vsetivli zero, 8, e32, m2, ta, ma
        vmslt.vx v0, v8, a0
        vsetvli zero, zero, e32, m2, tu, mu
        vwsub.wv v12, v12, v8, v0.t
        vmv4r.v v8, v12
        ret

Full diff: https://github.com/llvm/llvm-project/pull/80523.diff

3 Files Affected:

(modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+13-10)
(added) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub-mask.ll (+73)
(added) llvm/test/CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll (+73)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index b5db41197a35a..32ef41e270f63 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13761,11 +13761,13 @@ static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
   return InputRootReplacement;
 }
 
-// Fold (vwadd.wv y, (vmerge cond, x, 0)) -> vwadd.wv y, x, y, cond
+// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
+//      (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
 // y will be the Passthru and cond will be the Mask.
-static SDValue combineVWADDWSelect(SDNode *N, SelectionDAG &DAG) {
+static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
   unsigned Opc = N->getOpcode();
-  assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL);
+  assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
+         Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
 
   SDValue Y = N->getOperand(0);
   SDValue MergeOp = N->getOperand(1);
@@ -13804,16 +13806,17 @@ static SDValue combineVWADDWSelect(SDNode *N, SelectionDAG &DAG) {
                      N->getFlags());
 }
 
-static SDValue performVWADDW_VLCombine(SDNode *N,
-                                       TargetLowering::DAGCombinerInfo &DCI,
-                                       const RISCVSubtarget &Subtarget) {
+static SDValue performVWADDSUBW_VLCombine(SDNode *N,
+                                          TargetLowering::DAGCombinerInfo &DCI,
+                                          const RISCVSubtarget &Subtarget) {
   [[maybe_unused]] unsigned Opc = N->getOpcode();
-  assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL);
+  assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
+         Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
 
   if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
     return V;
 
-  return combineVWADDWSelect(N, DCI.DAG);
+  return combineVWADDSUBWSelect(N, DCI.DAG);
 }
 
 // Helper function for performMemPairCombine.
@@ -15886,10 +15889,10 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
     return combineToVWMACC(N, DAG, Subtarget);
   case RISCVISD::VWADD_W_VL:
   case RISCVISD::VWADDU_W_VL:
-    return performVWADDW_VLCombine(N, DCI, Subtarget);
-  case RISCVISD::SUB_VL:
   case RISCVISD::VWSUB_W_VL:
   case RISCVISD::VWSUBU_W_VL:
+    return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
+  case RISCVISD::SUB_VL:
   case RISCVISD::MUL_VL:
     return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
   case RISCVISD::VFMADD_VL:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub-mask.ll
new file mode 100644
index 0000000000000..382f00913cb41
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub-mask.ll
@@ -0,0 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+
+define <8 x i64> @vwsub_wv_mask_v8i32(<8 x i32> %x, <8 x i64> %y) {
+; CHECK-LABEL: vwsub_wv_mask_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, tu, mu
+; CHECK-NEXT:    vwsub.wv v12, v12, v8, v0.t
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+    %mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
+    %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+    %sa = sext <8 x i32> %a to <8 x i64>
+    %ret = sub <8 x i64> %y, %sa
+    ret <8 x i64> %ret
+}
+
+define <8 x i64> @vwsubu_wv_mask_v8i32(<8 x i32> %x, <8 x i64> %y) {
+; CHECK-LABEL: vwsubu_wv_mask_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, tu, mu
+; CHECK-NEXT:    vwsubu.wv v12, v12, v8, v0.t
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+    %mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
+    %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+    %sa = zext <8 x i32> %a to <8 x i64>
+    %ret = sub <8 x i64> %y, %sa
+    ret <8 x i64> %ret
+}
+
+define <8 x i64> @vwsubu_vv_mask_v8i32(<8 x i32> %x, <8 x i32> %y) {
+; CHECK-LABEL: vwsubu_vv_mask_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vmv.v.i v12, 0
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
+; CHECK-NEXT:    vwsubu.vv v12, v10, v8
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+    %mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
+    %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+    %sa = zext <8 x i32> %a to <8 x i64>
+    %sy = zext <8 x i32> %y to <8 x i64>
+    %ret = sub <8 x i64> %sy, %sa
+    ret <8 x i64> %ret
+}
+
+define <8 x i64> @vwsub_wv_mask_v8i32_nonzero(<8 x i32> %x, <8 x i64> %y) {
+; CHECK-LABEL: vwsub_wv_mask_v8i32_nonzero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vmv.v.i v10, 1
+; CHECK-NEXT:    vmerge.vvm v16, v10, v8, v0
+; CHECK-NEXT:    vwsub.wv v8, v12, v16
+; CHECK-NEXT:    ret
+    %mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
+    %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+    %sa = sext <8 x i32> %a to <8 x i64>
+    %ret = sub <8 x i64> %y, %sa
+    ret <8 x i64> %ret
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll
new file mode 100644
index 0000000000000..0cc0063c1d41c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll
@@ -0,0 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+
+define <vscale x 8 x i64> @vwsub_wv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
+; CHECK-LABEL: vwsub_wv_mask_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, tu, mu
+; CHECK-NEXT:    vwsub.wv v16, v16, v8, v0.t
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+    %mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+    %a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
+    %sa = sext <vscale x 8 x i32> %a to <vscale x 8 x i64>
+    %ret = sub <vscale x 8 x i64> %y, %sa
+    ret <vscale x 8 x i64> %ret
+}
+
+define <vscale x 8 x i64> @vwsubu_wv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
+; CHECK-LABEL: vwsubu_wv_mask_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, tu, mu
+; CHECK-NEXT:    vwsubu.wv v16, v16, v8, v0.t
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+    %mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+    %a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
+    %sa = zext <vscale x 8 x i32> %a to <vscale x 8 x i64>
+    %ret = sub <vscale x 8 x i64> %y, %sa
+    ret <vscale x 8 x i64> %ret
+}
+
+define <vscale x 8 x i64> @vwsubu_vv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y) {
+; CHECK-LABEL: vwsubu_vv_mask_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vmv.v.i v16, 0
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT:    vwsubu.vv v16, v12, v8
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+    %mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+    %a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
+    %sa = zext <vscale x 8 x i32> %a to <vscale x 8 x i64>
+    %sy = zext <vscale x 8 x i32> %y to <vscale x 8 x i64>
+    %ret = sub <vscale x 8 x i64> %sy, %sa
+    ret <vscale x 8 x i64> %ret
+}
+
+define <vscale x 8 x i64> @vwsub_wv_mask_v8i32_nonzero(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
+; CHECK-LABEL: vwsub_wv_mask_v8i32_nonzero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vmv.v.i v12, 1
+; CHECK-NEXT:    vmerge.vvm v24, v12, v8, v0
+; CHECK-NEXT:    vwsub.wv v8, v16, v24
+; CHECK-NEXT:    ret
+    %mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+    %a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+    %sa = sext <vscale x 8 x i32> %a to <vscale x 8 x i64>
+    %ret = sub <vscale x 8 x i64> %y, %sa
+    ret <vscale x 8 x i64> %ret
+}

lukel97

LGTM. For a future patch, could we generalize this with llvm::isNeutralConstant so we can handle vwmul too?

sun-jacobi · 2024-02-05T03:32:09Z

> LGTM. For a future patch, could we generalize this with llvm::isNeutralConstant so we can handle vwmul too?

Sounds great! Thank you.

topperc · 2024-02-05T03:42:45Z

> LGTM. For a future patch, could we generalize this with llvm::isNeutralConstant so we can handle vwmul too?

Isn't this patch for .wv instructions? There is no vwmul.wv.

sun-jacobi · 2024-02-05T03:45:07Z

> > LGTM. For a future patch, could we generalize this with llvm::isNeutralConstant so we can handle vwmul too?
>
> Isn't this patch for .wv instructions? There is no vwmul.wv.

Oops, you are right. Thanks.

llvmbot added the backend:RISC-V label Feb 3, 2024

sun-jacobi requested review from lukel97 and topperc February 3, 2024 01:37

lukel97 approved these changes Feb 5, 2024

View reviewed changes

[RISCV][ISel] Remove redundant vmerge for vwsub(u).wv.

a53a3de

sun-jacobi force-pushed the merge-vwsub-mask branch from 10ccfc8 to a53a3de Compare February 5, 2024 04:12

sun-jacobi merged commit db060ab into llvm:main Feb 5, 2024
3 of 4 checks passed

sun-jacobi deleted the merge-vwsub-mask branch February 5, 2024 08:44

agozillon pushed a commit to agozillon/llvm-project that referenced this pull request Feb 5, 2024

[RISCV][ISel] Remove redundant vmerge for vwsub(u).wv. (llvm#80523)

503ce95

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[RISCV][ISel] Remove redundant vmerge for vwsub(u).wv. #80523

[RISCV][ISel] Remove redundant vmerge for vwsub(u).wv. #80523

sun-jacobi commented Feb 3, 2024

llvmbot commented Feb 3, 2024

Code

Before this patch

After this patch

lukel97 left a comment

sun-jacobi commented Feb 5, 2024

topperc commented Feb 5, 2024

sun-jacobi commented Feb 5, 2024

[RISCV][ISel] Remove redundant vmerge for vwsub(u).wv. #80523

[RISCV][ISel] Remove redundant vmerge for vwsub(u).wv. #80523

Conversation

sun-jacobi commented Feb 3, 2024

Code

Before this patch

After this patch

llvmbot commented Feb 3, 2024

Code

Before this patch

After this patch

lukel97 left a comment

Choose a reason for hiding this comment

sun-jacobi commented Feb 5, 2024

topperc commented Feb 5, 2024

sun-jacobi commented Feb 5, 2024