Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RISCV][ISel] Remove redundant vmerge for vwsub(u).wv. #80523

Merged
merged 1 commit into from
Feb 5, 2024

Conversation

sun-jacobi
Copy link
Member

Extend #78403 and #80079 to support vwsub.wv and vwsubu.wv.

Code

define <8 x i64> @vwsub_wv_mask_v8i32(<8 x i32> %x, <8 x i64> %y) {
    %mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
    %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
    %sa = sext <8 x i32> %a to <8 x i64>
    %ret = sub <8 x i64> %y, %sa
    ret <8 x i64> %ret
}

Before this patch

Compiler Explorer

vwsub_wv_mask_v8i32:
        li      a0, 42
        vsetivli        zero, 8, e32, m2, ta, ma
        vmslt.vx        v0, v8, a0
        vmv.v.i v10, 0
        vmerge.vvm      v16, v10, v8, v0
        vwsub.wv        v8, v12, v16
        ret

After this patch

vwsub_wv_mask_v8i32:
        li a0, 42
        vsetivli zero, 8, e32, m2, ta, ma
        vmslt.vx v0, v8, a0
        vsetvli zero, zero, e32, m2, tu, mu
        vwsub.wv v12, v12, v8, v0.t
        vmv4r.v v8, v12
        ret

@llvmbot
Copy link
Collaborator

llvmbot commented Feb 3, 2024

@llvm/pr-subscribers-backend-risc-v

Author: Chia (sun-jacobi)

Changes

Extend #78403 and #80079 to support vwsub.wv and vwsubu.wv.

Code

define <8 x i64> @vwsub_wv_mask_v8i32(<8 x i32> %x, <8 x i64> %y) {
    %mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
    %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
    %sa = sext <8 x i32> %a to <8 x i64>
    %ret = sub <8 x i64> %y, %sa
    ret <8 x i64> %ret
}

Before this patch

Compiler Explorer

vwsub_wv_mask_v8i32:
        li      a0, 42
        vsetivli        zero, 8, e32, m2, ta, ma
        vmslt.vx        v0, v8, a0
        vmv.v.i v10, 0
        vmerge.vvm      v16, v10, v8, v0
        vwsub.wv        v8, v12, v16
        ret

After this patch

vwsub_wv_mask_v8i32:
        li a0, 42
        vsetivli zero, 8, e32, m2, ta, ma
        vmslt.vx v0, v8, a0
        vsetvli zero, zero, e32, m2, tu, mu
        vwsub.wv v12, v12, v8, v0.t
        vmv4r.v v8, v12
        ret

Full diff: https://github.com/llvm/llvm-project/pull/80523.diff

3 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+13-10)
  • (added) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub-mask.ll (+73)
  • (added) llvm/test/CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll (+73)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index b5db41197a35a..32ef41e270f63 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13761,11 +13761,13 @@ static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
   return InputRootReplacement;
 }
 
-// Fold (vwadd.wv y, (vmerge cond, x, 0)) -> vwadd.wv y, x, y, cond
+// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
+//      (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
 // y will be the Passthru and cond will be the Mask.
-static SDValue combineVWADDWSelect(SDNode *N, SelectionDAG &DAG) {
+static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
   unsigned Opc = N->getOpcode();
-  assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL);
+  assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
+         Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
 
   SDValue Y = N->getOperand(0);
   SDValue MergeOp = N->getOperand(1);
@@ -13804,16 +13806,17 @@ static SDValue combineVWADDWSelect(SDNode *N, SelectionDAG &DAG) {
                      N->getFlags());
 }
 
-static SDValue performVWADDW_VLCombine(SDNode *N,
-                                       TargetLowering::DAGCombinerInfo &DCI,
-                                       const RISCVSubtarget &Subtarget) {
+static SDValue performVWADDSUBW_VLCombine(SDNode *N,
+                                          TargetLowering::DAGCombinerInfo &DCI,
+                                          const RISCVSubtarget &Subtarget) {
   [[maybe_unused]] unsigned Opc = N->getOpcode();
-  assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL);
+  assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
+         Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
 
   if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
     return V;
 
-  return combineVWADDWSelect(N, DCI.DAG);
+  return combineVWADDSUBWSelect(N, DCI.DAG);
 }
 
 // Helper function for performMemPairCombine.
@@ -15886,10 +15889,10 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
     return combineToVWMACC(N, DAG, Subtarget);
   case RISCVISD::VWADD_W_VL:
   case RISCVISD::VWADDU_W_VL:
-    return performVWADDW_VLCombine(N, DCI, Subtarget);
-  case RISCVISD::SUB_VL:
   case RISCVISD::VWSUB_W_VL:
   case RISCVISD::VWSUBU_W_VL:
+    return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
+  case RISCVISD::SUB_VL:
   case RISCVISD::MUL_VL:
     return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
   case RISCVISD::VFMADD_VL:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub-mask.ll
new file mode 100644
index 0000000000000..382f00913cb41
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub-mask.ll
@@ -0,0 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+
+define <8 x i64> @vwsub_wv_mask_v8i32(<8 x i32> %x, <8 x i64> %y) {
+; CHECK-LABEL: vwsub_wv_mask_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, tu, mu
+; CHECK-NEXT:    vwsub.wv v12, v12, v8, v0.t
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+    %mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
+    %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+    %sa = sext <8 x i32> %a to <8 x i64>
+    %ret = sub <8 x i64> %y, %sa
+    ret <8 x i64> %ret
+}
+
+define <8 x i64> @vwsubu_wv_mask_v8i32(<8 x i32> %x, <8 x i64> %y) {
+; CHECK-LABEL: vwsubu_wv_mask_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, tu, mu
+; CHECK-NEXT:    vwsubu.wv v12, v12, v8, v0.t
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+    %mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
+    %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+    %sa = zext <8 x i32> %a to <8 x i64>
+    %ret = sub <8 x i64> %y, %sa
+    ret <8 x i64> %ret
+}
+
+define <8 x i64> @vwsubu_vv_mask_v8i32(<8 x i32> %x, <8 x i32> %y) {
+; CHECK-LABEL: vwsubu_vv_mask_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vmv.v.i v12, 0
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
+; CHECK-NEXT:    vwsubu.vv v12, v10, v8
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+    %mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
+    %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+    %sa = zext <8 x i32> %a to <8 x i64>
+    %sy = zext <8 x i32> %y to <8 x i64>
+    %ret = sub <8 x i64> %sy, %sa
+    ret <8 x i64> %ret
+}
+
+define <8 x i64> @vwsub_wv_mask_v8i32_nonzero(<8 x i32> %x, <8 x i64> %y) {
+; CHECK-LABEL: vwsub_wv_mask_v8i32_nonzero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vmv.v.i v10, 1
+; CHECK-NEXT:    vmerge.vvm v16, v10, v8, v0
+; CHECK-NEXT:    vwsub.wv v8, v12, v16
+; CHECK-NEXT:    ret
+    %mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
+    %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+    %sa = sext <8 x i32> %a to <8 x i64>
+    %ret = sub <8 x i64> %y, %sa
+    ret <8 x i64> %ret
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll
new file mode 100644
index 0000000000000..0cc0063c1d41c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll
@@ -0,0 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+
+define <vscale x 8 x i64> @vwsub_wv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
+; CHECK-LABEL: vwsub_wv_mask_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, tu, mu
+; CHECK-NEXT:    vwsub.wv v16, v16, v8, v0.t
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+    %mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+    %a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
+    %sa = sext <vscale x 8 x i32> %a to <vscale x 8 x i64>
+    %ret = sub <vscale x 8 x i64> %y, %sa
+    ret <vscale x 8 x i64> %ret
+}
+
+define <vscale x 8 x i64> @vwsubu_wv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
+; CHECK-LABEL: vwsubu_wv_mask_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, tu, mu
+; CHECK-NEXT:    vwsubu.wv v16, v16, v8, v0.t
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+    %mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+    %a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
+    %sa = zext <vscale x 8 x i32> %a to <vscale x 8 x i64>
+    %ret = sub <vscale x 8 x i64> %y, %sa
+    ret <vscale x 8 x i64> %ret
+}
+
+define <vscale x 8 x i64> @vwsubu_vv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y) {
+; CHECK-LABEL: vwsubu_vv_mask_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vmv.v.i v16, 0
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT:    vwsubu.vv v16, v12, v8
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+    %mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+    %a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
+    %sa = zext <vscale x 8 x i32> %a to <vscale x 8 x i64>
+    %sy = zext <vscale x 8 x i32> %y to <vscale x 8 x i64>
+    %ret = sub <vscale x 8 x i64> %sy, %sa
+    ret <vscale x 8 x i64> %ret
+}
+
+define <vscale x 8 x i64> @vwsub_wv_mask_v8i32_nonzero(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
+; CHECK-LABEL: vwsub_wv_mask_v8i32_nonzero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vmv.v.i v12, 1
+; CHECK-NEXT:    vmerge.vvm v24, v12, v8, v0
+; CHECK-NEXT:    vwsub.wv v8, v16, v24
+; CHECK-NEXT:    ret
+    %mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+    %a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+    %sa = sext <vscale x 8 x i32> %a to <vscale x 8 x i64>
+    %ret = sub <vscale x 8 x i64> %y, %sa
+    ret <vscale x 8 x i64> %ret
+}

Copy link
Contributor

@lukel97 lukel97 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM. For a future patch, could we generalize this with llvm::isNeutralConstant so we can handle vwmul too?

@sun-jacobi
Copy link
Member Author

LGTM. For a future patch, could we generalize this with llvm::isNeutralConstant so we can handle vwmul too?

Sounds great! Thank you.

@topperc
Copy link
Collaborator

topperc commented Feb 5, 2024

LGTM. For a future patch, could we generalize this with llvm::isNeutralConstant so we can handle vwmul too?

Isn't this patch for .wv instructions? There is no vwmul.wv.

@sun-jacobi
Copy link
Member Author

LGTM. For a future patch, could we generalize this with llvm::isNeutralConstant so we can handle vwmul too?

Isn't this patch for .wv instructions? There is no vwmul.wv.

Oops, you are right. Thanks.

@sun-jacobi sun-jacobi merged commit db060ab into llvm:main Feb 5, 2024
3 of 4 checks passed
@sun-jacobi sun-jacobi deleted the merge-vwsub-mask branch February 5, 2024 08:44
agozillon pushed a commit to agozillon/llvm-project that referenced this pull request Feb 5, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

4 participants