Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RISCV] Add a combine to form masked.store from unit strided store #66677

Closed

Conversation

preames
Copy link
Collaborator

@preames preames commented Sep 18, 2023

Add a DAG combine to form a masked.store from a masked_strided_store intrinsic with stride equal to element size. This is the store analogy to PR #65674.

As seen in the tests, this does pickup a few cases that we'd previously missed due to selection ordering. We match strided stores early without going through the recently added generic mscatter combines, and thus weren't recognizing the unit strided store.

Add a DAG combine to form a masked.store from a masked_strided_store intrinsic
with stride equal to element size. This is the store analogy to PR llvm#65674.

As seen in the tests, this does pickup a few cases that we'd previously missed
due to selection ordering.  We match strided stores early without going through
the recently added generic mscatter combines, and thus weren't recognizing the
unit strided store.
@llvmbot
Copy link
Collaborator

llvmbot commented Sep 18, 2023

@llvm/pr-subscribers-backend-risc-v

Changes

Add a DAG combine to form a masked.store from a masked_strided_store intrinsic with stride equal to element size. This is the store analogy to PR #65674.

As seen in the tests, this does pickup a few cases that we'd previously missed due to selection ordering. We match strided stores early without going through the recently added generic mscatter combines, and thus weren't recognizing the unit strided store.


Full diff: https://github.com/llvm/llvm-project/pull/66677.diff

4 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+18)
  • (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll (+2-4)
  • (modified) llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll (+1-2)
  • (modified) llvm/test/CodeGen/RISCV/rvv/strided-load-store-intrinsics.ll (+1-2)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index de58335b435651c..46482cfcfe25ea5 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -14368,6 +14368,24 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                  ISD::UNINDEXED, ISD::NON_EXTLOAD);
       return SDValue();
     }
+    case Intrinsic::riscv_masked_strided_store: {
+      auto *Store = cast<MemIntrinsicSDNode>(N);
+      SDValue Value = N->getOperand(2);
+      SDValue Base = N->getOperand(3);
+      SDValue Stride = N->getOperand(4);
+      SDValue Mask = N->getOperand(5);
+
+      // If the stride is equal to the element size in bytes,  we can use
+      // a masked.store.
+      const unsigned ElementSize = Value.getValueType().getScalarStoreSize();
+      if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
+          StrideC && StrideC->getZExtValue() == ElementSize)
+        return DAG.getMaskedStore(Store->getChain(), DL, Value, Base,
+                                  DAG.getUNDEF(XLenVT), Mask,
+                                  Store->getMemoryVT(), Store->getMemOperand(),
+                                  ISD::UNINDEXED, false);
+      return SDValue();
+    }
     case Intrinsic::riscv_vcpop:
     case Intrinsic::riscv_vcpop_mask:
     case Intrinsic::riscv_vfirst:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
index 7309fb9edec0657..60b61e889315cfe 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -11296,9 +11296,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
 define void @mscatter_unit_stride(<8 x i16> %val, ptr %base) {
 ; CHECK-LABEL: mscatter_unit_stride:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 2
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vsse16.v v8, (a0), a1
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x i1> poison, i1 true, i16 0
   %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -11311,9 +11310,8 @@ define void @mscatter_unit_stride_with_offset(<8 x i16> %val, ptr %base) {
 ; CHECK-LABEL: mscatter_unit_stride_with_offset:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi a0, a0, 10
-; CHECK-NEXT:    li a1, 2
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vsse16.v v8, (a0), a1
+; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %head = insertelement <8 x i1> poison, i1 true, i16 0
   %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll
index 9f8f1b224db9e85..df944fada7964b0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll
@@ -114,8 +114,7 @@ define void @stride_one_store(i64 %n, ptr %p) {
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
 ; RV64-NEXT:    vmv.v.i v8, 0
-; RV64-NEXT:    li a0, 8
-; RV64-NEXT:    vsse64.v v8, (a1), a0
+; RV64-NEXT:    vs1r.v v8, (a1)
 ; RV64-NEXT:    ret
   %step = tail call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
   %gep = getelementptr inbounds i64, ptr %p, <vscale x 1 x i64> %step
diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-load-store-intrinsics.ll b/llvm/test/CodeGen/RISCV/rvv/strided-load-store-intrinsics.ll
index 06af0fc8971a543..f8a5b0f9d03a6ca 100644
--- a/llvm/test/CodeGen/RISCV/rvv/strided-load-store-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/strided-load-store-intrinsics.ll
@@ -89,9 +89,8 @@ define void @strided_store_i8_nostride(ptr %p, <32 x i8> %v, <32 x i1> %m) {
 ; CHECK-LABEL: strided_store_i8_nostride:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a1, 32
-; CHECK-NEXT:    li a2, 1
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vsse8.v v8, (a0), a2, v0.t
+; CHECK-NEXT:    vse8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
   call void @llvm.riscv.masked.strided.store.v32i8.p0.i64(<32 x i8> %v, ptr %p, i64 1, <32 x i1> %m)
   ret void

Copy link
Contributor

@lukel97 lukel97 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

Copy link
Contributor

@asb asb left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

@preames
Copy link
Collaborator Author

preames commented Sep 19, 2023

Landed as 188d5c7

@preames preames closed this Sep 19, 2023
@preames preames deleted the pr-riscv-stried-store-with-stride-one branch September 19, 2023 14:53
lukel97 added a commit to lukel97/llvm-project that referenced this pull request Sep 19, 2023
This is the VP equivalent of llvm#66677. If we have a strided store where the
stride is equal to the element width, we can just use a regular VP store.
lukel97 added a commit that referenced this pull request Sep 19, 2023
…66774)

This is the VP equivalent of #66677. If we have a strided store where
the stride is equal to the element width, we can just use a regular VP
store.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

4 participants