[RISCV] Support llvm.masked.compressstore intrinsic #83457

Merged: 6 commits merged into llvm:main on Mar 13, 2024

Conversation

@nikolaypanchenko (Contributor) commented Feb 29, 2024

This changeset enables lowering of llvm.masked.compressstore(%data, %ptr, %mask) on RVV for fixed vector types into:

%0 = vcompress %data, %mask, %vl
%new_vl = vcpop %mask, %vl
vse %0, %ptr, %new_vl

Such lowering is only possible when %data fits into the available LMULs; otherwise llvm.masked.compressstore is scalarized by the ScalarizeMaskedMemIntrin pass.
Even though section 15.8 of the RVV spec provides an alternative sequence for compressstore, vcompress + vcpop should be the proper canonical form for lowering llvm.masked.compressstore. If a RISC-V target finds the sequence from 15.8 better, a peephole optimization can transform vcompress + vcpop into that sequence.
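
For reference, the semantics of the intrinsic can be modeled with a simple scalar loop (an illustrative C++ sketch, not part of the patch): active elements of %data are packed and stored contiguously at %ptr, which is exactly what the vcompress + vcpop + vse sequence produces.

    #include <cstddef>

    // Scalar model of llvm.masked.compressstore(%data, %ptr, %mask): lanes whose
    // mask bit is set are packed together and stored contiguously starting at ptr.
    template <typename T, std::size_t N>
    void compressstore(const T (&data)[N], const bool (&mask)[N], T *ptr) {
      std::size_t out = 0;
      for (std::size_t i = 0; i < N; ++i)
        if (mask[i])
          ptr[out++] = data[i]; // inactive lanes are skipped, not written
    }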

@llvmbot (Collaborator) commented Feb 29, 2024

@llvm/pr-subscribers-llvm-analysis
@llvm/pr-subscribers-backend-x86
@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-risc-v

Author: Kolya Panchenko (nikolaypanchenko)

Changes

Patch is 592.01 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/83457.diff

4 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+14-2)
  • (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp (+7)
  • (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h (+2)
  • (added) llvm/test/CodeGen/RISCV/rvv/compressstore.ll (+17545)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e647f56416bfa6..9be5138b05df07 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -10460,6 +10460,7 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
   SDValue BasePtr = MemSD->getBasePtr();
   SDValue Val, Mask, VL;
 
+  bool IsCompressingStore = false;
   if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
     Val = VPStore->getValue();
     Mask = VPStore->getMask();
@@ -10468,9 +10469,11 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
     const auto *MStore = cast<MaskedStoreSDNode>(Op);
     Val = MStore->getValue();
     Mask = MStore->getMask();
+    IsCompressingStore = MStore->isCompressingStore();
   }
 
-  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
+  bool IsUnmasked =
+      ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
 
   MVT VT = Val.getSimpleValueType();
   MVT XLenVT = Subtarget.getXLenVT();
@@ -10480,7 +10483,7 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
     ContainerVT = getContainerForFixedLengthVector(VT);
 
     Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
-    if (!IsUnmasked) {
+    if (!IsUnmasked || IsCompressingStore) {
       MVT MaskVT = getMaskTypeFor(ContainerVT);
       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
     }
@@ -10489,6 +10492,15 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
   if (!VL)
     VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
 
+  if (IsCompressingStore) {
+    Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
+                      DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
+                      DAG.getUNDEF(ContainerVT), Val, Mask, VL);
+    VL =
+        DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
+                    getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
+  }
+
   unsigned IntID =
       IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
   SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 2e4e69fb4f920f..b54474b14b0543 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1609,3 +1609,10 @@ bool RISCVTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                   C2.NumIVMuls, C2.NumBaseAdds,
                   C2.ScaleCost, C2.ImmCost, C2.SetupCost);
 }
+
+bool RISCVTTIImpl::isLegalMaskedCompressStore(Type *DataTy) {
+  auto *VTy = dyn_cast<VectorType>(DataTy);
+  if (!VTy || VTy->isScalableTy() || !ST->hasVInstructions())
+    return false;
+  return getRegUsageForType(VTy) <= 8;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index af36e9d5d5e886..6433027cce0e27 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -261,6 +261,8 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
     return TLI->isLegalStridedLoadStore(DataTypeVT, Alignment);
   }
 
+  bool isLegalMaskedCompressStore(Type *DataTy);
+
   bool isVScaleKnownToBeAPowerOfTwo() const {
     return TLI->isVScaleKnownToBeAPowerOfTwo();
   }
diff --git a/llvm/test/CodeGen/RISCV/rvv/compressstore.ll b/llvm/test/CodeGen/RISCV/rvv/compressstore.ll
new file mode 100644
index 00000000000000..f227f39740a003
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/compressstore.ll
@@ -0,0 +1,17545 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -verify-machineinstrs -mtriple=riscv64 -mattr=+v,+d %s -o - | FileCheck %s --check-prefix=RV64
+; RUN: llc -verify-machineinstrs -mtriple=riscv32 -mattr=+v,+d %s -o - | FileCheck %s --check-prefix=RV32
+
+; Compress + store for i8 type
+
+define void @test_compresstore_i8_v1(ptr %p, <1 x i1> %mask, <1 x i8> %data) {
+; RV64-LABEL: test_compresstore_i8_v1:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
+; RV64-NEXT:    vcompress.vm v9, v8, v0
+; RV64-NEXT:    vcpop.m a1, v0
+; RV64-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; RV64-NEXT:    vse8.v v9, (a0)
+; RV64-NEXT:    ret
+;
+; RV32-LABEL: test_compresstore_i8_v1:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
+; RV32-NEXT:    vcompress.vm v9, v8, v0
+; RV32-NEXT:    vcpop.m a1, v0
+; RV32-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; RV32-NEXT:    vse8.v v9, (a0)
+; RV32-NEXT:    ret
+entry:
+  tail call void @llvm.masked.compressstore.v1i8(<1 x i8> %data, ptr %p, <1 x i1> %mask)
+  ret void
+}
+
+define void @test_compresstore_i8_v2(ptr %p, <2 x i1> %mask, <2 x i8> %data) {
+; RV64-LABEL: test_compresstore_i8_v2:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
+; RV64-NEXT:    vcompress.vm v9, v8, v0
+; RV64-NEXT:    vcpop.m a1, v0
+; RV64-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; RV64-NEXT:    vse8.v v9, (a0)
+; RV64-NEXT:    ret
+;
+; RV32-LABEL: test_compresstore_i8_v2:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
+; RV32-NEXT:    vcompress.vm v9, v8, v0
+; RV32-NEXT:    vcpop.m a1, v0
+; RV32-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; RV32-NEXT:    vse8.v v9, (a0)
+; RV32-NEXT:    ret
+entry:
+  tail call void @llvm.masked.compressstore.v2i8(<2 x i8> %data, ptr %p, <2 x i1> %mask)
+  ret void
+}
+
+define void @test_compresstore_i8_v4(ptr %p, <4 x i1> %mask, <4 x i8> %data) {
+; RV64-LABEL: test_compresstore_i8_v4:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; RV64-NEXT:    vcompress.vm v9, v8, v0
+; RV64-NEXT:    vcpop.m a1, v0
+; RV64-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; RV64-NEXT:    vse8.v v9, (a0)
+; RV64-NEXT:    ret
+;
+; RV32-LABEL: test_compresstore_i8_v4:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; RV32-NEXT:    vcompress.vm v9, v8, v0
+; RV32-NEXT:    vcpop.m a1, v0
+; RV32-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; RV32-NEXT:    vse8.v v9, (a0)
+; RV32-NEXT:    ret
+entry:
+  tail call void @llvm.masked.compressstore.v4i8(<4 x i8> %data, ptr %p, <4 x i1> %mask)
+  ret void
+}
+
+define void @test_compresstore_i8_v8(ptr %p, <8 x i1> %mask, <8 x i8> %data) {
+; RV64-LABEL: test_compresstore_i8_v8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT:    vcompress.vm v9, v8, v0
+; RV64-NEXT:    vcpop.m a1, v0
+; RV64-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; RV64-NEXT:    vse8.v v9, (a0)
+; RV64-NEXT:    ret
+;
+; RV32-LABEL: test_compresstore_i8_v8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT:    vcompress.vm v9, v8, v0
+; RV32-NEXT:    vcpop.m a1, v0
+; RV32-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; RV32-NEXT:    vse8.v v9, (a0)
+; RV32-NEXT:    ret
+entry:
+  tail call void @llvm.masked.compressstore.v8i8(<8 x i8> %data, ptr %p, <8 x i1> %mask)
+  ret void
+}
+
+define void @test_compresstore_i8_v16(ptr %p, <16 x i1> %mask, <16 x i8> %data) {
+; RV64-LABEL: test_compresstore_i8_v16:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV64-NEXT:    vcompress.vm v9, v8, v0
+; RV64-NEXT:    vcpop.m a1, v0
+; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; RV64-NEXT:    vse8.v v9, (a0)
+; RV64-NEXT:    ret
+;
+; RV32-LABEL: test_compresstore_i8_v16:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT:    vcompress.vm v9, v8, v0
+; RV32-NEXT:    vcpop.m a1, v0
+; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; RV32-NEXT:    vse8.v v9, (a0)
+; RV32-NEXT:    ret
+entry:
+  tail call void @llvm.masked.compressstore.v16i8(<16 x i8> %data, ptr %p, <16 x i1> %mask)
+  ret void
+}
+
+define void @test_compresstore_i8_v32(ptr %p, <32 x i1> %mask, <32 x i8> %data) {
+; RV64-LABEL: test_compresstore_i8_v32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; RV64-NEXT:    vcompress.vm v10, v8, v0
+; RV64-NEXT:    vcpop.m a1, v0
+; RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; RV64-NEXT:    vse8.v v10, (a0)
+; RV64-NEXT:    ret
+;
+; RV32-LABEL: test_compresstore_i8_v32:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; RV32-NEXT:    vcompress.vm v10, v8, v0
+; RV32-NEXT:    vcpop.m a1, v0
+; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; RV32-NEXT:    vse8.v v10, (a0)
+; RV32-NEXT:    ret
+entry:
+  tail call void @llvm.masked.compressstore.v32i8(<32 x i8> %data, ptr %p, <32 x i1> %mask)
+  ret void
+}
+
+define void @test_compresstore_i8_v64(ptr %p, <64 x i1> %mask, <64 x i8> %data) {
+; RV64-LABEL: test_compresstore_i8_v64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    li a1, 64
+; RV64-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
+; RV64-NEXT:    vcompress.vm v12, v8, v0
+; RV64-NEXT:    vcpop.m a1, v0
+; RV64-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
+; RV64-NEXT:    vse8.v v12, (a0)
+; RV64-NEXT:    ret
+;
+; RV32-LABEL: test_compresstore_i8_v64:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
+; RV32-NEXT:    vcompress.vm v12, v8, v0
+; RV32-NEXT:    vcpop.m a1, v0
+; RV32-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
+; RV32-NEXT:    vse8.v v12, (a0)
+; RV32-NEXT:    ret
+entry:
+  tail call void @llvm.masked.compressstore.v64i8(<64 x i8> %data, ptr %p, <64 x i1> %mask)
+  ret void
+}
+
+define void @test_compresstore_i8_v128(ptr %p, <128 x i1> %mask, <128 x i8> %data) {
+; RV64-LABEL: test_compresstore_i8_v128:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    li a1, 128
+; RV64-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; RV64-NEXT:    vcompress.vm v16, v8, v0
+; RV64-NEXT:    vcpop.m a1, v0
+; RV64-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; RV64-NEXT:    vse8.v v16, (a0)
+; RV64-NEXT:    ret
+;
+; RV32-LABEL: test_compresstore_i8_v128:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    li a1, 128
+; RV32-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; RV32-NEXT:    vcompress.vm v16, v8, v0
+; RV32-NEXT:    vcpop.m a1, v0
+; RV32-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; RV32-NEXT:    vse8.v v16, (a0)
+; RV32-NEXT:    ret
+entry:
+  tail call void @llvm.masked.compressstore.v128i8(<128 x i8> %data, ptr %p, <128 x i1> %mask)
+  ret void
+}
+
+define void @test_compresstore_i8_v256(ptr %p, <256 x i1> %mask, <256 x i8> %data) {
+; RV64-LABEL: test_compresstore_i8_v256:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    li a2, 128
+; RV64-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT:    vle8.v v24, (a1)
+; RV64-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
+; RV64-NEXT:    vmv.x.s a2, v0
+; RV64-NEXT:    andi a1, a2, 1
+; RV64-NEXT:    bnez a1, .LBB8_273
+; RV64-NEXT:  # %bb.1: # %else
+; RV64-NEXT:    andi a1, a2, 2
+; RV64-NEXT:    bnez a1, .LBB8_274
+; RV64-NEXT:  .LBB8_2: # %else2
+; RV64-NEXT:    andi a1, a2, 4
+; RV64-NEXT:    bnez a1, .LBB8_275
+; RV64-NEXT:  .LBB8_3: # %else5
+; RV64-NEXT:    andi a1, a2, 8
+; RV64-NEXT:    bnez a1, .LBB8_276
+; RV64-NEXT:  .LBB8_4: # %else8
+; RV64-NEXT:    andi a1, a2, 16
+; RV64-NEXT:    bnez a1, .LBB8_277
+; RV64-NEXT:  .LBB8_5: # %else11
+; RV64-NEXT:    andi a1, a2, 32
+; RV64-NEXT:    bnez a1, .LBB8_278
+; RV64-NEXT:  .LBB8_6: # %else14
+; RV64-NEXT:    andi a1, a2, 64
+; RV64-NEXT:    bnez a1, .LBB8_279
+; RV64-NEXT:  .LBB8_7: # %else17
+; RV64-NEXT:    andi a1, a2, 128
+; RV64-NEXT:    bnez a1, .LBB8_280
+; RV64-NEXT:  .LBB8_8: # %else20
+; RV64-NEXT:    andi a1, a2, 256
+; RV64-NEXT:    bnez a1, .LBB8_281
+; RV64-NEXT:  .LBB8_9: # %else23
+; RV64-NEXT:    andi a1, a2, 512
+; RV64-NEXT:    bnez a1, .LBB8_282
+; RV64-NEXT:  .LBB8_10: # %else26
+; RV64-NEXT:    andi a1, a2, 1024
+; RV64-NEXT:    bnez a1, .LBB8_283
+; RV64-NEXT:  .LBB8_11: # %else29
+; RV64-NEXT:    slli a1, a2, 52
+; RV64-NEXT:    bltz a1, .LBB8_284
+; RV64-NEXT:  .LBB8_12: # %else32
+; RV64-NEXT:    slli a1, a2, 51
+; RV64-NEXT:    bltz a1, .LBB8_285
+; RV64-NEXT:  .LBB8_13: # %else35
+; RV64-NEXT:    slli a1, a2, 50
+; RV64-NEXT:    bltz a1, .LBB8_286
+; RV64-NEXT:  .LBB8_14: # %else38
+; RV64-NEXT:    slli a1, a2, 49
+; RV64-NEXT:    bltz a1, .LBB8_287
+; RV64-NEXT:  .LBB8_15: # %else41
+; RV64-NEXT:    slli a1, a2, 48
+; RV64-NEXT:    bltz a1, .LBB8_288
+; RV64-NEXT:  .LBB8_16: # %else44
+; RV64-NEXT:    slli a1, a2, 47
+; RV64-NEXT:    bltz a1, .LBB8_289
+; RV64-NEXT:  .LBB8_17: # %else47
+; RV64-NEXT:    slli a1, a2, 46
+; RV64-NEXT:    bltz a1, .LBB8_290
+; RV64-NEXT:  .LBB8_18: # %else50
+; RV64-NEXT:    slli a1, a2, 45
+; RV64-NEXT:    bltz a1, .LBB8_291
+; RV64-NEXT:  .LBB8_19: # %else53
+; RV64-NEXT:    slli a1, a2, 44
+; RV64-NEXT:    bltz a1, .LBB8_292
+; RV64-NEXT:  .LBB8_20: # %else56
+; RV64-NEXT:    slli a1, a2, 43
+; RV64-NEXT:    bltz a1, .LBB8_293
+; RV64-NEXT:  .LBB8_21: # %else59
+; RV64-NEXT:    slli a1, a2, 42
+; RV64-NEXT:    bltz a1, .LBB8_294
+; RV64-NEXT:  .LBB8_22: # %else62
+; RV64-NEXT:    slli a1, a2, 41
+; RV64-NEXT:    bgez a1, .LBB8_23
+; RV64-NEXT:    j .LBB8_295
+; RV64-NEXT:  .LBB8_23: # %else65
+; RV64-NEXT:    slli a1, a2, 40
+; RV64-NEXT:    bgez a1, .LBB8_24
+; RV64-NEXT:    j .LBB8_296
+; RV64-NEXT:  .LBB8_24: # %else68
+; RV64-NEXT:    slli a1, a2, 39
+; RV64-NEXT:    bgez a1, .LBB8_25
+; RV64-NEXT:    j .LBB8_297
+; RV64-NEXT:  .LBB8_25: # %else71
+; RV64-NEXT:    slli a1, a2, 38
+; RV64-NEXT:    bgez a1, .LBB8_26
+; RV64-NEXT:    j .LBB8_298
+; RV64-NEXT:  .LBB8_26: # %else74
+; RV64-NEXT:    slli a1, a2, 37
+; RV64-NEXT:    bgez a1, .LBB8_27
+; RV64-NEXT:    j .LBB8_299
+; RV64-NEXT:  .LBB8_27: # %else77
+; RV64-NEXT:    slli a1, a2, 36
+; RV64-NEXT:    bgez a1, .LBB8_28
+; RV64-NEXT:    j .LBB8_300
+; RV64-NEXT:  .LBB8_28: # %else80
+; RV64-NEXT:    slli a1, a2, 35
+; RV64-NEXT:    bgez a1, .LBB8_29
+; RV64-NEXT:    j .LBB8_301
+; RV64-NEXT:  .LBB8_29: # %else83
+; RV64-NEXT:    slli a1, a2, 34
+; RV64-NEXT:    bgez a1, .LBB8_30
+; RV64-NEXT:    j .LBB8_302
+; RV64-NEXT:  .LBB8_30: # %else86
+; RV64-NEXT:    slli a1, a2, 33
+; RV64-NEXT:    bgez a1, .LBB8_31
+; RV64-NEXT:    j .LBB8_303
+; RV64-NEXT:  .LBB8_31: # %else89
+; RV64-NEXT:    slli a1, a2, 32
+; RV64-NEXT:    bgez a1, .LBB8_33
+; RV64-NEXT:  .LBB8_32: # %cond.store91
+; RV64-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v10, v16, 31
+; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT:    vse8.v v10, (a0)
+; RV64-NEXT:    addi a0, a0, 1
+; RV64-NEXT:  .LBB8_33: # %else92
+; RV64-NEXT:    addi sp, sp, -2032
+; RV64-NEXT:    .cfi_def_cfa_offset 2032
+; RV64-NEXT:    sd ra, 2024(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s0, 2016(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    .cfi_offset s0, -16
+; RV64-NEXT:    addi s0, sp, 2032
+; RV64-NEXT:    .cfi_def_cfa s0, 0
+; RV64-NEXT:    lui a1, 6
+; RV64-NEXT:    addiw a1, a1, -1776
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    andi sp, sp, -128
+; RV64-NEXT:    slli a3, a2, 31
+; RV64-NEXT:    lui a1, 6
+; RV64-NEXT:    addiw a1, a1, -984
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    bgez a3, .LBB8_34
+; RV64-NEXT:    j .LBB8_304
+; RV64-NEXT:  .LBB8_34: # %else95
+; RV64-NEXT:    slli a3, a2, 30
+; RV64-NEXT:    bgez a3, .LBB8_35
+; RV64-NEXT:    j .LBB8_305
+; RV64-NEXT:  .LBB8_35: # %else98
+; RV64-NEXT:    slli a3, a2, 29
+; RV64-NEXT:    bgez a3, .LBB8_36
+; RV64-NEXT:    j .LBB8_306
+; RV64-NEXT:  .LBB8_36: # %else101
+; RV64-NEXT:    slli a3, a2, 28
+; RV64-NEXT:    bgez a3, .LBB8_37
+; RV64-NEXT:    j .LBB8_307
+; RV64-NEXT:  .LBB8_37: # %else104
+; RV64-NEXT:    slli a3, a2, 27
+; RV64-NEXT:    bgez a3, .LBB8_38
+; RV64-NEXT:    j .LBB8_308
+; RV64-NEXT:  .LBB8_38: # %else107
+; RV64-NEXT:    slli a3, a2, 26
+; RV64-NEXT:    bgez a3, .LBB8_39
+; RV64-NEXT:    j .LBB8_309
+; RV64-NEXT:  .LBB8_39: # %else110
+; RV64-NEXT:    slli a3, a2, 25
+; RV64-NEXT:    bgez a3, .LBB8_40
+; RV64-NEXT:    j .LBB8_310
+; RV64-NEXT:  .LBB8_40: # %else113
+; RV64-NEXT:    slli a3, a2, 24
+; RV64-NEXT:    bgez a3, .LBB8_41
+; RV64-NEXT:    j .LBB8_311
+; RV64-NEXT:  .LBB8_41: # %else116
+; RV64-NEXT:    slli a3, a2, 23
+; RV64-NEXT:    bgez a3, .LBB8_43
+; RV64-NEXT:  .LBB8_42: # %cond.store118
+; RV64-NEXT:    li a3, 128
+; RV64-NEXT:    li a4, 23
+; RV64-NEXT:    slli a4, a4, 10
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT:    vse8.v v16, (a4)
+; RV64-NEXT:    lbu a1, 0(a1)
+; RV64-NEXT:    sb a1, 0(a0)
+; RV64-NEXT:    addi a0, a0, 1
+; RV64-NEXT:  .LBB8_43: # %else119
+; RV64-NEXT:    slli a3, a2, 22
+; RV64-NEXT:    lui a1, 5
+; RV64-NEXT:    addiw a1, a1, 953
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    bgez a3, .LBB8_44
+; RV64-NEXT:    j .LBB8_312
+; RV64-NEXT:  .LBB8_44: # %else122
+; RV64-NEXT:    slli a3, a2, 21
+; RV64-NEXT:    bgez a3, .LBB8_45
+; RV64-NEXT:    j .LBB8_313
+; RV64-NEXT:  .LBB8_45: # %else125
+; RV64-NEXT:    slli a3, a2, 20
+; RV64-NEXT:    bgez a3, .LBB8_46
+; RV64-NEXT:    j .LBB8_314
+; RV64-NEXT:  .LBB8_46: # %else128
+; RV64-NEXT:    slli a3, a2, 19
+; RV64-NEXT:    bgez a3, .LBB8_47
+; RV64-NEXT:    j .LBB8_315
+; RV64-NEXT:  .LBB8_47: # %else131
+; RV64-NEXT:    slli a3, a2, 18
+; RV64-NEXT:    bgez a3, .LBB8_48
+; RV64-NEXT:    j .LBB8_316
+; RV64-NEXT:  .LBB8_48: # %else134
+; RV64-NEXT:    slli a3, a2, 17
+; RV64-NEXT:    bgez a3, .LBB8_49
+; RV64-NEXT:    j .LBB8_317
+; RV64-NEXT:  .LBB8_49: # %else137
+; RV64-NEXT:    slli a3, a2, 16
+; RV64-NEXT:    bgez a3, .LBB8_50
+; RV64-NEXT:    j .LBB8_318
+; RV64-NEXT:  .LBB8_50: # %else140
+; RV64-NEXT:    slli a3, a2, 15
+; RV64-NEXT:    bgez a3, .LBB8_51
+; RV64-NEXT:    j .LBB8_319
+; RV64-NEXT:  .LBB8_51: # %else143
+; RV64-NEXT:    slli a3, a2, 14
+; RV64-NEXT:    bgez a3, .LBB8_52
+; RV64-NEXT:    j .LBB8_320
+; RV64-NEXT:  .LBB8_52: # %else146
+; RV64-NEXT:    slli a3, a2, 13
+; RV64-NEXT:    bgez a3, .LBB8_53
+; RV64-NEXT:    j .LBB8_321
+; RV64-NEXT:  .LBB8_53: # %else149
+; RV64-NEXT:    slli a3, a2, 12
+; RV64-NEXT:    bgez a3, .LBB8_54
+; RV64-NEXT:    j .LBB8_322
+; RV64-NEXT:  .LBB8_54: # %else152
+; RV64-NEXT:    slli a3, a2, 11
+; RV64-NEXT:    bgez a3, .LBB8_55
+; RV64-NEXT:    j .LBB8_323
+; RV64-NEXT:  .LBB8_55: # %else155
+; RV64-NEXT:    slli a3, a2, 10
+; RV64-NEXT:    bgez a3, .LBB8_56
+; RV64-NEXT:    j .LBB8_324
+; RV64-NEXT:  .LBB8_56: # %else158
+; RV64-NEXT:    slli a3, a2, 9
+; RV64-NEXT:    bgez a3, .LBB8_57
+; RV64-NEXT:    j .LBB8_325
+; RV64-NEXT:  .LBB8_57: # %else161
+; RV64-NEXT:    slli a3, a2, 8
+; RV64-NEXT:    bgez a3, .LBB8_58
+; RV64-NEXT:    j .LBB8_326
+; RV64-NEXT:  .LBB8_58: # %else164
+; RV64-NEXT:    slli a3, a2, 7
+; RV64-NEXT:    bgez a3, .LBB8_59
+; RV64-NEXT:    j .LBB8_327
+; RV64-NEXT:  .LBB8_59: # %else167
+; RV64-NEXT:    slli a3, a2, 6
+; RV64-NEXT:    bgez a3, .LBB8_61
+; RV64-NEXT:  .LBB8_60: # %cond.store169
+; RV64-NEXT:    li a3, 128
+; RV64-NEXT:    lui a4, 5
+; RV64-NEXT:    addiw a4, a4, 896
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT:    vse8.v v16, (a4)
+; RV64-NEXT:    lbu a1, 0(a1)
+; RV64-NEXT:    addi a3, a0, 1
+; RV64-NEXT:    sb a1, 0(a0)
+; RV64-NEXT:    mv a0, a3
+; RV64-NEXT:  .LBB8_61: # %else170
+; RV64-NEXT:    slli a1, a2, 5
+; RV64-NEXT:    lui a3, 5
+; RV64-NEXT:    addiw a3, a3, -1206
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    bgez a1, .LBB8_62
+; RV64-NEXT:    j .LBB8_328
+; RV64-NEXT:  .LBB8_62: # %else173
+; RV64-NEXT:    slli a1, a2, 4
+; RV64-NEXT:    bgez a1, .LBB8_63
+; RV64-NEXT:    j .LBB8_329
+; RV64-NEXT:  .LBB8_63: # %else176
+; RV64-NEXT:    slli a1, a2, 3
+; RV64-NEXT:    bgez a1, .LBB8_64
+; RV64-NEXT:    j .LBB8_330
+; RV64-NEXT:  .LBB8_64: # %else179
+; RV64-NEXT:    slli a1, a2, 2
+; RV64-NEXT:    bgez a1, .LBB8_66
+; RV64-NEXT:  .LBB8_65: # %cond.store181
+; RV64-NEXT:    li a1, 128
+; RV64-NEXT:    lui a4, 5
+; RV64-NEXT:    addiw a4, a4, 384
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; RV64-NEXT:    vse8.v v16, (a4)
+; RV64-NEXT:    lbu a...
[truncated]

; RV32-NEXT: sb a4, 0(a0)
; RV32-NEXT: mv a0, a5
; RV32-NEXT: slli a4, a3, 15
; RV32-NEXT: bltz a4, .LBB
Contributor

Is this codegen right? It's so huge.

Collaborator

Looks like there's no alignment information, so the scalar stores got split.

Contributor Author

In this case the compressstore was scalarized by the ScalarizeMaskedMemIntrin pass because the data occupies 16 vector registers (a <256 x i8> value is 2048 bits, i.e. 16 registers when VLEN is 128), which would require LMUL=16 and won't be easy to legalize.
I'll add an explanation to the description.

@@ -1609,3 +1609,10 @@ bool RISCVTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
C2.NumIVMuls, C2.NumBaseAdds,
C2.ScaleCost, C2.ImmCost, C2.SetupCost);
}

bool RISCVTTIImpl::isLegalMaskedCompressStore(Type *DataTy) {
Collaborator

This interface needs an alignment parameter, doesn't it?

Contributor Author

For RVV it's not required. The store of the compressed data can be treated as unaligned, i.e. it should be possible to use vse8 for non-naturally-aligned memory.

Collaborator

We would need to scale the VL from the vcpop to use vse8.

It doesn't look like alignment was accounted for in the intrinsic definition. masked.gather/scatter had a separate argument; vp.gather/scatter use a pointer attribute. I think we should use the pointer attribute for compressstore.

It looks like SelectionDAGBuilder::visitMaskedStore currently sets the alignment to the ABI alignment of the vector type, which doesn't make sense.
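
For context, the scaling referred to above (an illustrative helper, not code from this patch): vcpop returns the number of active elements, but a byte-wise vse8 store of the compressed data would need its VL expressed in bytes.

    // Illustrative only: the VL a vse8 store of the compressed data would need,
    // given the active-element count produced by vcpop.
    unsigned vse8VLInBytes(unsigned ActiveElts, unsigned EltSizeInBits) {
      return ActiveElts * (EltSizeInBits / 8); // e.g. 5 active i32 lanes -> vl = 20
    }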

Collaborator

Once we fix the alignment for the intrinsic, we'll also need to teach ScalarizeMaskedMemIntrin to preserve it when scalarizing.

Collaborator

> Once we fix the alignment for the intrinsic, we'll also need to teach ScalarizeMaskedMemIntrin to preserve it when scalarizing.

To emphasize, this should be done in a separate review and should probably be addressed before we continue too far with this one.

Contributor

I have created PR #83519 to preserve the alignment when scalarizing.

@preames (Collaborator) commented Feb 29, 2024

Can you separate your NFC to add the Align parameter to TTI into its own review? It's clearly needed; we should be able to land that one quickly and rebase this on top.

@nikolaypanchenko (Contributor Author)

> Can you separate your NFC to add the Align parameter to TTI into its own review? It's clearly needed; we should be able to land that one quickly and rebase this on top.

Absolutely. #83516

@@ -1620,3 +1620,15 @@ bool RISCVTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
C2.NumIVMuls, C2.NumBaseAdds,
C2.ScaleCost, C2.ImmCost, C2.SetupCost);
}

bool RISCVTTIImpl::isLegalMaskedCompressStore(Type *DataTy, Align Alignment) {
Contributor

I think we need to check whether the fixed vector type is legal, as isLegalMaskedLoadStore() does.

Contributor

I think we could also just use isLegalMaskedLoadStore() for compressstore/expandload, like masked.load/store?

Contributor Author

Unlike on x86, where the store is done via a separate instruction and requires masking, RVV's implementation does not require masking/predication because the VL is adjusted.

Collaborator

The check being asked for is that fixed vectors are supported, not that the specific type is legal. We still have a master disable for fixed vectors in the backend via -rvv-vector-bits-min=0.

    // Only support fixed vectors if we know the minimum vector size.
    if (DataTypeVT.isFixedLengthVector() && !ST->useRVVForFixedLengthVectors())
      return false;
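
Putting the suggestions from this thread together, the hook could end up looking roughly like the sketch below. This is only an illustration of the checks being discussed, not the final code of the PR; it assumes the existing RISCVTTIImpl helpers (ST->hasVInstructions(), ST->useRVVForFixedLengthVectors(), TLI->isLegalElementTypeForRVV(), getRegUsageForType()) keep their current meaning, and it leaves the alignment question aside.

    bool RISCVTTIImpl::isLegalMaskedCompressStore(Type *DataTy, Align Alignment) {
      auto *VTy = dyn_cast<VectorType>(DataTy);
      if (!VTy || VTy->isScalableTy())
        return false;
      if (!ST->hasVInstructions())
        return false;
      // Only support fixed vectors if we know the minimum vector size.
      if (!ST->useRVVForFixedLengthVectors())
        return false;
      // The element type must be one RVV can load/store natively.
      if (!TLI->isLegalElementTypeForRVV(
              TLI->getValueType(DL, VTy->getElementType())))
        return false;
      // Anything needing more than 8 vector registers (LMUL=8) is scalarized.
      return getRegUsageForType(VTy) <= 8;
    }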

Contributor

Is it possible to test for scalable vectors too?

Contributor Author

The TTI part of the changeset disallows scalable vectors. The legalization part in ScalarizeMaskedMemIntrin cannot handle them either.
It might be better to relax the TTI constraint and add tests for scalable vectors in a follow-up patch. What do you think?

TLI->getValueType(DL, VTy->getElementType())))
return false;

return getRegUsageForType(VTy) <= 8;
Collaborator

Why is this needed? The backend should be able to split a compressed store.

Contributor Author

It's not for the store, but for the compress. Splitting a type that would need LMUL=16 (more than the 8 registers LMUL=8 provides) is doable, but not simple.

@topperc (Collaborator), Mar 12, 2024

The splitting will happen while it is still a compressstore; the compress instruction isn't created until after the split is already done.

It should create a second compressing store whose pointer is base_pointer + popcnt(lower_half_mask).
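
A scalar model of that splitting scheme (illustrative C++ with made-up helper names, not the actual DAG legalization): the upper half of the store starts right after the elements written by the lower half, i.e. at an offset of popcnt(lower_half_mask) elements.

    #include <cstddef>
    #include <numeric>

    // Scalar stand-in for a compressing store of N elements starting at Base.
    template <typename T>
    void compressStore(const T *Data, const bool *Mask, std::size_t N, T *Base) {
      for (std::size_t I = 0; I < N; ++I)
        if (Mask[I])
          *Base++ = Data[I];
    }

    // Splitting one compressing store of N elements into two halves.
    template <typename T>
    void splitCompressStore(const T *Data, const bool *Mask, std::size_t N,
                            T *Base) {
      std::size_t Half = N / 2;
      // Number of active lanes in the lower half == popcnt(lower_half_mask).
      std::size_t LoActive = std::accumulate(Mask, Mask + Half, std::size_t{0});
      compressStore(Data, Mask, Half, Base);
      compressStore(Data + Half, Mask + Half, N - Half, Base + LoActive);
    }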

if (!TLI->isLegalElementTypeForRVV(
TLI->getValueType(DL, VTy->getElementType())))
return false;

Collaborator

We need to check alignment unless you add misaligned support to RISCVISelLowering.cpp.

Contributor Author

Ok. I'll add isLegalMaskedLoadStore as @yetingk suggested.

@topperc (Collaborator) left a comment

LGTM

@npanchen merged commit aa68e28 into llvm:main on Mar 13, 2024
4 checks passed