[DAG] Support store merging of vector constant stores
Ran across this when making a change to RISCV memset lowering. It seems very odd that manually merging a store into a vector prevents it from being further merged.
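For illustration only, a minimal sketch (not taken from the commit's tests; the function name is made up): before this change, adjacent stores of constant vectors such as the ones below were classified as StoreSource::Unknown by getStoreSource and left as separate stores; with constant BUILD_VECTORs now treated as StoreSource::Constant, DAGCombiner can consider merging them into a single wider constant store, subject to the usual legality and target checks.

; Two adjacent 16-byte stores of a zero vector. After this patch they become
; candidates for merging into one 32-byte zero store where the target allows it.
define void @store_two_zero_vectors(ptr %p) {
  %hi = getelementptr inbounds i8, ptr %p, i64 16
  store <2 x i64> zeroinitializer, ptr %p, align 16
  store <2 x i64> zeroinitializer, ptr %hi, align 16
  ret void
}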

Differential Revision: https://reviews.llvm.org/D156349
preames committed Aug 2, 2023
1 parent 2ad297d commit 660b740
Showing 4 changed files with 108 additions and 174 deletions.
13 changes: 12 additions & 1 deletion llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -698,6 +698,11 @@ namespace {
case ISD::Constant:
case ISD::ConstantFP:
return StoreSource::Constant;
case ISD::BUILD_VECTOR:
if (ISD::isBuildVectorOfConstantSDNodes(StoreVal.getNode()) ||
ISD::isBuildVectorOfConstantFPSDNodes(StoreVal.getNode()))
return StoreSource::Constant;
return StoreSource::Unknown;
case ISD::EXTRACT_VECTOR_ELT:
case ISD::EXTRACT_SUBVECTOR:
return StoreSource::Extract;
@@ -19471,6 +19476,10 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
// If fp truncation is necessary give up for now.
if (MemVT.getSizeInBits() != ElementSizeBits)
return false;
} else if (ISD::isBuildVectorOfConstantSDNodes(Val.getNode()) ||
ISD::isBuildVectorOfConstantFPSDNodes(Val.getNode())) {
// Not yet handled
return false;
} else {
llvm_unreachable("Invalid constant element type");
}
@@ -19601,7 +19610,7 @@ void DAGCombiner::getStoreMergeCandidates(
case StoreSource::Constant:
if (NoTypeMatch)
return false;
if (!isIntOrFPConstant(OtherBC))
if (getStoreSource(OtherBC) != StoreSource::Constant)
return false;
break;
case StoreSource::Extract:
@@ -19823,6 +19832,8 @@ bool DAGCombiner::tryStoreMergeOfConstants(
IsElementZero = C->isZero();
else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
IsElementZero = C->getConstantFPValue()->isNullValue();
else if (ISD::isBuildVectorAllZeros(StoredVal.getNode()))
IsElementZero = true;
if (IsElementZero) {
if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
FirstZeroAfterNonZero = i;
178 changes: 20 additions & 158 deletions llvm/test/CodeGen/RISCV/rvv/memset-inline.ll
@@ -544,53 +544,31 @@ define void @bzero_32(ptr %a) nounwind {
define void @bzero_64(ptr %a) nounwind {
; RV32-LABEL: bzero_64:
; RV32: # %bb.0:
; RV32-NEXT: addi a1, a0, 48
; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV32-NEXT: li a1, 64
; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; RV32-NEXT: vmv.v.i v8, 0
; RV32-NEXT: vse8.v v8, (a1)
; RV32-NEXT: addi a1, a0, 32
; RV32-NEXT: vse8.v v8, (a1)
; RV32-NEXT: addi a1, a0, 16
; RV32-NEXT: vse8.v v8, (a1)
; RV32-NEXT: vse8.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: bzero_64:
; RV64: # %bb.0:
; RV64-NEXT: addi a1, a0, 48
; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64-NEXT: li a1, 64
; RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; RV64-NEXT: vmv.v.i v8, 0
; RV64-NEXT: vse8.v v8, (a1)
; RV64-NEXT: addi a1, a0, 32
; RV64-NEXT: vse8.v v8, (a1)
; RV64-NEXT: addi a1, a0, 16
; RV64-NEXT: vse8.v v8, (a1)
; RV64-NEXT: vse8.v v8, (a0)
; RV64-NEXT: ret
;
; RV32-FAST-LABEL: bzero_64:
; RV32-FAST: # %bb.0:
; RV32-FAST-NEXT: addi a1, a0, 48
; RV32-FAST-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-FAST-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-FAST-NEXT: vmv.v.i v8, 0
; RV32-FAST-NEXT: vse64.v v8, (a1)
; RV32-FAST-NEXT: addi a1, a0, 32
; RV32-FAST-NEXT: vse64.v v8, (a1)
; RV32-FAST-NEXT: addi a1, a0, 16
; RV32-FAST-NEXT: vse64.v v8, (a1)
; RV32-FAST-NEXT: vse64.v v8, (a0)
; RV32-FAST-NEXT: ret
;
; RV64-FAST-LABEL: bzero_64:
; RV64-FAST: # %bb.0:
; RV64-FAST-NEXT: addi a1, a0, 48
; RV64-FAST-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-FAST-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-FAST-NEXT: vmv.v.i v8, 0
; RV64-FAST-NEXT: vse64.v v8, (a1)
; RV64-FAST-NEXT: addi a1, a0, 32
; RV64-FAST-NEXT: vse64.v v8, (a1)
; RV64-FAST-NEXT: addi a1, a0, 16
; RV64-FAST-NEXT: vse64.v v8, (a1)
; RV64-FAST-NEXT: vse64.v v8, (a0)
; RV64-FAST-NEXT: ret
tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 64, i1 0)
@@ -686,27 +664,15 @@ define void @aligned_bzero_32(ptr %a) nounwind {
define void @aligned_bzero_64(ptr %a) nounwind {
; RV32-BOTH-LABEL: aligned_bzero_64:
; RV32-BOTH: # %bb.0:
; RV32-BOTH-NEXT: addi a1, a0, 48
; RV32-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-BOTH-NEXT: vmv.v.i v8, 0
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: addi a1, a0, 32
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: addi a1, a0, 16
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: vse64.v v8, (a0)
; RV32-BOTH-NEXT: ret
;
; RV64-BOTH-LABEL: aligned_bzero_64:
; RV64-BOTH: # %bb.0:
; RV64-BOTH-NEXT: addi a1, a0, 48
; RV64-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-BOTH-NEXT: vmv.v.i v8, 0
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: addi a1, a0, 32
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: addi a1, a0, 16
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: vse64.v v8, (a0)
; RV64-BOTH-NEXT: ret
tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 64, i1 0)
@@ -717,28 +683,16 @@ define void @aligned_bzero_66(ptr %a) nounwind {
; RV32-BOTH-LABEL: aligned_bzero_66:
; RV32-BOTH: # %bb.0:
; RV32-BOTH-NEXT: sh zero, 64(a0)
; RV32-BOTH-NEXT: addi a1, a0, 48
; RV32-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-BOTH-NEXT: vmv.v.i v8, 0
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: addi a1, a0, 32
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: addi a1, a0, 16
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: vse64.v v8, (a0)
; RV32-BOTH-NEXT: ret
;
; RV64-BOTH-LABEL: aligned_bzero_66:
; RV64-BOTH: # %bb.0:
; RV64-BOTH-NEXT: sh zero, 64(a0)
; RV64-BOTH-NEXT: addi a1, a0, 48
; RV64-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-BOTH-NEXT: vmv.v.i v8, 0
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: addi a1, a0, 32
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: addi a1, a0, 16
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: vse64.v v8, (a0)
; RV64-BOTH-NEXT: ret
tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 66, i1 0)
@@ -754,12 +708,8 @@ define void @aligned_bzero_96(ptr %a) nounwind {
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: addi a1, a0, 64
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: addi a1, a0, 48
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: addi a1, a0, 32
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: addi a1, a0, 16
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-BOTH-NEXT: vmv.v.i v8, 0
; RV32-BOTH-NEXT: vse64.v v8, (a0)
; RV32-BOTH-NEXT: ret
;
@@ -771,12 +721,8 @@ define void @aligned_bzero_96(ptr %a) nounwind {
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: addi a1, a0, 64
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: addi a1, a0, 48
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: addi a1, a0, 32
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: addi a1, a0, 16
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-BOTH-NEXT: vmv.v.i v8, 0
; RV64-BOTH-NEXT: vse64.v v8, (a0)
; RV64-BOTH-NEXT: ret
tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 96, i1 0)
@@ -786,43 +732,15 @@ define void @aligned_bzero_128(ptr %a) nounwind {
define void @aligned_bzero_128(ptr %a) nounwind {
; RV32-BOTH-LABEL: aligned_bzero_128:
; RV32-BOTH: # %bb.0:
; RV32-BOTH-NEXT: addi a1, a0, 112
; RV32-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-BOTH-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-BOTH-NEXT: vmv.v.i v8, 0
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: addi a1, a0, 96
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: addi a1, a0, 80
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: addi a1, a0, 64
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: addi a1, a0, 48
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: addi a1, a0, 32
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: addi a1, a0, 16
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: vse64.v v8, (a0)
; RV32-BOTH-NEXT: ret
;
; RV64-BOTH-LABEL: aligned_bzero_128:
; RV64-BOTH: # %bb.0:
; RV64-BOTH-NEXT: addi a1, a0, 112
; RV64-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-BOTH-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-BOTH-NEXT: vmv.v.i v8, 0
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: addi a1, a0, 96
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: addi a1, a0, 80
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: addi a1, a0, 64
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: addi a1, a0, 48
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: addi a1, a0, 32
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: addi a1, a0, 16
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: vse64.v v8, (a0)
; RV64-BOTH-NEXT: ret
tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 128, i1 0)
@@ -832,74 +750,18 @@ define void @aligned_bzero_256(ptr %a) nounwind {
define void @aligned_bzero_256(ptr %a) nounwind {
; RV32-BOTH-LABEL: aligned_bzero_256:
; RV32-BOTH: # %bb.0:
; RV32-BOTH-NEXT: addi a1, a0, 240
; RV32-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-BOTH-NEXT: vmv.v.i v8, 0
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: addi a1, a0, 224
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: addi a1, a0, 208
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: addi a1, a0, 192
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: addi a1, a0, 176
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: addi a1, a0, 160
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: addi a1, a0, 144
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: addi a1, a0, 128
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: addi a1, a0, 112
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: addi a1, a0, 96
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: addi a1, a0, 80
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: addi a1, a0, 64
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: addi a1, a0, 48
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: addi a1, a0, 32
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: addi a1, a0, 16
; RV32-BOTH-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-BOTH-NEXT: vmv.v.i v8, 0
; RV32-BOTH-NEXT: vse64.v v8, (a1)
; RV32-BOTH-NEXT: vse64.v v8, (a0)
; RV32-BOTH-NEXT: ret
;
; RV64-BOTH-LABEL: aligned_bzero_256:
; RV64-BOTH: # %bb.0:
; RV64-BOTH-NEXT: addi a1, a0, 240
; RV64-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-BOTH-NEXT: vmv.v.i v8, 0
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: addi a1, a0, 224
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: addi a1, a0, 208
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: addi a1, a0, 192
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: addi a1, a0, 176
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: addi a1, a0, 160
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: addi a1, a0, 144
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: addi a1, a0, 128
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: addi a1, a0, 112
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: addi a1, a0, 96
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: addi a1, a0, 80
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: addi a1, a0, 64
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: addi a1, a0, 48
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: addi a1, a0, 32
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: addi a1, a0, 16
; RV64-BOTH-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-BOTH-NEXT: vmv.v.i v8, 0
; RV64-BOTH-NEXT: vse64.v v8, (a1)
; RV64-BOTH-NEXT: vse64.v v8, (a0)
; RV64-BOTH-NEXT: ret