-
Notifications
You must be signed in to change notification settings - Fork 14.5k
[DSE] Fold small malloc + zero-store into calloc #106069
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Pedro Lobo (pedroclobo) ChangesFull diff: https://github.com/llvm/llvm-project/pull/106069.diff 3 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 992139a95a43d3..4d271030764d3b 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1849,10 +1849,12 @@ struct DSEState {
bool tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO) {
Instruction *DefI = Def->getMemoryInst();
MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
- if (!MemSet)
- // TODO: Could handle zero store to small allocation as well.
+ StoreInst *Store = dyn_cast<StoreInst>(DefI);
+ if (!MemSet && !Store)
return false;
- Constant *StoredConstant = dyn_cast<Constant>(MemSet->getValue());
+ Constant *StoredConstant =
+ MemSet ? dyn_cast<Constant>(MemSet->getValue())
+ : dyn_cast<Constant>(Store->getValueOperand());
if (!StoredConstant || !StoredConstant->isNullValue())
return false;
@@ -1880,14 +1882,16 @@ struct DSEState {
if (!MallocDef)
return false;
- auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
+ auto shouldCreateCalloc = [](CallInst *Malloc, Instruction *MI) {
// Check for br(icmp ptr, null), truebb, falsebb) pattern at the end
// of malloc block
auto *MallocBB = Malloc->getParent(),
- *MemsetBB = Memset->getParent();
+ *MemsetBB = MI->getParent();
if (MallocBB == MemsetBB)
return true;
- auto *Ptr = Memset->getArgOperand(0);
+ auto *Ptr = dyn_cast<CallInst>(MI)
+ ? dyn_cast<CallInst>(MI)->getArgOperand(0)
+ : dyn_cast<StoreInst>(MI)->getPointerOperand();
auto *TI = MallocBB->getTerminator();
BasicBlock *TrueBB, *FalseBB;
if (!match(TI, m_Br(m_SpecificICmp(ICmpInst::ICMP_EQ, m_Specific(Ptr),
@@ -1899,12 +1903,30 @@ struct DSEState {
return true;
};
- if (Malloc->getOperand(0) != MemSet->getLength())
- return false;
- if (!shouldCreateCalloc(Malloc, MemSet) ||
- !DT.dominates(Malloc, MemSet) ||
- !memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT))
- return false;
+ if (MemSet) {
+ if (Malloc->getOperand(0) != MemSet->getLength())
+ return false;
+ if (!shouldCreateCalloc(Malloc, MemSet) ||
+ !DT.dominates(Malloc, MemSet) ||
+ !memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT))
+ return false;
+ } else {
+ // Handle zero store to small allocation.
+ assert(Store && "Expected Store");
+ ConstantInt *MallocSize = dyn_cast<ConstantInt>(Malloc->getOperand(0));
+ if (!MallocSize)
+ return false;
+ // Allow folding of stores of 1/2/4/8 bytes. The malloc size must match
+ // the store size.
+ uint64_t Size = MallocSize->getLimitedValue();
+ uint64_t StoreSize =
+ DL.getTypeStoreSize(Store->getValueOperand()->getType());
+ if (Size > 8 || (Size & (Size - 1)) || Size != StoreSize)
+ return false;
+ if (!shouldCreateCalloc(Malloc, Store) || !DT.dominates(Malloc, Store) ||
+ !memoryIsNotModifiedBetween(Malloc, Store, BatchAA, DL, &DT))
+ return false;
+ }
IRBuilder<> IRB(Malloc);
Type *SizeTTy = Malloc->getArgOperand(0)->getType();
auto *Calloc = emitCalloc(ConstantInt::get(SizeTTy, 1),
diff --git a/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll b/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
index 9fc20d76da5eb4..8b9a7b7ec128e5 100644
--- a/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
@@ -350,6 +350,16 @@ define ptr @zero_memset_after_malloc_with_different_sizes(i64 %size) {
ret ptr %call
}
+define ptr @zero_store_after_malloc() {
+; CHECK-LABEL: @zero_store_after_malloc(
+; CHECK-NEXT: [[CALLOC:%.*]] = call ptr @calloc(i64 1, i64 8)
+; CHECK-NEXT: ret ptr [[CALLOC]]
+;
+ %call = call ptr @malloc(i64 8) inaccessiblememonly
+ store i64 0, ptr %call
+ ret ptr %call
+}
+
; based on pr25892_lite
define ptr @zero_memset_after_new(i64 %size) {
; CHECK-LABEL: @zero_memset_after_new(
diff --git a/llvm/test/Transforms/DeadStoreElimination/simple.ll b/llvm/test/Transforms/DeadStoreElimination/simple.ll
index ef2c4ef564b24a..6515657d89bff3 100644
--- a/llvm/test/Transforms/DeadStoreElimination/simple.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/simple.ll
@@ -204,10 +204,9 @@ define void @test_matrix_store(i64 %stride) {
declare void @may_unwind()
define ptr @test_malloc_no_escape_before_return() {
; CHECK-LABEL: @test_malloc_no_escape_before_return(
-; CHECK-NEXT: [[PTR:%.*]] = tail call ptr @malloc(i64 4)
+; CHECK-NEXT: [[CALLOC:%.*]] = call ptr @calloc(i64 1, i64 4)
; CHECK-NEXT: call void @may_unwind()
-; CHECK-NEXT: store i32 0, ptr [[PTR]], align 4
-; CHECK-NEXT: ret ptr [[PTR]]
+; CHECK-NEXT: ret ptr [[CALLOC]]
;
%ptr = tail call ptr @malloc(i64 4)
%DEAD = load i32, ptr %ptr
@@ -236,10 +235,9 @@ define ptr @test_custom_malloc_no_escape_before_return() {
define ptr addrspace(1) @test13_addrspacecast() {
; CHECK-LABEL: @test13_addrspacecast(
-; CHECK-NEXT: [[P:%.*]] = tail call ptr @malloc(i64 4)
-; CHECK-NEXT: [[P_AC:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
+; CHECK-NEXT: [[CALLOC:%.*]] = call ptr @calloc(i64 1, i64 4)
+; CHECK-NEXT: [[P_AC:%.*]] = addrspacecast ptr [[CALLOC]] to ptr addrspace(1)
; CHECK-NEXT: call void @may_unwind()
-; CHECK-NEXT: store i32 0, ptr addrspace(1) [[P_AC]], align 4
; CHECK-NEXT: ret ptr addrspace(1) [[P_AC]]
;
%p = tail call ptr @malloc(i64 4)
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
92ad678
to
78e16e9
Compare
This looks like a duplicate of #87048, see also #87048 (comment) for a prerequisite transform that needs to be implemented before we can consider this. |
Also worth noting that this transform is unlikely to be profitable, unless maybe for optsize/minsize. Calloc will be more expensive than a malloc plus a single store. Even ignoring the umulo for the size calculation, there's substantial logic in glibc deciding whether and how exactly the zeroing of the memory needs to happen. |
Thanks for the review. I could try to implement the prerequisite transform. However, I agree that the implemented transformation is unlikely to be profitable. I'll close the PR. |
No description provided.