Skip to content

[DSE] Fold small malloc + zero-store into calloc #106069

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from

Conversation

pedroclobo
Copy link
Member

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Aug 26, 2024

@llvm/pr-subscribers-llvm-transforms

Author: Pedro Lobo (pedroclobo)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/106069.diff

3 Files Affected:

  • (modified) llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp (+34-12)
  • (modified) llvm/test/Transforms/DeadStoreElimination/noop-stores.ll (+10)
  • (modified) llvm/test/Transforms/DeadStoreElimination/simple.ll (+4-6)
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 992139a95a43d3..4d271030764d3b 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1849,10 +1849,12 @@ struct DSEState {
   bool tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO) {
     Instruction *DefI = Def->getMemoryInst();
     MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
-    if (!MemSet)
-      // TODO: Could handle zero store to small allocation as well.
+    StoreInst *Store = dyn_cast<StoreInst>(DefI);
+    if (!MemSet && !Store)
       return false;
-    Constant *StoredConstant = dyn_cast<Constant>(MemSet->getValue());
+    Constant *StoredConstant =
+        MemSet ? dyn_cast<Constant>(MemSet->getValue())
+               : dyn_cast<Constant>(Store->getValueOperand());
     if (!StoredConstant || !StoredConstant->isNullValue())
       return false;
 
@@ -1880,14 +1882,16 @@ struct DSEState {
     if (!MallocDef)
       return false;
 
-    auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
+    auto shouldCreateCalloc = [](CallInst *Malloc, Instruction *MI) {
       // Check for br(icmp ptr, null), truebb, falsebb) pattern at the end
       // of malloc block
       auto *MallocBB = Malloc->getParent(),
-        *MemsetBB = Memset->getParent();
+        *MemsetBB = MI->getParent();
       if (MallocBB == MemsetBB)
         return true;
-      auto *Ptr = Memset->getArgOperand(0);
+      auto *Ptr = dyn_cast<CallInst>(MI)
+                      ? dyn_cast<CallInst>(MI)->getArgOperand(0)
+                      : dyn_cast<StoreInst>(MI)->getPointerOperand();
       auto *TI = MallocBB->getTerminator();
       BasicBlock *TrueBB, *FalseBB;
       if (!match(TI, m_Br(m_SpecificICmp(ICmpInst::ICMP_EQ, m_Specific(Ptr),
@@ -1899,12 +1903,30 @@ struct DSEState {
       return true;
     };
 
-    if (Malloc->getOperand(0) != MemSet->getLength())
-      return false;
-    if (!shouldCreateCalloc(Malloc, MemSet) ||
-        !DT.dominates(Malloc, MemSet) ||
-        !memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT))
-      return false;
+    if (MemSet) {
+      if (Malloc->getOperand(0) != MemSet->getLength())
+        return false;
+      if (!shouldCreateCalloc(Malloc, MemSet) ||
+          !DT.dominates(Malloc, MemSet) ||
+          !memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT))
+        return false;
+    } else {
+      // Handle zero store to small allocation.
+      assert(Store && "Expected Store");
+      ConstantInt *MallocSize = dyn_cast<ConstantInt>(Malloc->getOperand(0));
+      if (!MallocSize)
+        return false;
+      // Allow folding of stores of 1/2/4/8 bytes. The malloc size must match
+      // the store size.
+      uint64_t Size = MallocSize->getLimitedValue();
+      uint64_t StoreSize =
+          DL.getTypeStoreSize(Store->getValueOperand()->getType());
+      if (Size > 8 || (Size & (Size - 1)) || Size != StoreSize)
+        return false;
+      if (!shouldCreateCalloc(Malloc, Store) || !DT.dominates(Malloc, Store) ||
+          !memoryIsNotModifiedBetween(Malloc, Store, BatchAA, DL, &DT))
+        return false;
+    }
     IRBuilder<> IRB(Malloc);
     Type *SizeTTy = Malloc->getArgOperand(0)->getType();
     auto *Calloc = emitCalloc(ConstantInt::get(SizeTTy, 1),
diff --git a/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll b/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
index 9fc20d76da5eb4..8b9a7b7ec128e5 100644
--- a/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
@@ -350,6 +350,16 @@ define ptr @zero_memset_after_malloc_with_different_sizes(i64 %size) {
   ret ptr %call
 }
 
+define ptr @zero_store_after_malloc() {
+; CHECK-LABEL: @zero_store_after_malloc(
+; CHECK-NEXT:    [[CALLOC:%.*]] = call ptr @calloc(i64 1, i64 8)
+; CHECK-NEXT:    ret ptr [[CALLOC]]
+;
+  %call = call ptr @malloc(i64 8) inaccessiblememonly
+  store i64 0, ptr %call
+  ret ptr %call
+}
+
 ; based on pr25892_lite
 define ptr @zero_memset_after_new(i64 %size) {
 ; CHECK-LABEL: @zero_memset_after_new(
diff --git a/llvm/test/Transforms/DeadStoreElimination/simple.ll b/llvm/test/Transforms/DeadStoreElimination/simple.ll
index ef2c4ef564b24a..6515657d89bff3 100644
--- a/llvm/test/Transforms/DeadStoreElimination/simple.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/simple.ll
@@ -204,10 +204,9 @@ define void @test_matrix_store(i64 %stride) {
 declare void @may_unwind()
 define ptr @test_malloc_no_escape_before_return() {
 ; CHECK-LABEL: @test_malloc_no_escape_before_return(
-; CHECK-NEXT:    [[PTR:%.*]] = tail call ptr @malloc(i64 4)
+; CHECK-NEXT:    [[CALLOC:%.*]] = call ptr @calloc(i64 1, i64 4)
 ; CHECK-NEXT:    call void @may_unwind()
-; CHECK-NEXT:    store i32 0, ptr [[PTR]], align 4
-; CHECK-NEXT:    ret ptr [[PTR]]
+; CHECK-NEXT:    ret ptr [[CALLOC]]
 ;
   %ptr = tail call ptr @malloc(i64 4)
   %DEAD = load i32, ptr %ptr
@@ -236,10 +235,9 @@ define ptr @test_custom_malloc_no_escape_before_return() {
 
 define ptr addrspace(1) @test13_addrspacecast() {
 ; CHECK-LABEL: @test13_addrspacecast(
-; CHECK-NEXT:    [[P:%.*]] = tail call ptr @malloc(i64 4)
-; CHECK-NEXT:    [[P_AC:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
+; CHECK-NEXT:    [[CALLOC:%.*]] = call ptr @calloc(i64 1, i64 4)
+; CHECK-NEXT:    [[P_AC:%.*]] = addrspacecast ptr [[CALLOC]] to ptr addrspace(1)
 ; CHECK-NEXT:    call void @may_unwind()
-; CHECK-NEXT:    store i32 0, ptr addrspace(1) [[P_AC]], align 4
 ; CHECK-NEXT:    ret ptr addrspace(1) [[P_AC]]
 ;
   %p = tail call ptr @malloc(i64 4)

Copy link

github-actions bot commented Aug 26, 2024

✅ With the latest revision this PR passed the C/C++ code formatter.

@dtcxzyw dtcxzyw requested a review from nikic August 26, 2024 13:26
@nikic
Copy link
Contributor

nikic commented Aug 26, 2024

This looks like a duplicate of #87048, see also #87048 (comment) for a prerequisite transform that needs to be implemented before we can consider this.

@nikic
Copy link
Contributor

nikic commented Aug 26, 2024

Also worth noting that this transform is unlikely to be profitable, unless maybe for optsize/minsize. Calloc will be more expensive than a malloc plus a single store. Even ignoring the umulo for the size calculation, there's substantial logic in glibc deciding whether and how exactly the zeroing of the memory needs to happen.

@pedroclobo
Copy link
Member Author

Thanks for the review. I could try to implement the prerequisite transform. However, I agree that the implemented transformation is unlikely to be profitable. I'll close the PR.

@pedroclobo pedroclobo closed this Aug 26, 2024
@pedroclobo pedroclobo deleted the dse-zero-store branch October 14, 2024 11:45
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants