- 
                Notifications
    You must be signed in to change notification settings 
- Fork 15k
[MemCpyOpt] Allow stack move optimization if one address captured #165527
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Allow the stack move optimization (which merges two allocas) when the address of only one alloca is captured (and the provenance is not captured). Both addresses need to be captured to observe that the allocas were merged.
| @llvm/pr-subscribers-llvm-transforms Author: Nikita Popov (nikic) ChangesAllow the stack move optimization (which merges two allocas) when the address of only one alloca is captured (and the provenance is not captured). Both addresses need to be captured to observe that the allocas were merged. Fixes #165484. Full diff: https://github.com/llvm/llvm-project/pull/165527.diff 2 Files Affected: 
 diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index e043d072a7638..08be5df9872b7 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1534,8 +1534,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
   bool SrcNotDom = false;
 
   auto CaptureTrackingWithModRef =
-      [&](Instruction *AI,
-          function_ref<bool(Instruction *)> ModRefCallback) -> bool {
+      [&](Instruction *AI, function_ref<bool(Instruction *)> ModRefCallback,
+          bool &AddressCaptured) -> bool {
     SmallVector<Instruction *, 8> Worklist;
     Worklist.push_back(AI);
     unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
@@ -1559,8 +1559,9 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
         if (!Visited.insert(&U).second)
           continue;
         UseCaptureInfo CI = DetermineUseCaptureKind(U, AI);
-        if (capturesAnything(CI.UseCC))
+        if (capturesAnyProvenance(CI.UseCC))
           return false;
+        AddressCaptured |= capturesAddress(CI.UseCC);
 
         if (UI->mayReadOrWriteMemory()) {
           if (UI->isLifetimeStartOrEnd()) {
@@ -1627,7 +1628,9 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
     return true;
   };
 
-  if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
+  bool DestAddressCaptured = false;
+  if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback,
+                                 DestAddressCaptured))
     return false;
   // Bailout if Dest may have any ModRef before Store.
   if (!ReachabilityWorklist.empty() &&
@@ -1653,7 +1656,14 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
     return true;
   };
 
-  if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
+  bool SrcAddressCaptured = false;
+  if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback,
+                                 SrcAddressCaptured))
+    return false;
+
+  // If both the source and destination address are captured, the fact that they
+  // are no longer two separate allocations may be observed.
+  if (DestAddressCaptured && SrcAddressCaptured)
     return false;
 
   // We can do the transformation. First, move the SrcAlloca to the start of the
diff --git a/llvm/test/Transforms/MemCpyOpt/stack-move.ll b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
index 940e30ec46881..0c2e05fa8fed6 100644
--- a/llvm/test/Transforms/MemCpyOpt/stack-move.ll
+++ b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
@@ -1729,3 +1729,61 @@ define i32 @test_ret_only_capture() {
   %v = load i32, ptr %a
   ret i32 %v
 }
+
+declare ptr @captures_address_only(ptr captures(address))
+
+; Can transform: Only one address captured.
+define void @test_captures_address_captures_none() {
+; CHECK-LABEL: define void @test_captures_address_captures_none() {
+; CHECK-NEXT:    [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT:    store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
+; CHECK-NEXT:    call void @captures_address_only(ptr [[SRC]])
+; CHECK-NEXT:    call void @use_nocapture(ptr [[SRC]])
+; CHECK-NEXT:    ret void
+;
+  %src = alloca %struct.Foo, align 4
+  %dst = alloca %struct.Foo, align 4
+  store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src
+  call void @captures_address_only(ptr %src)
+  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dst, ptr align 4 %src, i64 12, i1 false)
+  call void @use_nocapture(ptr %dst)
+  ret void
+}
+
+; Can transform: Only one address captured.
+define void @test_captures_none_and_captures_address() {
+; CHECK-LABEL: define void @test_captures_none_and_captures_address() {
+; CHECK-NEXT:    [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT:    store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
+; CHECK-NEXT:    call void @use_nocapture(ptr [[SRC]])
+; CHECK-NEXT:    call void @captures_address_only(ptr [[SRC]])
+; CHECK-NEXT:    ret void
+;
+  %src = alloca %struct.Foo, align 4
+  %dst = alloca %struct.Foo, align 4
+  store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src
+  call void @use_nocapture(ptr %src)
+  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dst, ptr align 4 %src, i64 12, i1 false)
+  call void @captures_address_only(ptr %dst)
+  ret void
+}
+
+; Cannot transform: Both addresses captured.
+define void @test_captures_address_and_captures_address() {
+; CHECK-LABEL: define void @test_captures_address_and_captures_address() {
+; CHECK-NEXT:    [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT:    [[DST:%.*]] = alloca [[STRUCT_FOO]], align 4
+; CHECK-NEXT:    store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
+; CHECK-NEXT:    call void @captures_address_only(ptr [[SRC]])
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DST]], ptr align 4 [[SRC]], i64 12, i1 false)
+; CHECK-NEXT:    call void @captures_address_only(ptr [[DST]])
+; CHECK-NEXT:    ret void
+;
+  %src = alloca %struct.Foo, align 4
+  %dst = alloca %struct.Foo, align 4
+  store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src
+  call void @captures_address_only(ptr %src)
+  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dst, ptr align 4 %src, i64 12, i1 false)
+  call void @captures_address_only(ptr %dst)
+  ret void
+}
 | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
…vm#165527) Allow the stack move optimization (which merges two allocas) when the address of only one alloca is captured (and the provenance is not captured). Both addresses need to be captured to observe that the allocas were merged. Fixes llvm#165484.
Allow the stack move optimization (which merges two allocas) when the address of only one alloca is captured (and the provenance is not captured). Both addresses need to be captured to observe that the allocas were merged.
Fixes #165484.