From ae1521f2f890e250d11b70d094261ce0c0109cc6 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 29 Oct 2025 09:45:41 +0100 Subject: [PATCH] [MemCpyOpt] Allow stack move optimization if one address captured Allow the stack move optimization (which merges two allocas) when the address of only one alloca is captured (and the provenance is not captured). Both addresses need to be captured to observe that the allocas were merged. --- .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 20 +++++-- llvm/test/Transforms/MemCpyOpt/stack-move.ll | 58 +++++++++++++++++++ 2 files changed, 73 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index e043d072a7638..08be5df9872b7 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -1534,8 +1534,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store, bool SrcNotDom = false; auto CaptureTrackingWithModRef = - [&](Instruction *AI, - function_ref<bool(Instruction *)> ModRefCallback) -> bool { + [&](Instruction *AI, function_ref<bool(Instruction *)> ModRefCallback, + bool &AddressCaptured) -> bool { SmallVector<Instruction *, 8> Worklist; Worklist.push_back(AI); unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking(); @@ -1559,8 +1559,9 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store, if (!Visited.insert(&U).second) continue; UseCaptureInfo CI = DetermineUseCaptureKind(U, AI); - if (capturesAnything(CI.UseCC)) + if (capturesAnyProvenance(CI.UseCC)) return false; + AddressCaptured |= capturesAddress(CI.UseCC); if (UI->mayReadOrWriteMemory()) { if (UI->isLifetimeStartOrEnd()) { @@ -1627,7 +1628,9 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store, return true; }; - if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback)) + bool DestAddressCaptured = false; + if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback, + 
DestAddressCaptured)) return false; // Bailout if Dest may have any ModRef before Store. if (!ReachabilityWorklist.empty() && @@ -1653,7 +1656,14 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store, return true; }; - if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback)) + bool SrcAddressCaptured = false; + if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback, + SrcAddressCaptured)) + return false; + + // If both the source and destination address are captured, the fact that they + // are no longer two separate allocations may be observed. + if (DestAddressCaptured && SrcAddressCaptured) return false; // We can do the transformation. First, move the SrcAlloca to the start of the diff --git a/llvm/test/Transforms/MemCpyOpt/stack-move.ll b/llvm/test/Transforms/MemCpyOpt/stack-move.ll index 940e30ec46881..0c2e05fa8fed6 100644 --- a/llvm/test/Transforms/MemCpyOpt/stack-move.ll +++ b/llvm/test/Transforms/MemCpyOpt/stack-move.ll @@ -1729,3 +1729,61 @@ define i32 @test_ret_only_capture() { %v = load i32, ptr %a ret i32 %v } + +declare ptr @captures_address_only(ptr captures(address)) + +; Can transform: Only one address captured. +define void @test_captures_address_captures_none() { +; CHECK-LABEL: define void @test_captures_address_captures_none() { +; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 +; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 +; CHECK-NEXT: call void @captures_address_only(ptr [[SRC]]) +; CHECK-NEXT: call void @use_nocapture(ptr [[SRC]]) +; CHECK-NEXT: ret void +; + %src = alloca %struct.Foo, align 4 + %dst = alloca %struct.Foo, align 4 + store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src + call void @captures_address_only(ptr %src) + call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dst, ptr align 4 %src, i64 12, i1 false) + call void @use_nocapture(ptr %dst) + ret void +} + +; Can transform: Only one address captured. 
+define void @test_captures_none_and_captures_address() { +; CHECK-LABEL: define void @test_captures_none_and_captures_address() { +; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 +; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 +; CHECK-NEXT: call void @use_nocapture(ptr [[SRC]]) +; CHECK-NEXT: call void @captures_address_only(ptr [[SRC]]) +; CHECK-NEXT: ret void +; + %src = alloca %struct.Foo, align 4 + %dst = alloca %struct.Foo, align 4 + store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src + call void @use_nocapture(ptr %src) + call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dst, ptr align 4 %src, i64 12, i1 false) + call void @captures_address_only(ptr %dst) + ret void +} + +; Cannot transform: Both addresses captured. +define void @test_captures_address_and_captures_address() { +; CHECK-LABEL: define void @test_captures_address_and_captures_address() { +; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 +; CHECK-NEXT: [[DST:%.*]] = alloca [[STRUCT_FOO]], align 4 +; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 +; CHECK-NEXT: call void @captures_address_only(ptr [[SRC]]) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DST]], ptr align 4 [[SRC]], i64 12, i1 false) +; CHECK-NEXT: call void @captures_address_only(ptr [[DST]]) +; CHECK-NEXT: ret void +; + %src = alloca %struct.Foo, align 4 + %dst = alloca %struct.Foo, align 4 + store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src + call void @captures_address_only(ptr %src) + call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dst, ptr align 4 %src, i64 12, i1 false) + call void @captures_address_only(ptr %dst) + ret void +}