Skip to content

Commit 8f62481

Browse files
authored
[MemCpyOpt] Allow stack move optimization if one address captured (#165527)
Allow the stack move optimization (which merges two allocas) when the address of only one of the two allocas is captured, provided that the provenance of neither alloca is captured. The fact that the allocas were merged can only be observed if both addresses are captured, so the optimization is still rejected in that case. Fixes #165484.
1 parent 30579c0 commit 8f62481

File tree

2 files changed

+73
-5
lines changed

2 files changed

+73
-5
lines changed

llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1534,8 +1534,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
15341534
bool SrcNotDom = false;
15351535

15361536
auto CaptureTrackingWithModRef =
1537-
[&](Instruction *AI,
1538-
function_ref<bool(Instruction *)> ModRefCallback) -> bool {
1537+
[&](Instruction *AI, function_ref<bool(Instruction *)> ModRefCallback,
1538+
bool &AddressCaptured) -> bool {
15391539
SmallVector<Instruction *, 8> Worklist;
15401540
Worklist.push_back(AI);
15411541
unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
@@ -1559,8 +1559,9 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
15591559
if (!Visited.insert(&U).second)
15601560
continue;
15611561
UseCaptureInfo CI = DetermineUseCaptureKind(U, AI);
1562-
if (capturesAnything(CI.UseCC))
1562+
if (capturesAnyProvenance(CI.UseCC))
15631563
return false;
1564+
AddressCaptured |= capturesAddress(CI.UseCC);
15641565

15651566
if (UI->mayReadOrWriteMemory()) {
15661567
if (UI->isLifetimeStartOrEnd()) {
@@ -1627,7 +1628,9 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
16271628
return true;
16281629
};
16291630

1630-
if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
1631+
bool DestAddressCaptured = false;
1632+
if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback,
1633+
DestAddressCaptured))
16311634
return false;
16321635
// Bailout if Dest may have any ModRef before Store.
16331636
if (!ReachabilityWorklist.empty() &&
@@ -1653,7 +1656,14 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
16531656
return true;
16541657
};
16551658

1656-
if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
1659+
bool SrcAddressCaptured = false;
1660+
if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback,
1661+
SrcAddressCaptured))
1662+
return false;
1663+
1664+
// If both the source and destination address are captured, the fact that they
1665+
// are no longer two separate allocations may be observed.
1666+
if (DestAddressCaptured && SrcAddressCaptured)
16571667
return false;
16581668

16591669
// We can do the transformation. First, move the SrcAlloca to the start of the

llvm/test/Transforms/MemCpyOpt/stack-move.ll

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1729,3 +1729,61 @@ define i32 @test_ret_only_capture() {
17291729
%v = load i32, ptr %a
17301730
ret i32 %v
17311731
}
1732+
1733+
declare ptr @captures_address_only(ptr captures(address))
1734+
1735+
; Can transform: Only one address captured.
1736+
define void @test_captures_address_captures_none() {
1737+
; CHECK-LABEL: define void @test_captures_address_captures_none() {
1738+
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
1739+
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
1740+
; CHECK-NEXT: call void @captures_address_only(ptr [[SRC]])
1741+
; CHECK-NEXT: call void @use_nocapture(ptr [[SRC]])
1742+
; CHECK-NEXT: ret void
1743+
;
1744+
%src = alloca %struct.Foo, align 4
1745+
%dst = alloca %struct.Foo, align 4
1746+
store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src
1747+
call void @captures_address_only(ptr %src)
1748+
call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dst, ptr align 4 %src, i64 12, i1 false)
1749+
call void @use_nocapture(ptr %dst)
1750+
ret void
1751+
}
1752+
1753+
; Can transform: Only one address captured.
1754+
define void @test_captures_none_and_captures_address() {
1755+
; CHECK-LABEL: define void @test_captures_none_and_captures_address() {
1756+
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
1757+
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
1758+
; CHECK-NEXT: call void @use_nocapture(ptr [[SRC]])
1759+
; CHECK-NEXT: call void @captures_address_only(ptr [[SRC]])
1760+
; CHECK-NEXT: ret void
1761+
;
1762+
%src = alloca %struct.Foo, align 4
1763+
%dst = alloca %struct.Foo, align 4
1764+
store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src
1765+
call void @use_nocapture(ptr %src)
1766+
call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dst, ptr align 4 %src, i64 12, i1 false)
1767+
call void @captures_address_only(ptr %dst)
1768+
ret void
1769+
}
1770+
1771+
; Cannot transform: Both addresses captured.
1772+
define void @test_captures_address_and_captures_address() {
1773+
; CHECK-LABEL: define void @test_captures_address_and_captures_address() {
1774+
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
1775+
; CHECK-NEXT: [[DST:%.*]] = alloca [[STRUCT_FOO]], align 4
1776+
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
1777+
; CHECK-NEXT: call void @captures_address_only(ptr [[SRC]])
1778+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DST]], ptr align 4 [[SRC]], i64 12, i1 false)
1779+
; CHECK-NEXT: call void @captures_address_only(ptr [[DST]])
1780+
; CHECK-NEXT: ret void
1781+
;
1782+
%src = alloca %struct.Foo, align 4
1783+
%dst = alloca %struct.Foo, align 4
1784+
store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src
1785+
call void @captures_address_only(ptr %src)
1786+
call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dst, ptr align 4 %src, i64 12, i1 false)
1787+
call void @captures_address_only(ptr %dst)
1788+
ret void
1789+
}

0 commit comments

Comments
 (0)