Skip to content

Commit

Permalink
[DSE] Eliminate stores at the end of the function.
Browse files Browse the repository at this point in the history
This patch adds support for eliminating MemoryDefs that do not have any
aliasing users, which indicates that there are no reads/writes to the
memory location until the end of the function.

To eliminate such defs, we have to ensure that the underlying object is
not visible in the caller and does not escape via returning. We need a
separate check for that, as InvisibleToCaller does not consider returns.

Reviewers: dmgreen, rnk, efriedma, bryant, asbirlea, Tyker, george.burgess.iv

Reviewed By: asbirlea

Differential Revision: https://reviews.llvm.org/D72631
  • Loading branch information
fhahn committed Jun 24, 2020
1 parent 0f42693 commit 4e62c63
Show file tree
Hide file tree
Showing 14 changed files with 156 additions and 110 deletions.
93 changes: 91 additions & 2 deletions llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1535,7 +1535,7 @@ struct DSEState {

auto *MD = dyn_cast_or_null<MemoryDef>(MA);
if (MD && State.MemDefs.size() < MemorySSADefsPerBlockLimit &&
hasAnalyzableMemoryWrite(&I, TLI) && isRemovable(&I))
State.getLocForWriteEx(&I) && isRemovable(&I))
State.MemDefs.push_back(MD);

// Track whether alloca and alloca-like objects are visible in the
Expand Down Expand Up @@ -1621,7 +1621,54 @@ struct DSEState {
UseInst, IOL, AA, &F) == OW_Complete;
}

/// Returns true if \p Use may read from \p DefLoc.
/// Returns true if \p Def is not read before returning from the function.
///
/// Starting at \p Def, the MemorySSA users are walked transitively:
/// MemoryPhis and MemoryDefs are expanded into their own users, and every
/// non-phi access is tested with isReadClobber() against the location written
/// by \p Def.
///
/// \param Def the write to examine.
/// \returns false if the written location cannot be determined, if the
/// worklist grows to MemorySSAScanLimit entries, or if any visited access is a
/// read clobber of the written location; true otherwise.
bool isWriteAtEndOfFunction(MemoryDef *Def) {
  LLVM_DEBUG(dbgs() << " Check if def " << *Def << " ("
                    << *Def->getMemoryInst()
                    << ") is at the end of the function\n");

  // Without a known written location there is nothing to compare reads
  // against, so conservatively report the write as possibly read.
  auto MaybeLoc = getLocForWriteEx(Def->getMemoryInst());
  if (!MaybeLoc) {
    LLVM_DEBUG(dbgs() << " ... could not get location for write.\n");
    return false;
  }

  SmallVector<MemoryAccess *, 4> WorkList;
  SmallPtrSet<MemoryAccess *, 8> Visited;
  // Queue every user of Acc. Visited ensures each access has its users
  // expanded at most once.
  auto PushMemUses = [&WorkList, &Visited](MemoryAccess *Acc) {
    if (!Visited.insert(Acc).second)
      return;
    for (Use &U : Acc->uses())
      WorkList.push_back(cast<MemoryAccess>(U.getUser()));
  };
  PushMemUses(Def);
  // The worklist grows while it is being scanned, so index it rather than
  // iterate it.
  for (unsigned I = 0; I < WorkList.size(); I++) {
    // The limit applies to the total number of queued accesses, not to the
    // number processed so far.
    if (WorkList.size() >= MemorySSAScanLimit) {
      LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n");
      return false;
    }

    MemoryAccess *UseAccess = WorkList[I];
    // A MemoryPhi has no instruction of its own; just continue with its users.
    // NOTE(review): the alias queries below use the single location computed
    // above. If the written pointer is recomputed on each loop iteration, a
    // read reached through a phi may refer to an earlier iteration's location
    // and be missed -- confirm whether the pointer must be loop-invariant.
    if (isa<MemoryPhi>(UseAccess)) {
      PushMemUses(UseAccess);
      continue;
    }

    // TODO: Checking for aliasing is expensive. Consider reducing the amount
    // of times this is called and/or caching it.
    Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
    if (isReadClobber(*MaybeLoc, UseInst)) {
      LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n");
      return false;
    }

    // A later def that does not read the location may still have users that
    // do, so keep following its users as well.
    if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess))
      PushMemUses(UseDef);
  }
  return true;
}

// Returns true if \p Use may read from \p DefLoc.
bool isReadClobber(MemoryLocation DefLoc, Instruction *UseInst) const {
if (!UseInst->mayReadFromMemory())
return false;
Expand Down Expand Up @@ -1923,6 +1970,47 @@ struct DSEState {
return false;
}

/// Eliminate writes to objects that are not visible in the caller and are not
/// accessed before returning from the function.
bool eliminateDeadWritesAtEndOfFunction() {
const DataLayout &DL = F.getParent()->getDataLayout();
bool MadeChange = false;
LLVM_DEBUG(
dbgs()
<< "Trying to eliminate MemoryDefs at the end of the function\n");
for (int I = MemDefs.size() - 1; I >= 0; I--) {
MemoryDef *Def = MemDefs[I];
if (SkipStores.find(Def) != SkipStores.end())
continue;

// TODO: Consider doing the underlying object check first, if it is
// beneficial compile-time wise.
if (isWriteAtEndOfFunction(Def)) {
Instruction *DefI = Def->getMemoryInst();
// See through pointer-to-pointer bitcasts
SmallVector<const Value *, 4> Pointers;
GetUnderlyingObjects(getLocForWriteEx(DefI)->Ptr, Pointers, DL);

LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
"of the function\n");
bool CanKill = true;
for (const Value *Pointer : Pointers) {
if (!InvisibleToCallerAfterRet.count(Pointer)) {
CanKill = false;
break;
}
}

if (CanKill) {
deleteDeadInstruction(DefI);
++NumFastStores;
MadeChange = true;
}
}
}
return MadeChange;
}

/// \returns true if \p Def is a no-op store, either because it
/// directly stores back a loaded value or stores zero to a calloced object.
bool storeIsNoop(MemoryDef *Def, MemoryLocation DefLoc, const Value *DefUO) {
Expand Down Expand Up @@ -2122,6 +2210,7 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
for (auto &KV : State.IOLs)
MadeChange |= removePartiallyOverlappedStores(&AA, DL, KV.second);

MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
return MadeChange;
}
} // end anonymous namespace
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
; XFAIL: *
; RUN: opt -dse -enable-dse-memoryssa -S < %s | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
Expand Down
1 change: 0 additions & 1 deletion llvm/test/Transforms/DeadStoreElimination/MSSA/libcalls.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
; XFAIL: *
; RUN: opt -S -basicaa -dse -enable-dse-memoryssa < %s | FileCheck %s

declare i8* @strcpy(i8* %dest, i8* %src) nounwind
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,9 @@ attributes #2 = { argmemonly nounwind }
!20 = !DILocation(line: 9, column: 5, scope: !14)
!21 = !DILocation(line: 10, column: 1, scope: !14)

; Check that the store is removed and that the memcpy is still there
; Check that both the store and memcpy are removed because they both access
; an alloca that is not read.
; CHECK-LABEL: foo
; CHECK-NOT: store i8
; CHECK: call void @llvm.memcpy
; CHECK-NOT: call void @llvm.memcpy
; CHECK: ret void
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; XFAIL: *
; RUN: opt -S -dse -enable-dse-memoryssa < %s | FileCheck %s

declare void @llvm.memcpy.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i1) nounwind
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -207,15 +207,12 @@ exit:
call void @capture(i8* %m)
ret i8* %m
}
; TODO: Remove store in exit.
; Stores to stack objects can be eliminated if they are not captured inside the function.
define void @test_alloca_nocapture_1() {
; CHECK-LABEL: @test_alloca_nocapture_1(
; CHECK-NEXT: [[M:%.*]] = alloca i8
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: store i8 1, i8* [[M]]
; CHECK-NEXT: ret void
;
%m = alloca i8
Expand All @@ -228,7 +225,6 @@ exit:
ret void
}

; TODO: Remove store in exit.
; Cannot remove first store i8 0, i8* %m, as the call to @capture captures the object.
define void @test_alloca_capture_1() {
; CHECK-LABEL: @test_alloca_capture_1(
Expand All @@ -237,7 +233,6 @@ define void @test_alloca_capture_1() {
; CHECK-NEXT: call void @capture(i8* [[M]])
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: store i8 1, i8* [[M]]
; CHECK-NEXT: ret void
;
%m = alloca i8
Expand All @@ -250,7 +245,6 @@ exit:
ret void
}

; TODO: Remove store at exit.
; We can remove the last store to %m, even though it escapes because the alloca
; becomes invalid after the function returns.
define void @test_alloca_capture_2(%S1* %E) {
Expand All @@ -260,7 +254,6 @@ define void @test_alloca_capture_2(%S1* %E) {
; CHECK: exit:
; CHECK-NEXT: [[F_PTR:%.*]] = getelementptr [[S1:%.*]], %S1* [[E:%.*]], i32 0, i32 0
; CHECK-NEXT: store i8* [[M]], i8** [[F_PTR]]
; CHECK-NEXT: store i8 1, i8* [[M]]
; CHECK-NEXT: ret void
;
%m = alloca i8
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ define void @test12(i32* %p) personality i32 (...)* @__CxxFrameHandler3 {
; CHECK-NEXT: [[C1:%.*]] = cleanuppad within none []
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: store i32 40, i32* [[SV]]
; CHECK-NEXT: ret void
;
block1:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,6 @@ define void @test27() {
; CHECK-NEXT: br i1 true, label [[BB2:%.*]], label [[BB3:%.*]]
; CHECK: bb2:
; CHECK-NEXT: [[M:%.*]] = call noalias i8* @malloc(i64 10)
; CHECK-NEXT: store i8 1, i8* [[M]]
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: [[R:%.*]] = phi i8* [ null, [[BB1:%.*]] ], [ [[M]], [[BB2]] ]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,18 +123,10 @@ bb3:
define void @alloca_1(i1 %c) {
; CHECK-LABEL: @alloca_1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[P_ALLOCA:%.*]] = alloca [32 x i32]
; CHECK-NEXT: [[P:%.*]] = bitcast [32 x i32]* [[P_ALLOCA]] to i32*
; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false)
; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
; CHECK: bb1:
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb2:
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: ret void
Expand All @@ -160,20 +152,10 @@ bb3:
define void @alloca_2(i1 %c) {
; CHECK-LABEL: @alloca_2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[P_ALLOCA:%.*]] = alloca [32 x i32]
; CHECK-NEXT: [[P:%.*]] = bitcast [32 x i32]* [[P_ALLOCA]] to i32*
; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false)
; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
; CHECK: bb1:
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb2:
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX2]], align 4
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: ret void
Expand Down Expand Up @@ -204,20 +186,12 @@ bb3:
define void @alloca_3(i1 %c) {
; CHECK-LABEL: @alloca_3(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[P_ALLOCA:%.*]] = alloca [32 x i32]
; CHECK-NEXT: [[P:%.*]] = bitcast [32 x i32]* [[P_ALLOCA]] to i32*
; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false)
; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
; CHECK: bb1:
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb2:
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4
; CHECK-NEXT: ret void
;
entry:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -589,14 +589,11 @@ define void @alloca_5(i1 %c) {
; CHECK-LABEL: @alloca_5(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_BLAM_4:%.*]], align 8
; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_BLAM_4]], %struct.blam.4* [[TMP]], i64 0, i32 0, i32 1
; CHECK-NEXT: [[TMP37:%.*]] = bitcast i64** [[TMP36]] to i8*
; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_BLAM_4]], %struct.blam.4* [[TMP]], i64 0, i32 0, i32 3
; CHECK-NEXT: [[TMP39:%.*]] = bitcast i64* [[TMP38]] to i64*
; CHECK-NEXT: store i64 0, i64* [[TMP39]], align 4
; CHECK-NEXT: br i1 [[C:%.*]], label [[BB46:%.*]], label [[BB47:%.*]]
; CHECK: bb46:
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 8 dereferenceable(20) [[TMP37]], i8 0, i64 26, i1 false)
; CHECK-NEXT: ret void
; CHECK: bb47:
; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_BLAM_4]], %struct.blam.4* [[TMP]], i64 0, i32 0, i32 2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,10 +174,8 @@ bb3:

define void @test11() {
; CHECK-LABEL: @test11(
; CHECK-NEXT: [[P:%.*]] = alloca i32, align 4
; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
; CHECK: bb1:
; CHECK-NEXT: store i32 0, i32* [[P]], align 4
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb2:
; CHECK-NEXT: ret void
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
; XFAIL: *
; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s

declare noalias i8* @malloc(i64) "malloc-like"
Expand Down
62 changes: 0 additions & 62 deletions llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,37 +10,6 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) n
declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
declare void @llvm.init.trampoline(i8*, i8*, i8*)

; Test for byval handling.
; The only memory write goes through the byval argument %a and is followed
; directly by the return. The expected output is a body consisting solely of
; `ret void`, i.e. both the getelementptr and the store are gone.
%struct.x = type { i32, i32, i32, i32 }
define void @test9(%struct.x* byval %a) nounwind {
; CHECK-LABEL: @test9(
; CHECK-NEXT: ret void
;
%tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
store i32 1, i32* %tmp2, align 4
ret void
}

; Test for inalloca handling.
; Same shape as the byval case, but the argument %a carries the inalloca
; attribute. The store through %a is followed directly by the return and the
; expected output is a body consisting solely of `ret void`.
define void @test9_2(%struct.x* inalloca %a) nounwind {
; CHECK-LABEL: @test9_2(
; CHECK-NEXT: ret void
;
%tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
store i32 1, i32* %tmp2, align 4
ret void
}

; Test for preallocated handling.
; Same shape as the byval case, but the argument %a carries the
; preallocated(%struct.x) attribute. The store through %a is followed directly
; by the return and the expected output is a body consisting solely of
; `ret void`.
define void @test9_3(%struct.x* preallocated(%struct.x) %a) nounwind {
; CHECK-LABEL: @test9_3(
; CHECK-NEXT: ret void
;
%tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
store i32 1, i32* %tmp2, align 4
ret void
}

; DSE should delete the dead trampoline.
declare void @test11f()
define void @test11() {
Expand All @@ -56,37 +25,6 @@ define void @test11() {

declare noalias i8* @malloc(i32)

; A value loaded from %Q is stored into the local alloca %P, which is never
; read afterwards. The expected output is a body consisting solely of
; `ret void`, i.e. the alloca, the load and the store are all gone.
define void @test14(i32* %Q) {
; CHECK-LABEL: @test14(
; CHECK-NEXT: ret void
;
%P = alloca i32
%DEAD = load i32, i32* %Q
store i32 %DEAD, i32* %P
ret void

}

; The result of @malloc is written once, never read, and not returned. The
; expected output is a body consisting solely of `ret void`, i.e. both the
; store and the call to @malloc are gone.
define void @test20() {
; CHECK-LABEL: @test20(
; CHECK-NEXT: ret void
;
%m = call i8* @malloc(i32 24)
store i8 0, i8* %m
ret void
}

; The stored-to pointer is a select between two local allocas, so the write
; has more than one possible underlying object. Neither alloca is read before
; the return, and the expected output is a body consisting solely of
; `ret void`.
define void @test22(i1 %i, i32 %k, i32 %m) nounwind {
; CHECK-LABEL: @test22(
; CHECK-NEXT: ret void
;
%k.addr = alloca i32
%m.addr = alloca i32
%k.addr.m.addr = select i1 %i, i32* %k.addr, i32* %m.addr
store i32 0, i32* %k.addr.m.addr, align 4
ret void
}

declare void @unknown_func()

; Remove redundant store if loaded value is in another block inside a loop.
Expand Down
Loading

0 comments on commit 4e62c63

Please sign in to comment.