Skip to content

Commit

Permalink
[MSSA] Fix PR28880 by fixing use optimizer's lower bound tracking beh…
Browse files Browse the repository at this point in the history
…avior.

Summary:
In the use optimizer, we need to keep track of whether the lower bound still
dominates us or else we may decide a lower bound is still valid when it
is not due to intervening pushes/pops.  Fixes PR28880 (and probably a
bunch of other things).

Reviewers: george.burgess.iv

Subscribers: MatzeB, llvm-commits, sebpop

Differential Revision: https://reviews.llvm.org/D23237

llvm-svn: 277978
  • Loading branch information
dberlin committed Aug 8, 2016
1 parent 02419a9 commit 4b4c722
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 4 deletions.
20 changes: 16 additions & 4 deletions llvm/lib/Transforms/Utils/MemorySSA.cpp
Expand Up @@ -1288,6 +1288,7 @@ class MemorySSA::OptimizeUses {
// Note: Correctness depends on this being initialized to 0, which densemap
// does
unsigned long LowerBound;
const BasicBlock *LowerBoundBlock;
// This is where the last walk for this memory location ended.
unsigned long LastKill;
bool LastKillValid;
Expand Down Expand Up @@ -1333,7 +1334,6 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock(
VersionStack.pop_back();
++PopEpoch;
}

for (MemoryAccess &MA : *Accesses) {
auto *MU = dyn_cast<MemoryUse>(&MA);
if (!MU) {
Expand All @@ -1355,13 +1355,24 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock(
if (LocInfo.PopEpoch != PopEpoch) {
LocInfo.PopEpoch = PopEpoch;
LocInfo.StackEpoch = StackEpoch;
// If the lower bound was in the info we popped, we have to reset it.
if (LocInfo.LowerBound >= VersionStack.size()) {
// If the lower bound was in something that no longer dominates us, we
// have to reset it.
// We can't simply track stack size, because the stack may have had
// pushes/pops in the meantime.
// XXX: This is non-optimal, but is only slower in cases with heavily
// branching dominator trees. The way to get the optimal number of queries
// would be to make lowerbound and lastkill a per-loc stack, and pop it
// until the top of that stack dominates us. This does not seem worth it ATM.
// A much cheaper optimization would be to always explore the deepest
// branch of the dominator tree first. This will guarantee this resets on
// the smallest set of blocks.
if (LocInfo.LowerBoundBlock && LocInfo.LowerBoundBlock != BB &&
!DT->dominates(LocInfo.LowerBoundBlock, BB)){
// Reset the lower bound of things to check.
// TODO: Some day we should be able to reset to last kill, rather than
// 0.

LocInfo.LowerBound = 0;
LocInfo.LowerBoundBlock = VersionStack[0]->getBlock();
LocInfo.LastKillValid = false;
}
} else if (LocInfo.StackEpoch != StackEpoch) {
Expand Down Expand Up @@ -1437,6 +1448,7 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock(
MU->setDefiningAccess(VersionStack[LocInfo.LastKill]);
}
LocInfo.LowerBound = VersionStack.size() - 1;
LocInfo.LowerBoundBlock = BB;
}
}

Expand Down
51 changes: 51 additions & 0 deletions llvm/test/Transforms/Util/MemorySSA/pr28880.ll
@@ -0,0 +1,51 @@
; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s
; RUN: opt -aa-pipeline=basic-aa -passes='print<memoryssa>,verify<memoryssa>' -disable-output < %s 2>&1 | FileCheck %s

; This testcase is reduced from SingleSource/Benchmarks/Misc/fbench.c
; It is testing to make sure that the MemorySSA use optimizer
; comes up with right answers when dealing with multiple MemoryLocations
; over different blocks. See PR28880 for more details.
@global = external hidden unnamed_addr global double, align 8
@global.1 = external hidden unnamed_addr global double, align 8

; Function Attrs: nounwind ssp uwtable
define hidden fastcc void @hoge() unnamed_addr #0 {
bb:
; The branch splits control flow into two separate dominator-tree branches;
; the use optimizer walks both and must reset its per-location lower bound
; when moving between them (the bug this test guards against — see PR28880).
br i1 undef, label %bb1, label %bb2

bb1: ; preds = %bb
; These accesses should not conflict.
; The store writes @global while the load reads the distinct global
; @global.1, so the load's defining access stays liveOnEntry.
; CHECK: 1 = MemoryDef(liveOnEntry)
; 1 = MemoryDef(liveOnEntry)
; CHECK-NEXT: store double undef, double* @global, align 8
store double undef, double* @global, align 8
; CHECK: MemoryUse(liveOnEntry)
; MemoryUse(liveOnEntry)
; CHECK-NEXT: %tmp = load double, double* @global.1, align 8
%tmp = load double, double* @global.1, align 8
unreachable

bb2: ; preds = %bb
br label %bb3

bb3: ; preds = %bb2
br i1 undef, label %bb4, label %bb6

bb4: ; preds = %bb3
; These accesses should conflict.
; Here both the store and the load touch @global.1, so the load must be
; defined by the store (access 2), not by liveOnEntry. A stale lower bound
; carried over from bb1 would wrongly report liveOnEntry here.
; CHECK: 2 = MemoryDef(liveOnEntry)
; 2 = MemoryDef(liveOnEntry)
; CHECK-NEXT: store double 0.000000e+00, double* @global.1, align 8
store double 0.000000e+00, double* @global.1, align 8
; CHECK: MemoryUse(2)
; MemoryUse(2)
; CHECK-NEXT: %tmp5 = load double, double* @global.1, align 8
%tmp5 = load double, double* @global.1, align 8
unreachable

bb6: ; preds = %bb3
unreachable
}

attributes #0 = { nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" }

0 comments on commit 4b4c722

Please sign in to comment.