Skip to content

Commit

Permalink
[LICM & MSSA] Limit unsafe sinking and hoisting.
Browse files Browse the repository at this point in the history
Summary:
The getClobberingMemoryAccess API checks for clobbering accesses in a loop by walking the backedge. This may check if a memory access is being
clobbered by the loop in a previous iteration, depending how smart AA got over the course of the updates in MemorySSA (it does not occur when built from scratch).
If no clobbering access is found inside the loop, it will optimize to an access outside the loop. This however does not mean that access is safe to sink.
Given:
```
for i
  load a[i]
  store a[i]
```
The access corresponding to the load can be optimized to outside the loop, and the load can be hoisted. But it is incorrect to sink it.
In order to sink the load, we'd need to check no Def clobbers the Use in the same iteration. With this patch we currently restrict sinking to either
Defs not existing in the loop, or Defs preceding the load in the same block. An easy extension is to ensure the load (Use) post-dominates all Defs.

Caught by PR42294.

This issue also shed light on the converse problem: hoisting stores in this same scenario would be illegal. With this patch we restrict
hoisting of stores to the case when their corresponding Defs are dominating all Uses in the loop.

Reviewers: george.burgess.iv

Subscribers: jlebar, Prazek, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D63582

llvm-svn: 363982
  • Loading branch information
alinas committed Jun 20, 2019
1 parent 3207566 commit d0b1169
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 12 deletions.
1 change: 1 addition & 0 deletions llvm/include/llvm/Transforms/Utils/LoopUtils.h
Expand Up @@ -106,6 +106,7 @@ struct SinkAndHoistLICMFlags {
unsigned LicmMssaOptCounter;
unsigned LicmMssaOptCap;
unsigned LicmMssaNoAccForPromotionCap;
bool IsSink;
};

/// Walk the specified region of the CFG (defined by all blocks
Expand Down
60 changes: 50 additions & 10 deletions llvm/lib/Transforms/Scalar/LICM.cpp
Expand Up @@ -376,10 +376,12 @@ bool LoopInvariantCodeMotion::runOnLoop(
// us to sink instructions in one pass, without iteration. After sinking
// instructions, we perform another pass to hoist them out of the loop.
SinkAndHoistLICMFlags Flags = {NoOfMemAccTooLarge, LicmMssaOptCounter,
LicmMssaOptCap, LicmMssaNoAccForPromotionCap};
LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
/*IsSink=*/true};
if (L->hasDedicatedExits())
Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
Flags.IsSink = false;
if (Preheader)
Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
Expand Down Expand Up @@ -1224,6 +1226,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
// Could do better here, but this is conservatively correct.
// TODO: Cache set of Uses on the first walk in runOnLoop, update when
// moving accesses. Can also extend to dominating uses.
auto *SIMD = MSSA->getMemoryAccess(SI);
for (auto *BB : CurLoop->getBlocks())
if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
for (const auto &MA : *Accesses)
Expand All @@ -1232,6 +1235,12 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
if (!MSSA->isLiveOnEntryDef(MD) &&
CurLoop->contains(MD->getBlock()))
return false;
// Disable hoisting past potentially interfering loads. Optimized
// Uses may point to an access outside the loop, as getClobbering
// checks the previous iteration when walking the backedge.
// FIXME: More precise: no Uses that alias SI.
if (!Flags->IsSink && !MSSA->dominates(SIMD, MU))
return false;
} else if (const auto *MD = dyn_cast<MemoryDef>(&MA))
if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
(void)LI; // Silence warning.
Expand Down Expand Up @@ -2257,16 +2266,47 @@ static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
Loop *CurLoop,
SinkAndHoistLICMFlags &Flags) {
MemoryAccess *Source;
// See declaration of SetLicmMssaOptCap for usage details.
if (Flags.LicmMssaOptCounter >= Flags.LicmMssaOptCap)
Source = MU->getDefiningAccess();
else {
Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
Flags.LicmMssaOptCounter++;
// For hoisting, use the walker to determine safety
if (!Flags.IsSink) {
MemoryAccess *Source;
// See declaration of SetLicmMssaOptCap for usage details.
if (Flags.LicmMssaOptCounter >= Flags.LicmMssaOptCap)
Source = MU->getDefiningAccess();
else {
Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
Flags.LicmMssaOptCounter++;
}
return !MSSA->isLiveOnEntryDef(Source) &&
CurLoop->contains(Source->getBlock());
}
return !MSSA->isLiveOnEntryDef(Source) &&
CurLoop->contains(Source->getBlock());

// For sinking, we'd need to check all Defs below this use. The getClobbering
// call will look on the backedge of the loop, but will check aliasing with
// the instructions on the previous iteration.
// For example:
// for (i ... )
// load a[i] ( Use (LoE)
// store a[i] ( 1 = Def (2), with 2 = Phi for the loop.
// i++;
// The load sees no clobbering inside the loop, as the backedge alias check
// does phi translation, and will check aliasing against store a[i-1].
// However sinking the load outside the loop, below the store is incorrect.

// For now, only sink if there are no Defs in the loop, and the existing ones
// precede the use and are in the same block.
// FIXME: Increase precision: Safe to sink if Use post dominates the Def;
// needs PostDominatorTreeAnalysis.
// FIXME: More precise: no Defs that alias this Use.
if (Flags.NoOfMemAccTooLarge)
return true;
for (auto *BB : CurLoop->getBlocks())
if (auto *Accesses = MSSA->getBlockDefs(BB))
for (const auto &MA : *Accesses)
if (const auto *MD = dyn_cast<MemoryDef>(&MA))
if (MU->getBlock() != MD->getBlock() ||
!MSSA->locallyDominates(MD, MU))
return true;
return false;
}

/// Little predicate that returns true if the specified basic block is in
Expand Down
48 changes: 48 additions & 0 deletions llvm/test/Analysis/MemorySSA/pr42294.ll
@@ -0,0 +1,48 @@
; RUN: opt -loop-rotate -licm %s -disable-output -enable-mssa-loop-dependency=true -debug-only=licm 2>&1 | FileCheck %s -check-prefix=LICM
; RUN: opt -loop-rotate -licm %s -disable-output -enable-mssa-loop-dependency=false -debug-only=licm 2>&1 | FileCheck %s -check-prefix=LICM
; RUN: opt -loop-rotate -licm %s -S -enable-mssa-loop-dependency=true | FileCheck %s
; RUN: opt -loop-rotate -licm %s -S -enable-mssa-loop-dependency=false | FileCheck %s

; LICM: Using
; LICM-NOT: LICM sinking instruction: %.pre = load i8, i8* %arrayidx.phi.trans.insert

; CHECK-LABEL: @fn1
; CHECK-LABEL: entry:
; CHECK: br i1 true, label %[[END:.*]], label %[[PH:.*]]
; CHECK: [[PH]]:
; CHECK: br label %[[CRIT:.*]]
; CHECK: [[CRIT]]:
; CHECK: load i8
; CHECK: store i8
; CHECK: br i1 true, label %[[ENDCRIT:.*]], label %[[CRIT]]
; CHECK: [[ENDCRIT]]:
; CHECK-NOT: load i8
; CHECK: br label %[[END]]

target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64"
target triple = "s390x-unknown-linux-gnu"

define void @fn1() {
entry:
%g = alloca [9 x i8], align 1
br label %for.body

for.body: ; preds = %for.body.for.body_crit_edge, %entry
%0 = phi i64 [ 0, %entry ], [ %phitmp, %for.body.for.body_crit_edge ]
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body.for.body_crit_edge ]
%arrayidx = getelementptr inbounds [9 x i8], [9 x i8]* %g, i64 0, i64 %indvars.iv
store i8 2, i8* %arrayidx, align 1
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br i1 undef, label %for.end18, label %for.body.for.body_crit_edge

for.body.for.body_crit_edge: ; preds = %for.body
%arrayidx.phi.trans.insert = getelementptr inbounds [9 x i8], [9 x i8]* %g, i64 0, i64 %indvars.iv.next
%.pre = load i8, i8* %arrayidx.phi.trans.insert, align 1
%phitmp = zext i8 %.pre to i64
br label %for.body

for.end18: ; preds = %for.body
store i64 %0, i64* undef, align 8
ret void
}

7 changes: 5 additions & 2 deletions llvm/test/Transforms/LICM/store-hoisting.ll
Expand Up @@ -348,8 +348,11 @@ exit:
; the load must observe.
define i32 @test_dominated_read(i32* %loc) {
; CHECK-LABEL: @test_dominated_read
; CHECK-LABEL: exit:
; CHECK: store i32 0, i32* %loc
; MSSA-LABEL: entry:
; MSSA: store i32 0, i32* %loc
; MSSA-LABEL: loop:
; AST-LABEL: exit:
; AST: store i32 0, i32* %loc
entry:
br label %loop

Expand Down

0 comments on commit d0b1169

Please sign in to comment.