Skip to content

Commit

Permalink
[LICM] Allow sinking when foldable in loop
Browse files Browse the repository at this point in the history
Summary:
Continue trying to sink an instruction if its users in the loop is foldable.
This will allow the instruction to be folded in the loop by decoupling it from
the user outside of the loop.

Reviewers: hfinkel, majnemer, davidxl, efriedma, danielcdh, bmakam, mcrosier

Reviewed By: hfinkel

Subscribers: javed.absar, bmakam, mcrosier, llvm-commits

Differential Revision: https://reviews.llvm.org/D37076

llvm-svn: 320823
  • Loading branch information
Jun Bum Lim committed Dec 15, 2017
1 parent 18b0d1e commit 22855c2
Show file tree
Hide file tree
Showing 3 changed files with 228 additions and 31 deletions.
5 changes: 3 additions & 2 deletions llvm/include/llvm/Transforms/Utils/LoopUtils.h
Expand Up @@ -436,8 +436,9 @@ bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI,
/// instructions of the loop and loop safety information as
/// arguments. Diagnostics is emitted via \p ORE. It returns changed status.
bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
TargetLibraryInfo *, Loop *, AliasSetTracker *,
LoopSafetyInfo *, OptimizationRemarkEmitter *ORE);
TargetLibraryInfo *, TargetTransformInfo *, Loop *,
AliasSetTracker *, LoopSafetyInfo *,
OptimizationRemarkEmitter *ORE);

/// \brief Walk the specified region of the CFG (defined by all blocks
/// dominated by the specified block, and that are in the current loop) in depth
Expand Down
107 changes: 78 additions & 29 deletions llvm/lib/Transforms/Scalar/LICM.cpp
Expand Up @@ -90,14 +90,15 @@ static cl::opt<uint32_t> MaxNumUsesTraversed(
"invariance in loop using invariant start (default = 8)"));

static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo);
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo,
TargetTransformInfo *TTI, bool &FreeInLoop);
static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE);
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE);
OptimizationRemarkEmitter *ORE, bool FreeInLoop);
static bool isSafeToExecuteUnconditionally(Instruction &Inst,
const DominatorTree *DT,
const Loop *CurLoop,
Expand All @@ -115,7 +116,8 @@ CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
namespace {
struct LoopInvariantCodeMotion {
bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
TargetLibraryInfo *TLI, ScalarEvolution *SE, MemorySSA *MSSA,
TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
ScalarEvolution *SE, MemorySSA *MSSA,
OptimizationRemarkEmitter *ORE, bool DeleteAST);

DenseMap<Loop *, AliasSetTracker *> &getLoopToAliasSetMap() {
Expand Down Expand Up @@ -159,6 +161,8 @@ struct LegacyLICMPass : public LoopPass {
&getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
&getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
*L->getHeader()->getParent()),
SE ? &SE->getSE() : nullptr, MSSA, &ORE, false);
}

Expand All @@ -170,6 +174,7 @@ struct LegacyLICMPass : public LoopPass {
AU.addRequired<TargetLibraryInfoWrapperPass>();
if (EnableMSSALoopDependency)
AU.addRequired<MemorySSAWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
getLoopAnalysisUsage(AU);
}

Expand Down Expand Up @@ -210,8 +215,8 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
"cached at a higher level");

LoopInvariantCodeMotion LICM;
if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.SE, AR.MSSA, ORE,
true))
if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE,
AR.MSSA, ORE, true))
return PreservedAnalyses::all();

auto PA = getLoopPassPreservedAnalyses();
Expand All @@ -224,6 +229,7 @@ INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
false)
Expand All @@ -236,12 +242,10 @@ Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
/// We should delete AST for inner loops in the new pass manager to avoid
/// memory leak.
///
bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AliasAnalysis *AA,
LoopInfo *LI, DominatorTree *DT,
TargetLibraryInfo *TLI,
ScalarEvolution *SE, MemorySSA *MSSA,
OptimizationRemarkEmitter *ORE,
bool DeleteAST) {
bool LoopInvariantCodeMotion::runOnLoop(
Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
TargetLibraryInfo *TLI, TargetTransformInfo *TTI, ScalarEvolution *SE,
MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST) {
bool Changed = false;

assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
Expand All @@ -266,7 +270,7 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AliasAnalysis *AA,
// instructions, we perform another pass to hoist them out of the loop.
//
if (L->hasDedicatedExits())
Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
CurAST, &SafetyInfo, ORE);
if (Preheader)
Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
Expand Down Expand Up @@ -359,7 +363,8 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AliasAnalysis *AA,
/// definitions, allowing us to sink a loop body in one pass without iteration.
///
bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
DominatorTree *DT, TargetLibraryInfo *TLI,
TargetTransformInfo *TTI, Loop *CurLoop,
AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE) {

Expand Down Expand Up @@ -400,12 +405,15 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
// outside of the loop. In this case, it doesn't even matter if the
// operands of the instruction are loop invariant.
//
if (isNotUsedInLoop(I, CurLoop, SafetyInfo) &&
bool FreeInLoop = false;
if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE)) {
if (sink(I, LI, DT, CurLoop, SafetyInfo, ORE)) {
++II;
CurAST->deleteValue(&I);
I.eraseFromParent();
if (sink(I, LI, DT, CurLoop, SafetyInfo, ORE, FreeInLoop)) {
if (!FreeInLoop) {
++II;
CurAST->deleteValue(&I);
I.eraseFromParent();
}
Changed = true;
}
}
Expand Down Expand Up @@ -708,13 +716,39 @@ static bool isTriviallyReplacablePHI(const PHINode &PN, const Instruction &I) {
return true;
}

/// Return true if the instruction is free in the loop.
static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
const TargetTransformInfo *TTI) {
if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) {
if (TTI->getUserCost(&I) != TargetTransformInfo::TCC_Free)
return false;
// For a GEP, we cannot simply use getUserCost because currently it
// optimistically assume that a GEP will fold into addressing mode
// regardless of its users.
const BasicBlock *BB = I.getParent();
for (const User *U : I.users()) {
const Instruction *UI = cast<Instruction>(U);
if (CurLoop->contains(UI) &&
(BB != UI->getParent() ||
(!isa<StoreInst>(UI) && !isa<LoadInst>(UI))))
return false;
}
return true;
} else
return TTI->getUserCost(&I) == TargetTransformInfo::TCC_Free;
}

/// Return true if the only users of this instruction are outside of
/// the loop. If this is true, we can sink the instruction to the exit
/// blocks of the loop.
///
static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo) {
/// We also return true if the instruction could be folded away in lowering.
/// (e.g., a GEP can be folded into a load as an addressing mode in the loop).
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo,
TargetTransformInfo *TTI, bool &FreeInLoop) {
const auto &BlockColors = SafetyInfo->BlockColors;
bool IsFree = isFreeInLoop(I, CurLoop, TTI);
for (const User *U : I.users()) {
const Instruction *UI = cast<Instruction>(U);
if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
Expand All @@ -731,8 +765,13 @@ static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
return false;
}

if (CurLoop->contains(UI))
if (CurLoop->contains(UI)) {
if (IsFree) {
FreeInLoop = true;
continue;
}
return false;
}
}
return true;
}
Expand Down Expand Up @@ -888,7 +927,7 @@ static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
///
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE) {
OptimizationRemarkEmitter *ORE, bool FreeInLoop) {
DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
Expand All @@ -900,7 +939,6 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
else if (isa<CallInst>(I))
++NumMovedCalls;
++NumSunk;
Changed = true;

// Iterate over users to be ready for actual sinking. Replace users via
// unrechable blocks with undef and make all user PHIs trivially replcable.
Expand All @@ -910,11 +948,12 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
Use &U = UI.getUse();
++UI;

if (VisitedUsers.count(User))
if (VisitedUsers.count(User) || CurLoop->contains(User))
continue;

if (!DT->isReachableFromEntry(User->getParent())) {
U = UndefValue::get(I.getType());
Changed = true;
continue;
}

Expand All @@ -927,6 +966,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
BasicBlock *BB = PN->getIncomingBlock(U);
if (!DT->isReachableFromEntry(BB)) {
U = UndefValue::get(I.getType());
Changed = true;
continue;
}

Expand All @@ -935,7 +975,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
continue;

if (!canSplitPredecessors(PN))
return false;
return Changed;

// Split predecessors of the PHI so that we can make users trivially
// replacable.
Expand All @@ -947,6 +987,9 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
UE = I.user_end();
}

if (VisitedUsers.empty())
return Changed;

#ifndef NDEBUG
SmallVector<BasicBlock *, 32> ExitBlocks;
CurLoop->getUniqueExitBlocks(ExitBlocks);
Expand All @@ -960,16 +1003,22 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
// If this instruction is only used outside of the loop, then all users are
// PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
// the instruction.
while (!I.use_empty()) {
Value::user_iterator UI = I.user_begin();
PHINode *PN = cast<PHINode>(*UI);
SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end());
for (auto *UI : Users) {
auto *User = cast<Instruction>(UI);

if (CurLoop->contains(User))
continue;

PHINode *PN = cast<PHINode>(User);
assert(ExitBlockSet.count(PN->getParent()) &&
"The LCSSA PHI is not in an exit block!");
// The PHI must be trivially replacable.
Instruction *New = sinkThroughTriviallyReplacablePHI(PN, &I, LI, SunkCopies,
SafetyInfo, CurLoop);
PN->replaceAllUsesWith(New);
PN->eraseFromParent();
Changed = true;
}
return Changed;
}
Expand Down

0 comments on commit 22855c2

Please sign in to comment.