87 changes: 63 additions & 24 deletions llvm/lib/Transforms/Utils/LoopSimplify.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,10 @@
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
Expand All @@ -70,6 +71,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;

Expand Down Expand Up @@ -118,7 +120,8 @@ static void placeSplitBlockCarefully(BasicBlock *NewBB,
/// preheader insertion and analysis updating.
///
BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT,
LoopInfo *LI, bool PreserveLCSSA) {
LoopInfo *LI, MemorySSAUpdater *MSSAU,
bool PreserveLCSSA) {
BasicBlock *Header = L->getHeader();

// Compute the set of predecessors of the loop that are not in the loop.
Expand All @@ -141,7 +144,7 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT,
// Split out the loop pre-header.
BasicBlock *PreheaderBB;
PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", DT,
LI, nullptr, PreserveLCSSA);
LI, MSSAU, PreserveLCSSA);
if (!PreheaderBB)
return nullptr;

Expand Down Expand Up @@ -221,7 +224,7 @@ static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT,
static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
DominatorTree *DT, LoopInfo *LI,
ScalarEvolution *SE, bool PreserveLCSSA,
AssumptionCache *AC) {
AssumptionCache *AC, MemorySSAUpdater *MSSAU) {
// Don't try to separate loops without a preheader.
if (!Preheader)
return nullptr;
Expand Down Expand Up @@ -255,7 +258,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
SE->forgetLoop(L);

BasicBlock *NewBB = SplitBlockPredecessors(Header, OuterLoopPreds, ".outer",
DT, LI, nullptr, PreserveLCSSA);
DT, LI, MSSAU, PreserveLCSSA);

// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
Expand Down Expand Up @@ -318,7 +321,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,

// Split edges to exit blocks from the inner loop, if they emerged in the
// process of separating the outer one.
formDedicatedExitBlocks(L, DT, LI, nullptr, PreserveLCSSA);
formDedicatedExitBlocks(L, DT, LI, MSSAU, PreserveLCSSA);

if (PreserveLCSSA) {
// Fix LCSSA form for L. Some values, which previously were only used inside
Expand All @@ -343,7 +346,8 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
/// and have that block branch to the loop header. This ensures that loops
/// have exactly one backedge.
static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
DominatorTree *DT, LoopInfo *LI) {
DominatorTree *DT, LoopInfo *LI,
MemorySSAUpdater *MSSAU) {
assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");

// Get information about the loop
Expand Down Expand Up @@ -456,15 +460,22 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
// Update dominator information
DT->splitBlock(BEBlock);

if (MSSAU)
MSSAU->updatePhisWhenInsertingUniqueBackedgeBlock(Header, Preheader,
BEBlock);

return BEBlock;
}

/// Simplify one loop and queue further loops for simplification.
static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
DominatorTree *DT, LoopInfo *LI,
ScalarEvolution *SE, AssumptionCache *AC,
bool PreserveLCSSA) {
MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
bool Changed = false;
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();

ReprocessLoop:

// Check to see that no blocks (other than the header) in this loop have
Expand All @@ -491,11 +502,15 @@ static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,

// Zap the dead pred's terminator and replace it with unreachable.
Instruction *TI = P->getTerminator();
changeToUnreachable(TI, /*UseLLVMTrap=*/false, PreserveLCSSA);
changeToUnreachable(TI, /*UseLLVMTrap=*/false, PreserveLCSSA,
/*DTU=*/nullptr, MSSAU);
Changed = true;
}
}

if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();

// If there are exiting blocks with branches on undef, resolve the undef in
// the direction which will exit the loop. This will help simplify loop
// trip count computations.
Expand All @@ -520,7 +535,7 @@ static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
// Does the loop already have a preheader? If so, don't insert one.
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
Preheader = InsertPreheaderForLoop(L, DT, LI, MSSAU, PreserveLCSSA);
if (Preheader)
Changed = true;
}
Expand All @@ -529,9 +544,12 @@ static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
// predecessors that are inside of the loop. This check guarantees that the
// loop preheader/header will dominate the exit blocks. If the exit block has
// predecessors from outside of the loop, split the edge now.
if (formDedicatedExitBlocks(L, DT, LI, nullptr, PreserveLCSSA))
if (formDedicatedExitBlocks(L, DT, LI, MSSAU, PreserveLCSSA))
Changed = true;

if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();

// If the header has more than two predecessors at this point (from the
// preheader and from multiple backedges), we must adjust the loop.
BasicBlock *LoopLatch = L->getLoopLatch();
Expand All @@ -540,8 +558,8 @@ static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
// this for loops with a giant number of backedges, just factor them into a
// common backedge instead.
if (L->getNumBackEdges() < 8) {
if (Loop *OuterL =
separateNestedLoop(L, Preheader, DT, LI, SE, PreserveLCSSA, AC)) {
if (Loop *OuterL = separateNestedLoop(L, Preheader, DT, LI, SE,
PreserveLCSSA, AC, MSSAU)) {
++NumNested;
// Enqueue the outer loop as it should be processed next in our
// depth-first nest walk.
Expand All @@ -558,11 +576,14 @@ static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
// If we either couldn't, or didn't want to, identify nesting of the loops,
// insert a new block that all backedges target, then make it jump to the
// loop header.
LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI);
LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI, MSSAU);
if (LoopLatch)
Changed = true;
}

if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();

const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

// Scan over the PHI nodes in the loop header. Since they now have only two
Expand Down Expand Up @@ -620,9 +641,9 @@ static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
Instruction *Inst = &*I++;
if (Inst == CI)
continue;
if (!L->makeLoopInvariant(Inst, AnyInvariant,
Preheader ? Preheader->getTerminator()
: nullptr)) {
if (!L->makeLoopInvariant(
Inst, AnyInvariant,
Preheader ? Preheader->getTerminator() : nullptr, MSSAU)) {
AllInvariant = false;
break;
}
Expand All @@ -639,7 +660,7 @@ static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
// The block has now been cleared of all instructions except for
// a comparison and a conditional branch. SimplifyCFG may be able
// to fold it now.
if (!FoldBranchToCommonDest(BI))
if (!FoldBranchToCommonDest(BI, MSSAU))
continue;

// Success. The block is now dead, so remove it from the loop,
Expand All @@ -659,6 +680,10 @@ static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
DT->changeImmediateDominator(Child, Node->getIDom());
}
DT->eraseNode(ExitingBlock);
if (MSSAU) {
SmallPtrSet<BasicBlock *, 1> ExitBlockSet{ExitingBlock};
MSSAU->removeBlocks(ExitBlockSet);
}

BI->getSuccessor(0)->removePredecessor(
ExitingBlock, /* KeepOneInputPHIs */ PreserveLCSSA);
Expand All @@ -674,12 +699,15 @@ static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
if (Changed && SE)
SE->forgetTopmostLoop(L);

if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();

return Changed;
}

bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
ScalarEvolution *SE, AssumptionCache *AC,
bool PreserveLCSSA) {
MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
bool Changed = false;

#ifndef NDEBUG
Expand Down Expand Up @@ -707,7 +735,7 @@ bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,

while (!Worklist.empty())
Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, DT, LI, SE,
AC, PreserveLCSSA);
AC, MSSAU, PreserveLCSSA);

return Changed;
}
Expand Down Expand Up @@ -740,6 +768,7 @@ namespace {
AU.addPreserved<DependenceAnalysisWrapperPass>();
AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
AU.addPreserved<BranchProbabilityInfoWrapperPass>();
AU.addPreserved<MemorySSAWrapperPass>();
}

/// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
Expand Down Expand Up @@ -771,12 +800,21 @@ bool LoopSimplify::runOnFunction(Function &F) {
ScalarEvolution *SE = SEWP ? &SEWP->getSE() : nullptr;
AssumptionCache *AC =
&getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
MemorySSA *MSSA = nullptr;
std::unique_ptr<MemorySSAUpdater> MSSAU;
if (EnableMSSALoopDependency) {
auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
if (MSSAAnalysis) {
MSSA = &MSSAAnalysis->getMSSA();
MSSAU = make_unique<MemorySSAUpdater>(MSSA);
}
}

bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);

// Simplify each loop nest in the function.
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
Changed |= simplifyLoop(*I, DT, LI, SE, AC, PreserveLCSSA);
Changed |= simplifyLoop(*I, DT, LI, SE, AC, MSSAU.get(), PreserveLCSSA);

#ifndef NDEBUG
if (PreserveLCSSA) {
Expand All @@ -797,9 +835,10 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F,
AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F);

// Note that we don't preserve LCSSA in the new PM; if you need it, run LCSSA
// after simplifying the loops.
// after simplifying the loops. MemorySSA is not preserved either.
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
Changed |= simplifyLoop(*I, DT, LI, SE, AC, /*PreserveLCSSA*/ false);
Changed |=
simplifyLoop(*I, DT, LI, SE, AC, nullptr, /*PreserveLCSSA*/ false);

if (!Changed)
return PreservedAnalyses::all();
Expand All @@ -816,7 +855,7 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F,
// blocks, but it does so only by splitting existing blocks and edges. This
// results in the interesting property that all new terminators inserted are
// unconditional branches which do not appear in BPI. All deletions are
// handled via ValueHandle callbacks w/in BPI.
// handled via ValueHandle callbacks w/in BPI.
PA.preserve<BranchProbabilityAnalysis>();
return PA;
}
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Transforms/Utils/LoopUnroll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -884,11 +884,11 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,

// TODO: That potentially might be compile-time expensive. We should try
// to fix the loop-simplified form incrementally.
simplifyLoop(OuterL, DT, LI, SE, AC, PreserveLCSSA);
simplifyLoop(OuterL, DT, LI, SE, AC, nullptr, PreserveLCSSA);
} else {
// Simplify loops for which we might've broken loop-simplify form.
for (Loop *SubLoop : LoopsToSimplify)
simplifyLoop(SubLoop, DT, LI, SE, AC, PreserveLCSSA);
simplifyLoop(SubLoop, DT, LI, SE, AC, nullptr, PreserveLCSSA);
}
}

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -671,7 +671,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
SE->forgetTopmostLoop(L);

// FIXME: Incrementally update loop-simplify
simplifyLoop(L, DT, LI, SE, AC, PreserveLCSSA);
simplifyLoop(L, DT, LI, SE, AC, nullptr, PreserveLCSSA);

NumPeeled++;

Expand Down
34 changes: 24 additions & 10 deletions llvm/lib/Transforms/Utils/SimplifyCFG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
Expand Down Expand Up @@ -65,6 +66,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
Expand Down Expand Up @@ -291,9 +293,13 @@ isProfitableToFoldUnconditional(BranchInst *SI1, BranchInst *SI2,
/// will be the same as those coming in from ExistPred, an existing predecessor
/// of Succ.
static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
BasicBlock *ExistPred) {
BasicBlock *ExistPred,
MemorySSAUpdater *MSSAU = nullptr) {
for (PHINode &PN : Succ->phis())
PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
if (MSSAU)
if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
}

/// Compute an abstract "cost" of speculating the given instruction,
Expand Down Expand Up @@ -669,7 +675,8 @@ struct ConstantComparesGatherer {

} // end anonymous namespace

static void EraseTerminatorAndDCECond(Instruction *TI) {
static void EraseTerminatorAndDCECond(Instruction *TI,
MemorySSAUpdater *MSSAU = nullptr) {
Instruction *Cond = nullptr;
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
Cond = dyn_cast<Instruction>(SI->getCondition());
Expand All @@ -682,7 +689,7 @@ static void EraseTerminatorAndDCECond(Instruction *TI) {

TI->eraseFromParent();
if (Cond)
RecursivelyDeleteTriviallyDeadInstructions(Cond);
RecursivelyDeleteTriviallyDeadInstructions(Cond, nullptr, MSSAU);
}

/// Return true if the specified terminator checks
Expand Down Expand Up @@ -2546,7 +2553,8 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
/// If this basic block is simple enough, and if a predecessor branches to us
/// and one of our successors, fold the block into the predecessor and use
/// logical operations to pick the right destination.
bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
unsigned BonusInstThreshold) {
BasicBlock *BB = BI->getParent();

const unsigned PredCount = pred_size(BB);
Expand Down Expand Up @@ -2757,7 +2765,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
(SuccFalseWeight + SuccTrueWeight) +
PredTrueWeight * SuccFalseWeight);
}
AddPredecessorToBlock(TrueDest, PredBlock, BB);
AddPredecessorToBlock(TrueDest, PredBlock, BB, MSSAU);
PBI->setSuccessor(0, TrueDest);
}
if (PBI->getSuccessor(1) == BB) {
Expand All @@ -2772,7 +2780,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
// FalseWeight is FalseWeight for PBI * FalseWeight for BI.
NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
}
AddPredecessorToBlock(FalseDest, PredBlock, BB);
AddPredecessorToBlock(FalseDest, PredBlock, BB, MSSAU);
PBI->setSuccessor(1, FalseDest);
}
if (NewWeights.size() == 2) {
Expand Down Expand Up @@ -2820,9 +2828,15 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
PHIs[i]->setIncomingValue(PHIs[i]->getBasicBlockIndex(PBI->getParent()),
MergedCond);
}

// PBI is changed to branch to TrueDest below. Remove it from potential
// phis in all other successors.
if (MSSAU)
MSSAU->changeCondBranchToUnconditionalTo(PBI, TrueDest);

// Change PBI from Conditional to Unconditional.
BranchInst *New_PBI = BranchInst::Create(TrueDest, PBI);
EraseTerminatorAndDCECond(PBI);
EraseTerminatorAndDCECond(PBI, MSSAU);
PBI = New_PBI;
}

Expand Down Expand Up @@ -5805,7 +5819,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,
// branches to us and our successor, fold the comparison into the
// predecessor and use logical operations to update the incoming value
// for PHI nodes in common successor.
if (FoldBranchToCommonDest(BI, Options.BonusInstThreshold))
if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold))
return requestResimplify();
return false;
}
Expand Down Expand Up @@ -5869,7 +5883,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// If this basic block is ONLY a compare and a branch, and if a predecessor
// branches to us and one of our successors, fold the comparison into the
// predecessor and use logical operations to pick the right destination.
if (FoldBranchToCommonDest(BI, Options.BonusInstThreshold))
if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold))
return requestResimplify();

// We have a conditional branch to two blocks that are only reachable
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7574,7 +7574,8 @@ bool LoopVectorizePass::runImpl(
// will simplify all loops, regardless of whether anything ends up being
// vectorized.
for (auto &L : *LI)
Changed |= simplifyLoop(L, DT, LI, SE, AC, false /* PreserveLCSSA */);
Changed |=
simplifyLoop(L, DT, LI, SE, AC, nullptr, false /* PreserveLCSSA */);

// Build up a worklist of inner-loops to vectorize. This is necessary as
// the act of vectorizing or partially unrolling a loop creates new loops
Expand Down