Expand Up
@@ -85,6 +85,9 @@ STATISTIC(NonEqualTripCount, "Loop trip counts are not the same");
STATISTIC (NonAdjacent, " Loops are not adjacent" );
STATISTIC (NonEmptyPreheader, " Loop has a non-empty preheader" );
STATISTIC (FusionNotBeneficial, " Fusion is not beneficial" );
STATISTIC (NonIdenticalGuards, " Candidates have different guards" );
STATISTIC (NonEmptyExitBlock, " Candidate has a non-empty exit block" );
STATISTIC (NonEmptyGuardBlock, " Candidate has a non-empty guard block" );
enum FusionDependenceAnalysisChoice {
FUSION_DEPENDENCE_ANALYSIS_SCEV,
Expand Down
Expand Up
@@ -144,6 +147,8 @@ struct FusionCandidate {
SmallVector<Instruction *, 16 > MemWrites;
// / Are all of the members of this fusion candidate still valid
bool Valid;
// / Guard branch of the loop, if it exists
BranchInst *GuardBranch;
// / Dominator and PostDominator trees are needed for the
// / FusionCandidateCompare function, required by FusionCandidateSet to
Expand All
@@ -158,8 +163,14 @@ struct FusionCandidate {
const PostDominatorTree *PDT, OptimizationRemarkEmitter &ORE)
: Preheader(L->getLoopPreheader ()), Header(L->getHeader ()),
ExitingBlock(L->getExitingBlock ()), ExitBlock(L->getExitBlock ()),
Latch(L->getLoopLatch ()), L(L), Valid(true ), DT(DT), PDT(PDT),
ORE(ORE) {
Latch(L->getLoopLatch ()), L(L), Valid(true ), GuardBranch(nullptr ),
DT(DT), PDT(PDT), ORE(ORE) {
// TODO: This is temporary while we fuse both rotated and non-rotated
// loops. Once we switch to only fusing rotated loops, the initialization of
// GuardBranch can be moved into the initialization list above.
if (isRotated ())
GuardBranch = L->getLoopGuardBranch ();
// Walk over all blocks in the loop and check for conditions that may
// prevent fusion. For each block, walk over all instructions and collect
Expand Down
Expand Up
@@ -218,16 +229,55 @@ struct FusionCandidate {
assert (Latch == L->getLoopLatch () && " Latch is out of sync" );
}
// / Get the entry block for this fusion candidate.
// /
// / If this fusion candidate represents a guarded loop, the entry block is the
// / loop guard block. If it represents an unguarded loop, the entry block is
// / the preheader of the loop.
/// Return the block through which control first reaches this candidate: the
/// block holding the guard branch when the loop is guarded, and the loop
/// preheader otherwise.
BasicBlock *getEntryBlock() const {
  return GuardBranch ? GuardBranch->getParent() : Preheader;
}
// / Given a guarded loop, get the successor of the guard that is not in the
// / loop.
// /
// / This method returns the successor of the loop guard that is not located
// / within the loop (i.e., the successor of the guard that is not the
// / preheader).
// / This method is only valid for guarded loops.
/// Return the guard successor that bypasses the loop, i.e. the successor of
/// the guard branch that is not the preheader. Only valid when the candidate
/// has a (conditional) guard branch.
BasicBlock *getNonLoopBlock() const {
  assert(GuardBranch && " Only valid on guarded loops." );
  assert(GuardBranch->isConditional() &&
         " Expecting guard to be a conditional branch." );
  BasicBlock *FirstTarget = GuardBranch->getSuccessor(0);
  // If the first successor does not enter the loop, it is the bypass target.
  if (FirstTarget != Preheader)
    return FirstTarget;
  return GuardBranch->getSuccessor(1);
}
/// Report whether the latch of this candidate's loop is also one of its
/// exiting blocks.
bool isRotated() const {
  assert(L && " Expecting loop to be valid." );
  assert(Latch && " Expecting latch to be valid." );
  const bool LatchIsExiting = L->isLoopExiting(Latch);
  return LatchIsExiting;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the guard branch and every block tracked by this candidate to
/// dbgs(), one entry per line. Missing entries are printed as "nullptr".
LLVM_DUMP_METHOD void dump() const {
  // A branch instruction is void-typed and therefore never has a name, so
  // print the instruction itself rather than an always-empty getName().
  dbgs() << " \t GuardBranch: ";
  if (GuardBranch)
    dbgs() << *GuardBranch;
  else
    dbgs() << " nullptr";
  dbgs() << " \n "
         << " \t Preheader: " << (Preheader ? Preheader->getName() : " nullptr" )
         << " \n "
         << " \t Header: " << (Header ? Header->getName() : " nullptr" ) << " \n "
         << " \t ExitingBB: "
         << (ExitingBlock ? ExitingBlock->getName() : " nullptr" ) << " \n "
         << " \t ExitBB: " << (ExitBlock ? ExitBlock->getName() : " nullptr" )
         << " \n "
         << " \t Latch: " << (Latch ? Latch->getName() : " nullptr" ) << " \n "
         << " \t EntryBlock: "
         << (getEntryBlock() ? getEntryBlock()->getName() : " nullptr" )
         << " \n ";
}
#endif
Expand Down
Expand Up
@@ -303,21 +353,24 @@ struct FusionCandidateCompare {
const FusionCandidate &RHS) const {
const DominatorTree *DT = LHS.DT ;
BasicBlock *LHSEntryBlock = LHS.getEntryBlock ();
BasicBlock *RHSEntryBlock = RHS.getEntryBlock ();
// Do not save PDT to local variable as it is only used in asserts and thus
// will trigger an unused variable warning if building without asserts.
assert (DT && LHS.PDT && " Expecting valid dominator tree" );
// Do this compare first so if LHS == RHS, function returns false.
if (DT->dominates (RHS. Preheader , LHS. Preheader )) {
if (DT->dominates (RHSEntryBlock, LHSEntryBlock )) {
// RHS dominates LHS
// Verify LHS post-dominates RHS
assert (LHS.PDT ->dominates (LHS. Preheader , RHS. Preheader ));
assert (LHS.PDT ->dominates (LHSEntryBlock, RHSEntryBlock ));
return false ;
}
if (DT->dominates (LHS. Preheader , RHS. Preheader )) {
if (DT->dominates (LHSEntryBlock, RHSEntryBlock )) {
// Verify RHS Postdominates LHS
assert (LHS.PDT ->dominates (RHS. Preheader , LHS. Preheader ));
assert (LHS.PDT ->dominates (RHSEntryBlock, LHSEntryBlock ));
return true ;
}
Expand Down
Expand Up
@@ -538,11 +591,14 @@ struct LoopFuser {
const FusionCandidate &FC1) const {
assert (FC0.Preheader && FC1.Preheader && " Expecting valid preheaders" );
if (DT.dominates (FC0.Preheader , FC1.Preheader ))
return PDT.dominates (FC1.Preheader , FC0.Preheader );
BasicBlock *FC0EntryBlock = FC0.getEntryBlock ();
BasicBlock *FC1EntryBlock = FC1.getEntryBlock ();
if (DT.dominates (FC0EntryBlock, FC1EntryBlock))
return PDT.dominates (FC1EntryBlock, FC0EntryBlock);
if (DT.dominates (FC1. Preheader , FC0. Preheader ))
return PDT.dominates (FC0. Preheader , FC1. Preheader );
if (DT.dominates (FC1EntryBlock, FC0EntryBlock ))
return PDT.dominates (FC0EntryBlock, FC1EntryBlock );
return false ;
}
Expand Down
Expand Up
@@ -677,11 +733,22 @@ struct LoopFuser {
continue ;
}
// For now we skip fusing if the second candidate has any instructions
// in the preheader. This is done because we currently do not have the
// safety checks to determine if it is safe to move the preheader of
// the second candidate past the body of the first candidate. Once
// these checks are added, this condition can be removed.
// Ensure that FC0 and FC1 have identical guards.
// If one (or both) are not guarded, this check is not necessary.
if (FC0->GuardBranch && FC1->GuardBranch &&
!haveIdenticalGuards (*FC0, *FC1)) {
LLVM_DEBUG (dbgs () << " Fusion candidates do not have identical "
" guards. Not Fusing.\n " );
reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
NonIdenticalGuards);
continue ;
}
// The following three checks look for empty blocks in FC0 and FC1. If
// any of these blocks are non-empty, we do not fuse. This is done
// because we currently do not have the safety checks to determine if
// it is safe to move the blocks past other blocks in the loop. Once
// these checks are added, these conditions can be relaxed.
if (!isEmptyPreheader (*FC1)) {
LLVM_DEBUG (dbgs () << " Fusion candidate does not have empty "
" preheader. Not fusing.\n " );
Expand All
@@ -690,6 +757,24 @@ struct LoopFuser {
continue ;
}
if (FC0->GuardBranch && !isEmptyExitBlock (*FC0)) {
LLVM_DEBUG (dbgs () << " Fusion candidate does not have empty exit "
" block. Not fusing.\n " );
reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
NonEmptyExitBlock);
continue ;
}
if (FC1->GuardBranch && !isEmptyGuardBlock (*FC1)) {
LLVM_DEBUG (dbgs () << " Fusion candidate does not have empty guard "
" block. Not fusing.\n " );
reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
NonEmptyGuardBlock);
continue ;
}
// Check the dependencies across the loops and do not fuse if it would
// violate them.
if (!dependencesAllowFusion (*FC0, *FC1)) {
LLVM_DEBUG (dbgs () << " Memory dependencies do not allow fusion!\n " );
reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
Expand Down
Expand Up
@@ -895,7 +980,7 @@ struct LoopFuser {
LLVM_DEBUG (dbgs () << " Check if " << FC0 << " can be fused with " << FC1
<< " \n " );
assert (FC0.L ->getLoopDepth () == FC1.L ->getLoopDepth ());
assert (DT.dominates (FC0.Preheader , FC1.Preheader ));
assert (DT.dominates (FC0.getEntryBlock () , FC1.getEntryBlock () ));
for (Instruction *WriteL0 : FC0.MemWrites ) {
for (Instruction *WriteL1 : FC1.MemWrites )
Expand Down
Expand Up
@@ -945,18 +1030,89 @@ struct LoopFuser {
return true ;
}
// / Determine if the exit block of \p FC0 is the preheader of \p FC1. In this
// / case, there is no code in between the two fusion candidates, thus making
// / them adjacent.
// / Determine if two fusion candidates are adjacent in the CFG.
// /
// / This method will determine if there are additional basic blocks in the CFG
// / between the exit of \p FC0 and the entry of \p FC1.
// / If the two candidates are guarded loops, then it checks whether the
// / non-loop successor of the \p FC0 guard branch is the entry block of \p
// / FC1. If not, then the loops are not adjacent. If the two candidates are
// / not guarded loops, then it checks whether the exit block of \p FC0 is the
// / preheader of \p FC1.
bool isAdjacent (const FusionCandidate &FC0,
const FusionCandidate &FC1) const {
return FC0.ExitBlock == FC1.Preheader ;
// If the successor of the guard branch is FC1, then the loops are adjacent
if (FC0.GuardBranch )
return FC0.getNonLoopBlock () == FC1.getEntryBlock ();
else
return FC0.ExitBlock == FC1.getEntryBlock ();
}
// / Determine if two fusion candidates have identical guards
// /
// / This method will determine if two fusion candidates have the same guards.
// / The guards are considered the same if:
// / 1. The instructions to compute the condition used in the compare are
// / identical.
// / 2. The successors of the guard have the same flow into/around the loop.
// / If the compare instructions are identical, then the first successor of the
// / guard must go to the same place (either the preheader of the loop or the
// / NonLoopBlock). In other words, the first successor of both loops must
// / both go into the loop (i.e., the preheader) or go around the loop (i.e.,
// / the NonLoopBlock). The same must be true for the second successor.
bool haveIdenticalGuards (const FusionCandidate &FC0,
const FusionCandidate &FC1) const {
assert (FC0.GuardBranch && FC1.GuardBranch &&
" Expecting FC0 and FC1 to be guarded loops." );
if (auto FC0CmpInst =
dyn_cast<Instruction>(FC0.GuardBranch ->getCondition ()))
if (auto FC1CmpInst =
dyn_cast<Instruction>(FC1.GuardBranch ->getCondition ()))
if (!FC0CmpInst->isIdenticalTo (FC1CmpInst))
return false ;
// The compare instructions are identical.
// Now make sure the successor of the guards have the same flow into/around
// the loop
if (FC0.GuardBranch ->getSuccessor (0 ) == FC0.Preheader )
return (FC1.GuardBranch ->getSuccessor (0 ) == FC1.Preheader );
else
return (FC1.GuardBranch ->getSuccessor (1 ) == FC1.Preheader );
}
// / Check that the guard for \p FC *only* contains the cmp/branch for the
// / guard.
// / Once we are able to handle intervening code, any code in the guard block
// / for FC1 will need to be treated as intervening code and checked whether
// / it can safely move around the loops.
bool isEmptyGuardBlock (const FusionCandidate &FC) const {
assert (FC.GuardBranch && " Expecting a fusion candidate with guard branch." );
if (auto *CmpInst = dyn_cast<Instruction>(FC.GuardBranch ->getCondition ())) {
auto *GuardBlock = FC.GuardBranch ->getParent ();
// If the generation of the cmp value is in GuardBlock, then the size of
// the guard block should be 2 (cmp + branch). If the generation of the
// cmp value is in a different block, then the size of the guard block
// should only be 1.
if (CmpInst->getParent () == GuardBlock)
return GuardBlock->size () == 2 ;
else
return GuardBlock->size () == 1 ;
}
return false ;
}
bool isEmptyPreheader (const FusionCandidate &FC) const {
assert (FC.Preheader && " Expecting a valid preheader" );
return FC.Preheader ->size () == 1 ;
}
bool isEmptyExitBlock (const FusionCandidate &FC) const {
assert (FC.ExitBlock && " Expecting a valid exit block" );
return FC.ExitBlock ->size () == 1 ;
}
// / Fuse two fusion candidates, creating a new fused loop.
// /
// / This method contains the mechanics of fusing two loops, represented by \p
Expand Down
Expand Up
@@ -993,6 +1149,12 @@ struct LoopFuser {
LLVM_DEBUG (dbgs () << " Fusion Candidate 0: \n " ; FC0.dump ();
dbgs () << " Fusion Candidate 1: \n " ; FC1.dump (););
// Fusing guarded loops is handled slightly differently than non-guarded
// loops and has been broken out into a separate method instead of trying to
// intersperse the logic within a single method.
if (FC0.GuardBranch )
return fuseGuardedLoops (FC0, FC1);
assert (FC1.Preheader == FC0.ExitBlock );
assert (FC1.Preheader ->size () == 1 &&
FC1.Preheader ->getSingleSuccessor () == FC1.Header );
Expand Down
Expand Up
@@ -1137,8 +1299,6 @@ struct LoopFuser {
SE.verify ();
#endif
FuseCounter++;
LLVM_DEBUG (dbgs () << " Fusion done:\n " );
return FC0.L ;
Expand Down
Expand Up
@@ -1170,6 +1330,232 @@ struct LoopFuser {
<< " and " << NV (" Cand2" , StringRef (FC1.Preheader ->getName ()))
<< " : " << Stat.getDesc ());
}
// / Fuse two guarded fusion candidates, creating a new fused loop.
// /
// / Fusing guarded loops is handled much the same way as fusing non-guarded
// / loops. The rewiring of the CFG is slightly different though, because of
// / the presence of the guards around the loops and the exit blocks after the
// / loop body. As such, the new loop is rewired as follows:
// / 1. Keep the guard branch from FC0 and use the non-loop block target
// / from the FC1 guard branch.
// / 2. Remove the exit block from FC0 (this exit block should be empty
// / right now).
// / 3. Remove the guard branch for FC1
// / 4. Remove the preheader for FC1.
// / The exit block successor for the latch of FC0 is updated to be the header
// / of FC1 and the non-exit block successor of the latch of FC1 is updated to
// / be the header of FC0, thus creating the fused loop.
// Both candidates must be guarded, and the non-loop successor of the FC0 guard
// must be the FC1 guard block (both are asserted below). Returns FC0.L, which
// after this call also contains the blocks and child loops that used to belong
// to FC1.L; FC1.L itself is erased from LoopInfo.
Loop *fuseGuardedLoops (const FusionCandidate &FC0,
const FusionCandidate &FC1) {
assert (FC0.GuardBranch && FC1.GuardBranch && " Expecting guarded loops" );
BasicBlock *FC0GuardBlock = FC0.GuardBranch ->getParent ();
BasicBlock *FC1GuardBlock = FC1.GuardBranch ->getParent ();
BasicBlock *FC0NonLoopBlock = FC0.getNonLoopBlock ();
BasicBlock *FC1NonLoopBlock = FC1.getNonLoopBlock ();
assert (FC0NonLoopBlock == FC1GuardBlock && " Loops are not adjacent" );
// CFG edge insertions/deletions are collected here and applied to the
// dominator tree updater in one batch near the end of the function.
SmallVector<DominatorTree::UpdateType, 8 > TreeUpdates;
// //////////////////////////////////////////////////////////////////////////
// Update the Loop Guard
// //////////////////////////////////////////////////////////////////////////
// The guard for FC0 is updated to guard both FC0 and FC1. This is done by
// changing the NonLoopGuardBlock for FC0 to the NonLoopGuardBlock for FC1.
// Thus, one path from the guard goes to the preheader for FC0 (and thus
// executes the new fused loop) and the other path goes to the NonLoopBlock
// for FC1 (where FC1 guard would have gone if FC1 was not executed).
FC0.GuardBranch ->replaceUsesOfWith (FC0NonLoopBlock, FC1NonLoopBlock);
// The FC0 exit block no longer branches to the FC1 guard block; it is pointed
// at the FC1 header instead.
FC0.ExitBlock ->getTerminator ()->replaceUsesOfWith (FC1GuardBlock,
FC1.Header );
// The guard of FC1 is not necessary anymore.
FC1.GuardBranch ->eraseFromParent ();
// Give the guard block a terminator again now that its branch is gone.
new UnreachableInst (FC1GuardBlock->getContext (), FC1GuardBlock);
TreeUpdates.emplace_back (DominatorTree::UpdateType (
DominatorTree::Delete, FC1GuardBlock, FC1.Preheader ));
TreeUpdates.emplace_back (DominatorTree::UpdateType (
DominatorTree::Delete, FC1GuardBlock, FC1NonLoopBlock));
TreeUpdates.emplace_back (DominatorTree::UpdateType (
DominatorTree::Delete, FC0GuardBlock, FC1GuardBlock));
TreeUpdates.emplace_back (DominatorTree::UpdateType (
DominatorTree::Insert, FC0GuardBlock, FC1NonLoopBlock));
assert (pred_begin (FC1GuardBlock) == pred_end (FC1GuardBlock) &&
" Expecting guard block to have no predecessors" );
assert (succ_begin (FC1GuardBlock) == succ_end (FC1GuardBlock) &&
" Expecting guard block to have no successors" );
// Remember the phi nodes originally in the header of FC0 in order to rewire
// them later. However, this is only necessary if the new loop carried
// values might not dominate the exiting branch. While we do not generally
// test if this is the case but simply insert intermediate phi nodes, we
// need to make sure these intermediate phi nodes have different
// predecessors. To this end, we filter the special case where the exiting
// block is the latch block of the first loop. Nothing needs to be done
// anyway as all loop carried values dominate the latch and thereby also the
// exiting branch.
// KB: This is no longer necessary because FC0.ExitingBlock == FC0.Latch
// (because the loops are rotated). Thus, nothing will ever be added to
// OriginalFC0PHIs.
SmallVector<PHINode *, 8 > OriginalFC0PHIs;
if (FC0.ExitingBlock != FC0.Latch )
for (PHINode &PHI : FC0.Header ->phis ())
OriginalFC0PHIs.push_back (&PHI);
// With assertions enabled this rejects any FC0 whose exiting block is not its
// latch, since that is the only way the vector above can be non-empty.
assert (OriginalFC0PHIs.empty () && " Expecting OriginalFC0PHIs to be empty!" );
// Replace incoming blocks for header PHIs first.
FC1.Preheader ->replaceSuccessorsPhiUsesWith (FC0.Preheader );
FC0.Latch ->replaceSuccessorsPhiUsesWith (FC1.Latch );
// The old exiting block of the first loop (FC0) has to jump to the header
// of the second as we need to execute the code in the second header block
// regardless of the trip count. That is, if the trip count is 0, so the
// back edge is never taken, we still have to execute both loop headers,
// especially (but not only!) if the second is a do-while style loop.
// However, doing so might invalidate the phi nodes of the first loop as
// the new values do only need to dominate their latch and not the exiting
// predicate. To remedy this potential problem we always introduce phi
// nodes in the header of the second loop later that select the loop carried
// value, if the second header was reached through an old latch of the
// first, or undef otherwise. This is sound as exiting the first implies the
// second will exit too, __without__ taking the back-edge (their
// trip-counts are equal after all).
FC0.ExitingBlock ->getTerminator ()->replaceUsesOfWith (FC0.ExitBlock ,
FC1.Header );
TreeUpdates.emplace_back (DominatorTree::UpdateType (
DominatorTree::Delete, FC0.ExitingBlock , FC0.ExitBlock ));
TreeUpdates.emplace_back (DominatorTree::UpdateType (
DominatorTree::Insert, FC0.ExitingBlock , FC1.Header ));
// Remove FC0 Exit Block
// The exit block for FC0 is no longer needed since control will flow
// directly to the header of FC1. Since it is an empty block, it can be
// removed at this point.
// TODO: In the future, we can handle non-empty exit blocks by merging any
// instructions from FC0 exit block into FC1 exit block prior to removing
// the block.
// Note: the condition checked here is that the exit block has no
// predecessors left, even though the message talks about emptiness.
assert (pred_begin (FC0.ExitBlock ) == pred_end (FC0.ExitBlock ) &&
" Expecting exit block to be empty" );
FC0.ExitBlock ->getTerminator ()->eraseFromParent ();
new UnreachableInst (FC0.ExitBlock ->getContext (), FC0.ExitBlock );
// Remove FC1 Preheader
// The pre-header of L1 is not necessary anymore.
assert (pred_begin (FC1.Preheader ) == pred_end (FC1.Preheader ));
FC1.Preheader ->getTerminator ()->eraseFromParent ();
new UnreachableInst (FC1.Preheader ->getContext (), FC1.Preheader );
TreeUpdates.emplace_back (DominatorTree::UpdateType (
DominatorTree::Delete, FC1.Preheader , FC1.Header ));
// Moves the phi nodes from the second to the first loops header block.
// PHIs that still have at least one use are moved; unused ones are erased.
// Either way the PHI leaves the front of FC1.Header, so the loop terminates.
while (PHINode *PHI = dyn_cast<PHINode>(&FC1.Header ->front ())) {
if (SE.isSCEVable (PHI->getType ()))
SE.forgetValue (PHI);
if (PHI->hasNUsesOrMore (1 ))
PHI->moveBefore (&*FC0.Header ->getFirstInsertionPt ());
else
PHI->eraseFromParent ();
}
// Introduce new phi nodes in the second loop header to ensure
// exiting the first and jumping to the header of the second does not break
// the SSA property of the phis originally in the first loop. See also the
// comment above.
// OriginalFC0PHIs is asserted empty above, so this loop body only runs in
// builds where that assertion is compiled out and FC0 is not rotated.
Instruction *L1HeaderIP = &FC1.Header ->front ();
for (PHINode *LCPHI : OriginalFC0PHIs) {
int L1LatchBBIdx = LCPHI->getBasicBlockIndex (FC1.Latch );
assert (L1LatchBBIdx >= 0 &&
" Expected loop carried value to be rewired at this point!" );
Value *LCV = LCPHI->getIncomingValue (L1LatchBBIdx);
PHINode *L1HeaderPHI = PHINode::Create (
LCV->getType (), 2 , LCPHI->getName () + " .afterFC0" , L1HeaderIP);
L1HeaderPHI->addIncoming (LCV, FC0.Latch );
L1HeaderPHI->addIncoming (UndefValue::get (LCV->getType ()),
FC0.ExitingBlock );
LCPHI->setIncomingValue (L1LatchBBIdx, L1HeaderPHI);
}
// Update the latches
// Replace latch terminator destinations.
// FC0's latch now continues into FC1's header, and FC1's latch carries the
// back edge to FC0's header.
FC0.Latch ->getTerminator ()->replaceUsesOfWith (FC0.Header , FC1.Header );
FC1.Latch ->getTerminator ()->replaceUsesOfWith (FC1.Header , FC0.Header );
// If FC0.Latch and FC0.ExitingBlock are the same then we have already
// performed the updates above.
if (FC0.Latch != FC0.ExitingBlock )
TreeUpdates.emplace_back (DominatorTree::UpdateType (
DominatorTree::Insert, FC0.Latch , FC1.Header ));
TreeUpdates.emplace_back (DominatorTree::UpdateType (DominatorTree::Delete,
FC0.Latch , FC0.Header ));
TreeUpdates.emplace_back (DominatorTree::UpdateType (DominatorTree::Insert,
FC1.Latch , FC0.Header ));
TreeUpdates.emplace_back (DominatorTree::UpdateType (DominatorTree::Delete,
FC1.Latch , FC1.Header ));
// All done
// Apply the updates to the Dominator Tree and cleanup.
assert (succ_begin (FC1GuardBlock) == succ_end (FC1GuardBlock) &&
" FC1GuardBlock has successors!!" );
assert (pred_begin (FC1GuardBlock) == pred_end (FC1GuardBlock) &&
" FC1GuardBlock has predecessors!!" );
// Update DT/PDT
DTU.applyUpdates (TreeUpdates);
// NOTE(review): only FC1.Preheader and FC0.ExitBlock are deleted below;
// FC1GuardBlock is left in the function with no predecessors and an
// unreachable terminator. Confirm whether it should be deleted here as well.
LI.removeBlock (FC1.Preheader );
DTU.deleteBB (FC1.Preheader );
DTU.deleteBB (FC0.ExitBlock );
DTU.flush ();
// Is there a way to keep SE up-to-date so we don't need to forget the loops
// and rebuild the information in subsequent passes of fusion?
SE.forgetLoop (FC1.L );
SE.forgetLoop (FC0.L );
// Merge the loops.
// Iterate over a copy of FC1's block list because the list is modified while
// the blocks are transferred.
SmallVector<BasicBlock *, 8 > Blocks (FC1.L ->block_begin (),
FC1.L ->block_end ());
for (BasicBlock *BB : Blocks) {
FC0.L ->addBlockEntry (BB);
FC1.L ->removeBlockFromLoop (BB);
// Only blocks that LoopInfo maps directly to FC1.L are remapped to FC0.L;
// any other mapping is left as it is.
if (LI.getLoopFor (BB) != FC1.L )
continue ;
LI.changeLoopFor (BB, FC0.L );
}
// Re-parent every child loop of FC1.L under FC0.L.
while (!FC1.L ->empty ()) {
const auto &ChildLoopIt = FC1.L ->begin ();
Loop *ChildLoop = *ChildLoopIt;
FC1.L ->removeChildLoop (ChildLoopIt);
FC0.L ->addChildLoop (ChildLoop);
}
// Delete the now empty loop L1.
LI.erase (FC1.L );
#ifndef NDEBUG
assert (!verifyFunction (*FC0.Header ->getParent (), &errs ()));
assert (DT.verify (DominatorTree::VerificationLevel::Fast));
assert (PDT.verify ());
LI.verify (DT);
SE.verify ();
#endif
LLVM_DEBUG (dbgs () << " Fusion done:\n " );
return FC0.L ;
}
};
struct LoopFuseLegacy : public FunctionPass {
Expand Down