diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp index da18812f67b10f..20edc8699d7978 100644 --- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp +++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp @@ -46,7 +46,6 @@ #include "llvm/Transforms/Scalar/LoopFuse.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/LoopInfo.h" @@ -54,7 +53,6 @@ #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" @@ -66,7 +64,6 @@ #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/CodeMoverUtils.h" -#include "llvm/Transforms/Utils/UnrollLoop.h" using namespace llvm; @@ -117,11 +114,6 @@ static cl::opt FusionDependenceAnalysis( "Use all available analyses")), cl::Hidden, cl::init(FUSION_DEPENDENCE_ANALYSIS_ALL), cl::ZeroOrMore); -static cl::opt FusionPeelMaxCount( - "loop-fusion-peel-max-count", cl::init(0), cl::Hidden, - cl::desc("Max number of iterations to be peeled from a loop, such that " - "fusion can take place")); - #ifndef NDEBUG static cl::opt VerboseFusionDebugging("loop-fusion-verbose-debug", @@ -165,12 +157,6 @@ struct FusionCandidate { bool Valid; /// Guard branch of the loop, if it exists BranchInst *GuardBranch; - /// Peeling Paramaters of the Loop. - TTI::PeelingPreferences PP; - /// Can you Peel this Loop? - bool AbleToPeel; - /// Has this loop been Peeled - bool Peeled; /// Dominator and PostDominator trees are needed for the /// FusionCandidateCompare function, required by FusionCandidateSet to @@ -182,13 +168,11 @@ struct FusionCandidate { OptimizationRemarkEmitter &ORE; FusionCandidate(Loop *L, const DominatorTree *DT, - const PostDominatorTree *PDT, OptimizationRemarkEmitter &ORE, - TTI::PeelingPreferences PP) + const PostDominatorTree *PDT, OptimizationRemarkEmitter &ORE) : Preheader(L->getLoopPreheader()), Header(L->getHeader()), ExitingBlock(L->getExitingBlock()), ExitBlock(L->getExitBlock()), Latch(L->getLoopLatch()), L(L), Valid(true), - GuardBranch(L->getLoopGuardBranch()), PP(PP), AbleToPeel(canPeel(L)), - Peeled(false), DT(DT), PDT(PDT), ORE(ORE) { + GuardBranch(L->getLoopGuardBranch()), DT(DT), PDT(PDT), ORE(ORE) { // Walk over all blocks in the loop and check for conditions that may // prevent fusion. For each block, walk over all instructions and collect @@ -259,17 +243,6 @@ struct FusionCandidate { return Preheader; } - /// After Peeling the loop is modified quite a bit, hence all of the Blocks - /// need to be updated accordingly. - void updateAfterPeeling() { - Preheader = L->getLoopPreheader(); - Header = L->getHeader(); - ExitingBlock = L->getExitingBlock(); - ExitBlock = L->getExitBlock(); - Latch = L->getLoopLatch(); - verify(); - } - /// Given a guarded loop, get the successor of the guard that is not in the /// loop. /// @@ -281,8 +254,6 @@ struct FusionCandidate { assert(GuardBranch && "Only valid on guarded loops."); assert(GuardBranch->isConditional() && "Expecting guard to be a conditional branch."); - if (Peeled) - return GuardBranch->getSuccessor(1); return (GuardBranch->getSuccessor(0) == Preheader) ? GuardBranch->getSuccessor(1) : GuardBranch->getSuccessor(0); @@ -544,17 +515,13 @@ struct LoopFuser { ScalarEvolution &SE; PostDominatorTree &PDT; OptimizationRemarkEmitter &ORE; - AssumptionCache &AC; - - const TargetTransformInfo &TTI; public: LoopFuser(LoopInfo &LI, DominatorTree &DT, DependenceInfo &DI, ScalarEvolution &SE, PostDominatorTree &PDT, - OptimizationRemarkEmitter &ORE, const DataLayout &DL, - AssumptionCache &AC, const TargetTransformInfo &TTI) + OptimizationRemarkEmitter &ORE, const DataLayout &DL) : LDT(LI), DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy), LI(LI), - DT(DT), DI(DI), SE(SE), PDT(PDT), ORE(ORE), AC(AC), TTI(TTI) {} + DT(DT), DI(DI), SE(SE), PDT(PDT), ORE(ORE) {} /// This is the main entry point for loop fusion. It will traverse the /// specified function and collect candidate loops to fuse, starting at the @@ -639,9 +606,7 @@ struct LoopFuser { /// Flow Equivalent sets, sorted by dominance. void collectFusionCandidates(const LoopVector &LV) { for (Loop *L : LV) { - TTI::PeelingPreferences PP = - gatherPeelingPreferences(L, SE, TTI, None, None); - FusionCandidate CurrCand(L, &DT, &PDT, ORE, PP); + FusionCandidate CurrCand(L, &DT, &PDT, ORE); if (!CurrCand.isEligibleForFusion(SE)) continue; @@ -691,126 +656,33 @@ struct LoopFuser { /// Determine if two fusion candidates have the same trip count (i.e., they /// execute the same number of iterations). /// - /// This function will return a pair of values. The first is a boolean, - /// stating whether or not the two candidates are known at compile time to - /// have the same TripCount. The second is the difference in the two - /// TripCounts. This information can be used later to determine whether or not - /// peeling can be performed on either one of the candiates. - std::pair> - haveIdenticalTripCounts(const FusionCandidate &FC0, - const FusionCandidate &FC1) const { - + /// Note that for now this method simply returns a boolean value because there + /// are no mechanisms in loop fusion to handle different trip counts. In the + /// future, this behaviour can be extended to adjust one of the loops to make + /// the trip counts equal (e.g., loop peeling). When this is added, this + /// interface may need to change to return more information than just a + /// boolean value. + bool identicalTripCounts(const FusionCandidate &FC0, + const FusionCandidate &FC1) const { const SCEV *TripCount0 = SE.getBackedgeTakenCount(FC0.L); if (isa(TripCount0)) { UncomputableTripCount++; LLVM_DEBUG(dbgs() << "Trip count of first loop could not be computed!"); - return {false, None}; + return false; } const SCEV *TripCount1 = SE.getBackedgeTakenCount(FC1.L); if (isa(TripCount1)) { UncomputableTripCount++; LLVM_DEBUG(dbgs() << "Trip count of second loop could not be computed!"); - return {false, None}; + return false; } - LLVM_DEBUG(dbgs() << "\tTrip counts: " << *TripCount0 << " & " << *TripCount1 << " are " << (TripCount0 == TripCount1 ? "identical" : "different") << "\n"); - if (TripCount0 == TripCount1) - return {true, 0}; - - LLVM_DEBUG(dbgs() << "The loops do not have the same tripcount, " - "determining the difference between trip counts\n"); - - // Currently only considering loops with a single exit point - // and a non-constant trip count. - unsigned TC0 = SE.getSmallConstantTripCount(FC0.L); - unsigned TC1 = SE.getSmallConstantTripCount(FC1.L); - - // If any of the tripcounts are zero that means that loop(s) do not have - // a single exit or a constant tripcount. - if (TC0 == 0 || TC1 == 0) { - LLVM_DEBUG(dbgs() << "Loop(s) do not have a single exit point or do not " - "have a constant number of iterations. Peeling " - "is not benefical\n"); - return {false, None}; - } - - Optional Difference = None; - int Diff = TC0 - TC1; - - if (Diff > 0) - Difference = Diff; - else { - LLVM_DEBUG( - dbgs() - << "Difference is less than 0. FC1 (second loop) has more " - "iterations than the first one. Currently not supported.\n"); - } - - LLVM_DEBUG(dbgs() << "Difference in loop trip count is: " << Difference - << "\n"); - - return {false, Difference}; - } - - void peelFusionCandidate(FusionCandidate &FC0, const FusionCandidate &FC1, - unsigned PeelCount) { - assert(FC0.AbleToPeel && "Should be able to peel loop"); - - LLVM_DEBUG(dbgs() << "Attempting to peel first " << PeelCount - << " iterations of the first loop. \n"); - - FC0.Peeled = peelLoop(FC0.L, PeelCount, &LI, &SE, &DT, &AC, true); - if (FC0.Peeled) { - LLVM_DEBUG(dbgs() << "Done Peeling\n"); - -#ifndef NDEBUG - auto IdenticalTripCount = haveIdenticalTripCounts(FC0, FC1); - - assert(IdenticalTripCount.first && *IdenticalTripCount.second == 0 && - "Loops should have identical trip counts after peeling"); -#endif - - FC0.PP.PeelCount = PeelCount; - - // Peeling does not update the PDT - PDT.recalculate(*FC0.Preheader->getParent()); - - FC0.updateAfterPeeling(); - - // In this case the iterations of the loop are constant, so the first - // loop will execute completely (will not jump from one of - // the peeled blocks to the second loop). Here we are updating the - // branch conditions of each of the peeled blocks, such that it will - // branch to its successor which is not the Preheader of the second Loop. - // Doing this update will ensure that the entry block of the first loop - // dominates the entry block of the second loop. - BasicBlock *BB = - FC0.GuardBranch ? FC0.ExitBlock->getUniqueSuccessor() : FC1.Preheader; - SmallVector TreeUpdates; - for (BasicBlock *Pred : predecessors(BB)) { - if (Pred != FC0.ExitBlock) { - BranchInst *Old = dyn_cast(Pred->getTerminator()); - BasicBlock *Succ = Old->getSuccessor(0); - if (Succ == BB) - Succ = Old->getSuccessor(1); - BranchInst *NewBranch = BranchInst::Create(Succ); - ReplaceInstWithInst(Old, NewBranch); - TreeUpdates.emplace_back( - DominatorTree::UpdateType(DominatorTree::Delete, Pred, BB)); - } - } - DTU.applyUpdates(TreeUpdates); - DTU.flush(); - LLVM_DEBUG( - dbgs() << "Sucessfully peeled " << FC0.PP.PeelCount - << " iterations from the first loop.\n" - "Both Loops have the same number of iterations now.\n"); - } + return (TripCount0 == TripCount1); } /// Walk each set of control flow equivalent fusion candidates and attempt to @@ -844,32 +716,7 @@ struct LoopFuser { FC0->verify(); FC1->verify(); - // Check if the candidates have identical tripcounts (first value of - // pair), and if not check the difference in the tripcounts between - // the loops (second value of pair). The difference is not equal to - // None iff the loops iterate a constant number of times, and have a - // single exit. - std::pair> IdenticalTripCountRes = - haveIdenticalTripCounts(*FC0, *FC1); - bool SameTripCount = IdenticalTripCountRes.first; - Optional TCDifference = IdenticalTripCountRes.second; - - // Here we are checking that FC0 (the first loop) can be peeled, and - // both loops have different tripcounts. - if (FC0->AbleToPeel && !SameTripCount && TCDifference) { - if (*TCDifference > FusionPeelMaxCount) { - LLVM_DEBUG(dbgs() - << "Difference in loop trip counts: " << *TCDifference - << " is greater than maximum peel count specificed: " - << FusionPeelMaxCount << "\n"); - } else { - // Dependent on peeling being performed on the first loop, and - // assuming all other conditions for fusion return true. - SameTripCount = true; - } - } - - if (!SameTripCount) { + if (!identicalTripCounts(*FC0, *FC1)) { LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical trip " "counts. Not fusing.\n"); reportLoopFusion(*FC0, *FC1, @@ -887,7 +734,7 @@ struct LoopFuser { // Ensure that FC0 and FC1 have identical guards. // If one (or both) are not guarded, this check is not necessary. if (FC0->GuardBranch && FC1->GuardBranch && - !haveIdenticalGuards(*FC0, *FC1) && !TCDifference) { + !haveIdenticalGuards(*FC0, *FC1)) { LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical " "guards. Not Fusing.\n"); reportLoopFusion(*FC0, *FC1, @@ -956,23 +803,13 @@ struct LoopFuser { LLVM_DEBUG(dbgs() << "\tFusion is performed: " << *FC0 << " and " << *FC1 << "\n"); - FusionCandidate FC0Copy = *FC0; - // Peel the loop after determining that fusion is legal. The Loops - // will still be safe to fuse after the peeling is performed. - bool Peel = TCDifference && *TCDifference > 0; - if (Peel) - peelFusionCandidate(FC0Copy, *FC1, *TCDifference); - // Report fusion to the Optimization Remarks. // Note this needs to be done *before* performFusion because // performFusion will change the original loops, making it not // possible to identify them after fusion is complete. - reportLoopFusion((Peel ? FC0Copy : *FC0), *FC1, - FuseCounter); + reportLoopFusion(*FC0, *FC1, FuseCounter); - FusionCandidate FusedCand( - performFusion((Peel ? FC0Copy : *FC0), *FC1), &DT, &PDT, ORE, - FC0Copy.PP); + FusionCandidate FusedCand(performFusion(*FC0, *FC1), &DT, &PDT, ORE); FusedCand.verify(); assert(FusedCand.isEligibleForFusion(SE) && "Fused candidate should be eligible for fusion!"); @@ -1249,17 +1086,16 @@ struct LoopFuser { return (FC1.GuardBranch->getSuccessor(1) == FC1.Preheader); } - /// Modify the latch branch of FC to be unconditional since successors of the - /// branch are the same. + /// Simplify the condition of the latch branch of \p FC to true, when both of + /// its successors are the same. void simplifyLatchBranch(const FusionCandidate &FC) const { BranchInst *FCLatchBranch = dyn_cast(FC.Latch->getTerminator()); if (FCLatchBranch) { assert(FCLatchBranch->isConditional() && FCLatchBranch->getSuccessor(0) == FCLatchBranch->getSuccessor(1) && "Expecting the two successors of FCLatchBranch to be the same"); - BranchInst *NewBranch = - BranchInst::Create(FCLatchBranch->getSuccessor(0)); - ReplaceInstWithInst(FCLatchBranch, NewBranch); + FCLatchBranch->setCondition( + llvm::ConstantInt::getTrue(FCLatchBranch->getCondition()->getType())); } } @@ -1319,8 +1155,7 @@ struct LoopFuser { if (FC0.GuardBranch) return fuseGuardedLoops(FC0, FC1); - assert(FC1.Preheader == - (FC0.Peeled ? FC0.ExitBlock->getUniqueSuccessor() : FC0.ExitBlock)); + assert(FC1.Preheader == FC0.ExitBlock); assert(FC1.Preheader->size() == 1 && FC1.Preheader->getSingleSuccessor() == FC1.Header); @@ -1343,7 +1178,7 @@ struct LoopFuser { FC0.Latch->replaceSuccessorsPhiUsesWith(FC1.Latch); // Then modify the control flow and update DT and PDT. - SmallVector TreeUpdates; + SmallVector TreeUpdates; // The old exiting block of the first loop (FC0) has to jump to the header // of the second as we need to execute the code in the second header block @@ -1362,27 +1197,12 @@ struct LoopFuser { // to FC1.Header? I think this is basically what the three sequences are // trying to accomplish; however, doing this directly in the CFG may mean // the DT/PDT becomes invalid - if (!FC0.Peeled) { - FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC1.Preheader, - FC1.Header); - TreeUpdates.emplace_back(DominatorTree::UpdateType( - DominatorTree::Delete, FC0.ExitingBlock, FC1.Preheader)); - TreeUpdates.emplace_back(DominatorTree::UpdateType( - DominatorTree::Insert, FC0.ExitingBlock, FC1.Header)); - } else { - TreeUpdates.emplace_back(DominatorTree::UpdateType( - DominatorTree::Delete, FC0.ExitBlock, FC1.Preheader)); - - // Remove the ExitBlock of the first Loop (also not needed) - FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC0.ExitBlock, - FC1.Header); - TreeUpdates.emplace_back(DominatorTree::UpdateType( - DominatorTree::Delete, FC0.ExitingBlock, FC0.ExitBlock)); - FC0.ExitBlock->getTerminator()->eraseFromParent(); - TreeUpdates.emplace_back(DominatorTree::UpdateType( - DominatorTree::Insert, FC0.ExitingBlock, FC1.Header)); - new UnreachableInst(FC0.ExitBlock->getContext(), FC0.ExitBlock); - } + FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC1.Preheader, + FC1.Header); + TreeUpdates.emplace_back(DominatorTree::UpdateType( + DominatorTree::Delete, FC0.ExitingBlock, FC1.Preheader)); + TreeUpdates.emplace_back(DominatorTree::UpdateType( + DominatorTree::Insert, FC0.ExitingBlock, FC1.Header)); // The pre-header of L1 is not necessary anymore. assert(pred_begin(FC1.Preheader) == pred_end(FC1.Preheader)); @@ -1426,7 +1246,7 @@ struct LoopFuser { FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header); FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header); - // Modify the latch branch of FC0 to be unconditional as both successors of + // Change the condition of FC0 latch branch to true, as both successors of // the branch are the same. simplifyLatchBranch(FC0); @@ -1448,11 +1268,6 @@ struct LoopFuser { LI.removeBlock(FC1.Preheader); DTU.deleteBB(FC1.Preheader); - if (FC0.Peeled) { - LI.removeBlock(FC0.ExitBlock); - DTU.deleteBB(FC0.ExitBlock); - } - DTU.flush(); // Is there a way to keep SE up-to-date so we don't need to forget the loops @@ -1549,15 +1364,10 @@ struct LoopFuser { BasicBlock *FC1GuardBlock = FC1.GuardBranch->getParent(); BasicBlock *FC0NonLoopBlock = FC0.getNonLoopBlock(); BasicBlock *FC1NonLoopBlock = FC1.getNonLoopBlock(); - BasicBlock *FC0ExitBlockSuccessor = FC0.ExitBlock->getUniqueSuccessor(); // Move instructions from the exit block of FC0 to the beginning of the exit - // block of FC1, in the case that the FC0 loop has not been peeled. In the - // case that FC0 loop is peeled, then move the instructions of the successor - // of the FC0 Exit block to the beginning of the exit block of FC1. - moveInstructionsToTheBeginning( - (FC0.Peeled ? *FC0ExitBlockSuccessor : *FC0.ExitBlock), *FC1.ExitBlock, - DT, PDT, DI); + // block of FC1. + moveInstructionsToTheBeginning(*FC0.ExitBlock, *FC1.ExitBlock, DT, PDT, DI); // Move instructions from the guard block of FC1 to the end of the guard // block of FC0. @@ -1577,9 +1387,8 @@ struct LoopFuser { // for FC1 (where FC1 guard would have gone if FC1 was not executed). FC1NonLoopBlock->replacePhiUsesWith(FC1GuardBlock, FC0GuardBlock); FC0.GuardBranch->replaceUsesOfWith(FC0NonLoopBlock, FC1NonLoopBlock); - - BasicBlock *BBToUpdate = FC0.Peeled ? FC0ExitBlockSuccessor : FC0.ExitBlock; - BBToUpdate->getTerminator()->replaceUsesOfWith(FC1GuardBlock, FC1.Header); + FC0.ExitBlock->getTerminator()->replaceUsesOfWith(FC1GuardBlock, + FC1.Header); // The guard of FC1 is not necessary anymore. FC1.GuardBranch->eraseFromParent(); @@ -1594,15 +1403,6 @@ struct LoopFuser { TreeUpdates.emplace_back(DominatorTree::UpdateType( DominatorTree::Insert, FC0GuardBlock, FC1NonLoopBlock)); - if (FC0.Peeled) { - // Remove the Block after the ExitBlock of FC0 - TreeUpdates.emplace_back(DominatorTree::UpdateType( - DominatorTree::Delete, FC0ExitBlockSuccessor, FC1GuardBlock)); - FC0ExitBlockSuccessor->getTerminator()->eraseFromParent(); - new UnreachableInst(FC0ExitBlockSuccessor->getContext(), - FC0ExitBlockSuccessor); - } - assert(pred_begin(FC1GuardBlock) == pred_end(FC1GuardBlock) && "Expecting guard block to have no predecessors"); assert(succ_begin(FC1GuardBlock) == succ_end(FC1GuardBlock) && @@ -1709,7 +1509,7 @@ struct LoopFuser { FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header); FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header); - // Modify the latch branch of FC0 to be unconditional as both successors of + // Change the condition of FC0 latch branch to true, as both successors of // the branch are the same. simplifyLatchBranch(FC0); @@ -1740,10 +1540,6 @@ struct LoopFuser { LI.removeBlock(FC1GuardBlock); LI.removeBlock(FC1.Preheader); LI.removeBlock(FC0.ExitBlock); - if (FC0.Peeled) { - LI.removeBlock(FC0ExitBlockSuccessor); - DTU.deleteBB(FC0ExitBlockSuccessor); - } DTU.deleteBB(FC1GuardBlock); DTU.deleteBB(FC1.Preheader); DTU.deleteBB(FC0.ExitBlock); @@ -1810,8 +1606,6 @@ struct LoopFuseLegacy : public FunctionPass { AU.addRequired(); AU.addRequired(); AU.addRequired(); - AU.addRequired(); - AU.addRequired(); AU.addPreserved(); AU.addPreserved(); @@ -1828,12 +1622,9 @@ struct LoopFuseLegacy : public FunctionPass { auto &SE = getAnalysis().getSE(); auto &PDT = getAnalysis().getPostDomTree(); auto &ORE = getAnalysis().getORE(); - auto &AC = getAnalysis().getAssumptionCache(F); - const TargetTransformInfo &TTI = - getAnalysis().getTTI(F); - const DataLayout &DL = F.getParent()->getDataLayout(); - LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL, AC, TTI); + const DataLayout &DL = F.getParent()->getDataLayout(); + LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL); return LF.fuseLoops(F); } }; @@ -1846,11 +1637,9 @@ PreservedAnalyses LoopFusePass::run(Function &F, FunctionAnalysisManager &AM) { auto &SE = AM.getResult(F); auto &PDT = AM.getResult(F); auto &ORE = AM.getResult(F); - auto &AC = AM.getResult(F); - const TargetTransformInfo &TTI = AM.getResult(F); - const DataLayout &DL = F.getParent()->getDataLayout(); - LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL, AC, TTI); + const DataLayout &DL = F.getParent()->getDataLayout(); + LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL); bool Changed = LF.fuseLoops(F); if (!Changed) return PreservedAnalyses::all(); @@ -1873,8 +1662,6 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(LoopFuseLegacy, "loop-fusion", "Loop Fusion", false, false) FunctionPass *llvm::createLoopFusePass() { return new LoopFuseLegacy(); } diff --git a/llvm/test/Transforms/LoopFusion/guarded_peel.ll b/llvm/test/Transforms/LoopFusion/guarded_peel.ll deleted file mode 100644 index 0c8df52017baaf..00000000000000 --- a/llvm/test/Transforms/LoopFusion/guarded_peel.ll +++ /dev/null @@ -1,85 +0,0 @@ -; RUN: opt -S -loop-fusion -loop-fusion-peel-max-count=3 < %s | FileCheck %s - -; This will test if we are able to fuse two guarded loops which have constant -; but different trip counts. The first two iterations of the first loop should -; be peeled off, and then the loops should be fused together. - -@B = common global [1024 x i32] zeroinitializer, align 16 - -; CHECK: void @main -; CHECK-NEXT: entry: -; CHECK: br i1 %cmp4, label %for.first.entry, label %for.end -; CHECK: for.first.entry -; CHECK-NEXT: br label %for.first.peel.begin -; CHECK: for.first.peel.begin: -; CHECK-NEXT: br label %for.first.peel -; CHECK: for.first.peel: -; CHECK: br label %for.first.peel.next -; CHECK: for.first.peel.next: -; CHECK-NEXT: br label %for.first.peel2 -; CHECK: for.first.peel2: -; CHECK: br label %for.first.peel.next1 -; CHECK: for.first.peel.next1: -; CHECK-NEXT: br label %for.first.peel.next11 -; CHECK: for.first.peel.next11: -; CHECK-NEXT: br label %for.first.entry.peel.newph -; CHECK: for.first.entry.peel.newph: -; CHECK: br label %for.first -; CHECK: for.first: -; CHECK: br i1 %cmp3, label %for.first, label %for.second.exit -; CHECK: for.second.exit: -; CHECK: br label %for.end -; CHECK: for.end: -; CHECK-NEXT: ret void - -define void @main(i32* noalias %A) { -entry: - %cmp4 = icmp slt i64 0, 45 - br i1 %cmp4, label %for.first.entry, label %for.second.guard - -for.first.entry: ; preds = %entry - br label %for.first - -for.first: ; preds = %for.first.entry, %for.first - %i.05 = phi i64 [ %inc, %for.first ], [ 0, %for.first.entry ] - %sub = sub nsw i64 %i.05, 3 - %add = add nsw i64 %i.05, 3 - %mul = mul nsw i64 %sub, %add - %rem = srem i64 %mul, %i.05 - %conv = trunc i64 %rem to i32 - %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.05 - store i32 %conv, i32* %arrayidx, align 4 - %inc = add nsw i64 %i.05, 1 - %cmp = icmp slt i64 %inc, 45 - br i1 %cmp, label %for.first, label %for.first.exit - -for.first.exit: ; preds = %for.first - br label %for.second.guard - -for.second.guard: ; preds = %for.first.exit, %entry - %cmp31 = icmp slt i64 2, 45 - br i1 %cmp31, label %for.second.entry, label %for.end - -for.second.entry: ; preds = %for.second.guard - br label %for.second - -for.second: ; preds = %for.second.entry, %for.second - %i1.02 = phi i64 [ %inc14, %for.second ], [ 2, %for.second.entry ] - %sub7 = sub nsw i64 %i1.02, 3 - %add8 = add nsw i64 %i1.02, 3 - %mul9 = mul nsw i64 %sub7, %add8 - %rem10 = srem i64 %mul9, %i1.02 - %conv11 = trunc i64 %rem10 to i32 - %arrayidx12 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %i1.02 - store i32 %conv11, i32* %arrayidx12, align 4 - %inc14 = add nsw i64 %i1.02, 1 - %cmp3 = icmp slt i64 %inc14, 45 - br i1 %cmp3, label %for.second, label %for.second.exit - -for.second.exit: ; preds = %for.second - br label %for.end - -for.end: ; preds = %for.second.exit, %for.second.guard - ret void -} - diff --git a/llvm/test/Transforms/LoopFusion/guarded_unsafeblock_peel.ll b/llvm/test/Transforms/LoopFusion/guarded_unsafeblock_peel.ll deleted file mode 100644 index 0d825fb751f52e..00000000000000 --- a/llvm/test/Transforms/LoopFusion/guarded_unsafeblock_peel.ll +++ /dev/null @@ -1,72 +0,0 @@ -; RUN: opt -S -loop-fusion -loop-fusion-peel-max-count=3 < %s | FileCheck %s - -; This will test that we do not fuse two guarded loops together. -; These loops do not have the same trip count, fusing should be possible after -; peeling the loops. However, the exit block of the first loop makes the loops -; unsafe to peel. -; The expected output of this test is the function as below. - -; CHECK: void @unsafe_exitblock -; CHECK: for.first.guard -; CHECK: br i1 %cmp3, label %for.first.preheader, label %for.second.guard -; CHECK: for.first.preheader: -; CHECK-NEXT: br label %for.first -; CHECK: for.first: -; CHECK: br i1 %cmp, label %for.first, label %for.first.exit -; CHECK: for.first.exit: -; CHECK-NEXT: call void @bar() -; CHECK-NEXT: br label %for.second.guard -; CHECK: for.second.guard: -; CHECK: br i1 %cmp21, label %for.second.preheader, label %for.end -; CHECK: for.second.preheader: -; CHECK-NEXT: br label %for.second -; CHECK: for.second: -; CHECK: br i1 %cmp2, label %for.second, label %for.second.exit -; CHECK: for.second.exit: -; CHECK-NEXT: br label %for.end -; CHECK: for.end: -; CHECK-NEXT: ret void - -define void @unsafe_exitblock(i32* noalias %A, i32* noalias %B) { -for.first.guard: - %cmp3 = icmp slt i64 0, 45 - br i1 %cmp3, label %for.first.preheader, label %for.second.guard - -for.first.preheader: - br label %for.first - -for.first: - %i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ] - %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.04 - store i32 0, i32* %arrayidx, align 4 - %inc = add nsw i64 %i.04, 1 - %cmp = icmp slt i64 %inc, 45 - br i1 %cmp, label %for.first, label %for.first.exit - -for.first.exit: - call void @bar() - br label %for.second.guard - -for.second.guard: - %cmp21 = icmp slt i64 2,45 - br i1 %cmp21, label %for.second.preheader, label %for.end - -for.second.preheader: - br label %for.second - -for.second: - %j.02 = phi i64 [ %inc6, %for.second ], [ 2, %for.second.preheader ] - %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %j.02 - store i32 0, i32* %arrayidx4, align 4 - %inc6 = add nsw i64 %j.02, 1 - %cmp2 = icmp slt i64 %inc6, 45 - br i1 %cmp2, label %for.second, label %for.second.exit - -for.second.exit: - br label %for.end - -for.end: - ret void -} - -declare void @bar() diff --git a/llvm/test/Transforms/LoopFusion/nonadjacent_peel.ll b/llvm/test/Transforms/LoopFusion/nonadjacent_peel.ll deleted file mode 100644 index 8fe8824aa9fe3c..00000000000000 --- a/llvm/test/Transforms/LoopFusion/nonadjacent_peel.ll +++ /dev/null @@ -1,83 +0,0 @@ -; RUN: opt -S -loop-fusion -loop-fusion-peel-max-count=3 < %s | FileCheck %s - -; This will check that we do not fuse these two loops together. These loops are -; valid cadidates for peeling, however they are not adjacent. -; The expected output of this test is the function below. - -; CHECK: void @function -; CHECK-NEXT: for.first.preheader: -; CHECK-NEXT: br label %for.first -; CHECK: for.first: -; CHECK: br label %for.first.latch -; CHECK: for.first.latch: -; CHECK: br i1 %exitcond4, label %for.first, label %for.first.exit -; CHECK: for.first.exit: -; CHECK-NEXT: br label %for.next -; CHECK: for.next: -; CHECK-NEXT: br label %for.second.preheader -; CHECK: for.second.preheader: -; CHECK: br label %for.second -; CHECK: for.second: -; CHECK: br label %for.second.latch -; CHECK: for.second.latch: -; CHECK: br i1 %exitcond, label %for.second, label %for.end -; CHECK: for.end: -; CHECK-NEXT: ret void - -@B = common global [1024 x i32] zeroinitializer, align 16 - -define void @function(i32* noalias %arg) { -for.first.preheader: - br label %for.first - -for.first: ; preds = %for.first.preheader, %for.first.latch - %.014 = phi i32 [ 0, %for.first.preheader ], [ %tmp15, %for.first.latch ] - %indvars.iv23 = phi i64 [ 0, %for.first.preheader ], [ %indvars.iv.next3, %for.first.latch ] - %tmp = add nsw i32 %.014, -3 - %tmp8 = add nuw nsw i64 %indvars.iv23, 3 - %tmp9 = trunc i64 %tmp8 to i32 - %tmp10 = mul nsw i32 %tmp, %tmp9 - %tmp11 = trunc i64 %indvars.iv23 to i32 - %tmp12 = srem i32 %tmp10, %tmp11 - %tmp13 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv23 - store i32 %tmp12, i32* %tmp13, align 4 - br label %for.first.latch - -for.first.latch: ; preds = %for.first - %indvars.iv.next3 = add nuw nsw i64 %indvars.iv23, 1 - %tmp15 = add nuw nsw i32 %.014, 1 - %exitcond4 = icmp ne i64 %indvars.iv.next3, 100 - br i1 %exitcond4, label %for.first, label %for.first.exit - -for.first.exit: ; preds: %for.first.latch - br label %for.next - -for.next: ; preds = %for.first.exit - br label %for.second.preheader - -for.second.preheader: ; preds = %for.next - br label %for.second - -for.second: ; preds = %for.second.preheader, %for.second.latch - %.02 = phi i32 [ 0, %for.second.preheader ], [ %tmp28, %for.second.latch ] - %indvars.iv1 = phi i64 [ 3, %for.second.preheader ], [ %indvars.iv.next, %for.second.latch ] - %tmp20 = add nsw i32 %.02, -3 - %tmp21 = add nuw nsw i64 %indvars.iv1, 3 - %tmp22 = trunc i64 %tmp21 to i32 - %tmp23 = mul nsw i32 %tmp20, %tmp22 - %tmp24 = trunc i64 %indvars.iv1 to i32 - %tmp25 = srem i32 %tmp23, %tmp24 - %tmp26 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv1 - store i32 %tmp25, i32* %tmp26, align 4 - br label %for.second.latch - -for.second.latch: ; preds = %for.second - %indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1 - %tmp28 = add nuw nsw i32 %.02, 1 - %exitcond = icmp ne i64 %indvars.iv.next, 100 - br i1 %exitcond, label %for.second, label %for.end - -for.end: ; preds = %for.second.latch - ret void -} - diff --git a/llvm/test/Transforms/LoopFusion/peel.ll b/llvm/test/Transforms/LoopFusion/peel.ll deleted file mode 100644 index d978fb3ede400e..00000000000000 --- a/llvm/test/Transforms/LoopFusion/peel.ll +++ /dev/null @@ -1,106 +0,0 @@ -; RUN: opt -S -loop-fusion -loop-fusion-peel-max-count=3 < %s | FileCheck %s - -; This will test whether we can fuse two loops together if they have constant -; but a different tripcount. -; The first three iterations of the first loop should be peeled, and then the -; two loops should be fused together in this example. - -; C Code -; -; int B[1024]; -; -; void function(int *arg) { -; for (int i = 0; i != 100; ++i) -; arg[i] = ((i - 3)*(i+3)) % i; -; -; for (int i = 3; i != 100; ++i) -; B[i] = ((i-6)*(i+3)) % i; -; } - -; CHECK: void @function -; CHECK-NEXT: for.first.preheader: -; CHECK-NEXT: br label %for.first.peel.begin -; CHECK: for.first.peel.begin: -; CHECK-NEXT: br label %for.first.peel -; CHECK: for.first.peel -; CHECK: br label %for.first.latch.peel -; CHECK: for.first.latch.peel: -; CHECK: br label %for.first.peel.next -; CHECK: for.first.peel.next: -; CHECK-NEXT: br label %for.first.peel2 -; CHECK: for.first.peel2: -; CHECK: br label %for.first.latch.peel10 -; CHECK: for.first.latch.peel10: -; CHECK: br label %for.first.peel.next1 -; CHECK: for.first.peel.next1: -; CHECK-NEXT: br label %for.first.peel15 -; CHECK: for.first.peel15: -; CHECK: br label %for.first.latch.peel23 -; CHECK: for.first.latch.peel23: -; CHECK: br label %for.first.peel.next14 -; CHECK: for.first.peel.next14: -; CHECK-NEXT: br label %for.first.peel.next27 -; CHECK: for.first.peel.next27: -; CHECK-NEXT: br label %for.first.preheader.peel.newph -; CHECK: for.first.preheader.peel.newph: -; CHECK-NEXT: br label %for.first -; CHECK: for.first: -; CHECK: br label %for.first.latch -; CHECK: for.first.latch: -; CHECK: br label %for.second.latch -; CHECK: for.second.latch: -; CHECK: br i1 %exitcond, label %for.first, label %for.end -; CHECK: for.end: -; CHECK-NEXT: ret void - -@B = common global [1024 x i32] zeroinitializer, align 16 - -define void @function(i32* noalias %arg) { -for.first.preheader: - br label %for.first - -for.first: ; preds = %for.first.preheader, %for.first.latch - %.014 = phi i32 [ 0, %for.first.preheader ], [ %tmp15, %for.first.latch ] - %indvars.iv23 = phi i64 [ 0, %for.first.preheader ], [ %indvars.iv.next3, %for.first.latch ] - %tmp = add nsw i32 %.014, -3 - %tmp8 = add nuw nsw i64 %indvars.iv23, 3 - %tmp9 = trunc i64 %tmp8 to i32 - %tmp10 = mul nsw i32 %tmp, %tmp9 - %tmp11 = trunc i64 %indvars.iv23 to i32 - %tmp12 = srem i32 %tmp10, %tmp11 - %tmp13 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv23 - store i32 %tmp12, i32* %tmp13, align 4 - br label %for.first.latch - -for.first.latch: ; preds = %for.first - %indvars.iv.next3 = add nuw nsw i64 %indvars.iv23, 1 - %tmp15 = add nuw nsw i32 %.014, 1 - %exitcond4 = icmp ne i64 %indvars.iv.next3, 100 - br i1 %exitcond4, label %for.first, label %for.second.preheader - -for.second.preheader: ; preds = %for.first.latch - br label %for.second - -for.second: ; preds = %for.second.preheader, %for.second.latch - %.02 = phi i32 [ 0, %for.second.preheader ], [ %tmp28, %for.second.latch ] - %indvars.iv1 = phi i64 [ 3, %for.second.preheader ], [ %indvars.iv.next, %for.second.latch ] - %tmp20 = add nsw i32 %.02, -3 - %tmp21 = add nuw nsw i64 %indvars.iv1, 3 - %tmp22 = trunc i64 %tmp21 to i32 - %tmp23 = mul nsw i32 %tmp20, %tmp22 - %tmp24 = trunc i64 %indvars.iv1 to i32 - %tmp25 = srem i32 %tmp23, %tmp24 - %tmp26 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv1 - store i32 %tmp25, i32* %tmp26, align 4 - br label %for.second.latch - -for.second.latch: ; preds = %for.second - %indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1 - %tmp28 = add nuw nsw i32 %.02, 1 - %exitcond = icmp ne i64 %indvars.iv.next, 100 - br i1 %exitcond, label %for.second, label %for.end - -for.end: ; preds = %for.second.latch - ret void -} -