diff --git a/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h b/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h index 6125fc7636a066..bd83a6a0cca4de 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h @@ -10,7 +10,6 @@ #define LLVM_TRANSFORMS_SCALAR_LOOPUNROLLANDJAMPASS_H #include "llvm/IR/PassManager.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" namespace llvm { class Function; @@ -21,8 +20,7 @@ class LoopUnrollAndJamPass : public PassInfoMixin { public: explicit LoopUnrollAndJamPass(int OptLevel = 2) : OptLevel(OptLevel) {} - PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, - LoopStandardAnalysisResults &AR, LPMUpdater &U); + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; } // end namespace llvm diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h index 942b8ce1de89af..4254bd71a41c0c 100644 --- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h +++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h @@ -17,7 +17,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" namespace llvm { @@ -98,7 +97,7 @@ LoopUnrollResult UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, - OptimizationRemarkEmitter *ORE, LPMUpdater *U, + OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop = nullptr); bool isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT, diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index eb94d284961bcd..3a44dcf82ad637 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1207,8 +1207,7 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level, // across the loop nests. 
// We do UnrollAndJam in a separate LPM to ensure it happens before unroll if (EnableUnrollAndJam && PTO.LoopUnrolling) - FPM.addPass(createFunctionToLoopPassAdaptor( - LoopUnrollAndJamPass(Level.getSpeedupLevel()))); + FPM.addPass(LoopUnrollAndJamPass(Level.getSpeedupLevel())); FPM.addPass(LoopUnrollPass(LoopUnrollOptions( Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling, PTO.ForgetAllSCEVInLoopUnroll))); @@ -1291,8 +1290,7 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level, // across the loop nests. // We do UnrollAndJam in a separate LPM to ensure it happens before unroll if (EnableUnrollAndJam && PTO.LoopUnrolling) { - FPM.addPass(createFunctionToLoopPassAdaptor( - LoopUnrollAndJamPass(Level.getSpeedupLevel()))); + FPM.addPass(LoopUnrollAndJamPass(Level.getSpeedupLevel())); } FPM.addPass(LoopUnrollPass(LoopUnrollOptions( Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling, diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 128c9f3bc63570..99e190b99fc4f0 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -247,6 +247,7 @@ FUNCTION_PASS("guard-widening", GuardWideningPass()) FUNCTION_PASS("load-store-vectorizer", LoadStoreVectorizerPass()) FUNCTION_PASS("loop-simplify", LoopSimplifyPass()) FUNCTION_PASS("loop-sink", LoopSinkPass()) +FUNCTION_PASS("loop-unroll-and-jam", LoopUnrollAndJamPass()) FUNCTION_PASS("loop-flatten", LoopFlattenPass()) FUNCTION_PASS("lowerinvoke", LowerInvokePass()) FUNCTION_PASS("lowerswitch", LowerSwitchPass()) @@ -398,7 +399,6 @@ LOOP_PASS("loop-deletion", LoopDeletionPass()) LOOP_PASS("loop-simplifycfg", LoopSimplifyCFGPass()) LOOP_PASS("loop-reduce", LoopStrengthReducePass()) LOOP_PASS("indvars", IndVarSimplifyPass()) -LOOP_PASS("loop-unroll-and-jam", LoopUnrollAndJamPass()) LOOP_PASS("loop-unroll-full", LoopFullUnrollPass()) LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs())) LOOP_PASS("print", 
DDGAnalysisPrinterPass(dbgs())) diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp index 5b636f41a2f845..495906e1a7630f 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp @@ -22,7 +22,6 @@ #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -279,10 +278,11 @@ static bool computeUnrollAndJamCount( return false; } -static LoopUnrollResult tryToUnrollAndJamLoop( - Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, - const TargetTransformInfo &TTI, AssumptionCache &AC, DependenceInfo &DI, - OptimizationRemarkEmitter &ORE, int OptLevel, LPMUpdater *U) { +static LoopUnrollResult +tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, + ScalarEvolution &SE, const TargetTransformInfo &TTI, + AssumptionCache &AC, DependenceInfo &DI, + OptimizationRemarkEmitter &ORE, int OptLevel) { TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(L, SE, TTI, nullptr, nullptr, OptLevel, None, None, None, None, None, None); @@ -385,7 +385,7 @@ static LoopUnrollResult tryToUnrollAndJamLoop( Loop *EpilogueOuterLoop = nullptr; LoopUnrollResult UnrollResult = UnrollAndJamLoop( L, UP.Count, OuterTripCount, OuterTripMultiple, UP.UnrollRemainder, LI, - &SE, &DT, &AC, &TTI, &ORE, U, &EpilogueOuterLoop); + &SE, &DT, &AC, &TTI, &ORE, &EpilogueOuterLoop); // Assign new loop attributes. 
if (EpilogueOuterLoop) { @@ -424,23 +424,33 @@ static LoopUnrollResult tryToUnrollAndJamLoop( return UnrollResult; } -static bool tryToUnrollAndJamLoop(LoopNest &LN, DominatorTree &DT, LoopInfo &LI, +static bool tryToUnrollAndJamLoop(Function &F, DominatorTree &DT, LoopInfo &LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, DependenceInfo &DI, - OptimizationRemarkEmitter &ORE, int OptLevel, - LPMUpdater &U) { + OptimizationRemarkEmitter &ORE, + int OptLevel) { bool DidSomething = false; - ArrayRef Loops = LN.getLoops(); - // Add the loop nests in the reverse order of LN. See method + // The loop unroll and jam pass requires loops to be in simplified form, and + // also needs LCSSA. Since simplification may add new inner loops, it has to + // run before the legality and profitability checks. This means running the + // loop unroll and jam pass will simplify all loops, regardless of whether + // anything ends up being unrolled and jammed. + for (auto &L : LI) { + DidSomething |= + simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false /* PreserveLCSSA */); + DidSomething |= formLCSSARecursively(*L, DT, &LI, &SE); + } + + // Add the loop nests in the reverse order of LoopInfo. See method // declaration. 
SmallPriorityWorklist Worklist; - appendLoopsToWorklist(Loops, Worklist); + appendLoopsToWorklist(LI, Worklist); while (!Worklist.empty()) { Loop *L = Worklist.pop_back_val(); LoopUnrollResult Result = - tryToUnrollAndJamLoop(L, DT, &LI, SE, TTI, AC, DI, ORE, OptLevel, &U); + tryToUnrollAndJamLoop(L, DT, &LI, SE, TTI, AC, DI, ORE, OptLevel); if (Result != LoopUnrollResult::Unmodified) DidSomething = true; } @@ -450,35 +460,29 @@ static bool tryToUnrollAndJamLoop(LoopNest &LN, DominatorTree &DT, LoopInfo &LI, namespace { -class LoopUnrollAndJam : public LoopPass { +class LoopUnrollAndJam : public FunctionPass { public: static char ID; // Pass ID, replacement for typeid unsigned OptLevel; - LoopUnrollAndJam(int OptLevel = 2) : LoopPass(ID), OptLevel(OptLevel) { + LoopUnrollAndJam(int OptLevel = 2) : FunctionPass(ID), OptLevel(OptLevel) { initializeLoopUnrollAndJamPass(*PassRegistry::getPassRegistry()); } - bool runOnLoop(Loop *L, LPPassManager &LPM) override { - if (skipLoop(L)) + bool runOnFunction(Function &F) override { + if (skipFunction(F)) return false; - auto *F = L->getHeader()->getParent(); - auto &SE = getAnalysis().getSE(); - auto *LI = &getAnalysis().getLoopInfo(); - auto &DI = getAnalysis().getDI(); auto &DT = getAnalysis().getDomTree(); - auto &TTI = getAnalysis().getTTI(*F); + LoopInfo &LI = getAnalysis().getLoopInfo(); + ScalarEvolution &SE = getAnalysis().getSE(); + const TargetTransformInfo &TTI = + getAnalysis().getTTI(F); + auto &AC = getAnalysis().getAssumptionCache(F); + auto &DI = getAnalysis().getDI(); auto &ORE = getAnalysis().getORE(); - auto &AC = getAnalysis().getAssumptionCache(*F); - LoopUnrollResult Result = tryToUnrollAndJamLoop(L, DT, LI, SE, TTI, AC, DI, - ORE, OptLevel, nullptr); - - if (Result == LoopUnrollResult::FullyUnrolled) - LPM.markLoopAsDeleted(*L); - - return Result != LoopUnrollResult::Unmodified; + return tryToUnrollAndJamLoop(F, DT, LI, SE, TTI, AC, DI, ORE, OptLevel); } /// This transformation requires natural loop 
information & requires that @@ -501,10 +505,7 @@ char LoopUnrollAndJam::ID = 0; INITIALIZE_PASS_BEGIN(LoopUnrollAndJam, "loop-unroll-and-jam", "Unroll and Jam loops", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LoopPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LoopSimplify) -INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) @@ -517,20 +518,19 @@ Pass *llvm::createLoopUnrollAndJamPass(int OptLevel) { return new LoopUnrollAndJam(OptLevel); } -PreservedAnalyses LoopUnrollAndJamPass::run(LoopNest &LN, - LoopAnalysisManager &AM, - LoopStandardAnalysisResults &AR, - LPMUpdater &U) { - Function &F = *LN.getParent(); - - DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI); - OptimizationRemarkEmitter ORE(&F); - - if (!tryToUnrollAndJamLoop(LN, AR.DT, AR.LI, AR.SE, AR.TTI, AR.AC, DI, ORE, - OptLevel, U)) +PreservedAnalyses LoopUnrollAndJamPass::run(Function &F, + FunctionAnalysisManager &AM) { + ScalarEvolution &SE = AM.getResult(F); + LoopInfo &LI = AM.getResult(F); + TargetTransformInfo &TTI = AM.getResult(F); + AssumptionCache &AC = AM.getResult(F); + DominatorTree &DT = AM.getResult(F); + DependenceInfo &DI = AM.getResult(F); + OptimizationRemarkEmitter &ORE = + AM.getResult(F); + + if (!tryToUnrollAndJamLoop(F, DT, LI, SE, TTI, AC, DI, ORE, OptLevel)) return PreservedAnalyses::all(); - auto PA = getLoopPassPreservedAnalyses(); - PA.preserve(); - return PA; + return getLoopPassPreservedAnalyses(); } diff --git a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp index 12196f6991bed4..66407db8e1af51 100644 --- a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp @@ -49,7 +49,6 @@ #include "llvm/Support/ErrorHandling.h" #include 
"llvm/Support/GenericDomTree.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/LoopUtils.h" @@ -222,11 +221,12 @@ static void moveHeaderPhiOperandsToForeBlocks(BasicBlock *Header, If EpilogueLoop is non-null, it receives the epilogue loop (if it was necessary to create one and not fully unrolled). */ -LoopUnrollResult llvm::UnrollAndJamLoop( - Loop *L, unsigned Count, unsigned TripCount, unsigned TripMultiple, - bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, - AssumptionCache *AC, const TargetTransformInfo *TTI, - OptimizationRemarkEmitter *ORE, LPMUpdater *U, Loop **EpilogueLoop) { +LoopUnrollResult +llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, + unsigned TripMultiple, bool UnrollRemainder, + LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, + AssumptionCache *AC, const TargetTransformInfo *TTI, + OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop) { // When we enter here we should have already checked that it is safe BasicBlock *Header = L->getHeader(); @@ -605,11 +605,8 @@ LoopUnrollResult llvm::UnrollAndJamLoop( ++NumUnrolledAndJammed; // Update LoopInfo if the loop is completely removed. - if (CompletelyUnroll) { - if (U) - U->markLoopAsDeleted(*L, std::string(L->getName())); + if (CompletelyUnroll) LI->erase(L); - } #ifndef NDEBUG // We shouldn't have done anything to break loop simplify form or LCSSA. 
diff --git a/llvm/test/Transforms/LoopUnrollAndJam/innerloop.ll b/llvm/test/Transforms/LoopUnrollAndJam/innerloop.ll index c3a4ebd6dede5f..79c32c90174ebe 100644 --- a/llvm/test/Transforms/LoopUnrollAndJam/innerloop.ll +++ b/llvm/test/Transforms/LoopUnrollAndJam/innerloop.ll @@ -1,5 +1,5 @@ ; RUN: opt -loop-unroll-and-jam -allow-unroll-and-jam -verify-loop-info < %s -S | FileCheck %s -; RUN: opt -passes='loop(loop-unroll-and-jam),verify' -allow-unroll-and-jam < %s -S | FileCheck %s +; RUN: opt -passes='loop-unroll-and-jam,verify' -allow-unroll-and-jam < %s -S | FileCheck %s ; Check that the newly created loops to not fail to be added to LI ; This test deliberately disables UnJ on the middle loop, performing it instead on the