From 36844e9ac6d11bfd9d2328d456665193f1962f83 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Thu, 18 Sep 2025 15:57:46 +0800 Subject: [PATCH 1/3] [SimpleLoopUnswitch] Don't use BlockFrequencyInfo to skip cold loops In https://reviews.llvm.org/D129599, non-trivial unswitching was disabled for cold loops in the interest of code size. This added a dependency on BlockFrequencyInfo, but in loop passes this is only available on a lossy basis: see https://reviews.llvm.org/D86156 LICM moved away from BFI, and as of today SimpleLoopUnswitch is the only remaining loop pass that uses BFI, solely to prevent code size increases in PGO builds. It doesn't use BFI if there's no profile summary available. However, just before the BFI check, we also check to see if the function is marked as OptSize: https://reviews.llvm.org/D94559 Coincidentally, sometime after the initial BFI patch, PGOForceFunctionAttrsPass was added, which automatically annotates cold functions with OptSize: https://reviews.llvm.org/D149800 I think using PGOForceFunctionAttrs to add the OptSize attribute is probably a more accurate and generalized way to prevent unwanted code size increases. So this patch proposes to remove the BFI check in SimpleLoopUnswitch. 
This isn't 100% the same behaviour since the previous behaviour checked for coldness at the loop level and this is now at the function level, but I believe the benefits outweigh this: - It allows us to remove BFI from the function-to-loop pass adaptor, which was only enabled for certain stages in the LTO pipeline - We no longer have to worry about lossy analysis results - Which in turn means the decision to avoid non-trivial unswitching will be more accurate - It brings the behaviour in line with other passes that respect OptSize - It respects the -pgo-cold-func-opt flag so users can control the behaviour - It prevents the need to run OuterAnalysisManagerProxy as often, which is probably good for compile time --- .../llvm/Analysis/LoopAnalysisManager.h | 2 - .../llvm/Transforms/Scalar/LoopPassManager.h | 15 +++---- llvm/lib/Passes/PassBuilder.cpp | 33 +++++++-------- llvm/lib/Passes/PassBuilderPipelines.cpp | 34 +++++++--------- .../lib/Transforms/Scalar/LoopPassManager.cpp | 5 --- .../Transforms/Scalar/SimpleLoopUnswitch.cpp | 40 +------------------ llvm/test/Other/loop-pm-invalidation.ll | 30 -------------- llvm/test/Other/new-pm-defaults.ll | 1 - .../Other/new-pm-thinlto-postlink-defaults.ll | 1 - .../new-pm-thinlto-postlink-pgo-defaults.ll | 1 - ...-pm-thinlto-postlink-samplepgo-defaults.ll | 1 - .../Other/new-pm-thinlto-prelink-defaults.ll | 1 - .../new-pm-thinlto-prelink-pgo-defaults.ll | 1 - ...w-pm-thinlto-prelink-samplepgo-defaults.ll | 1 - .../unswitch-cold-func.ll} | 5 ++- .../nontrivial-unswitch-markloopasdeleted.ll | 1 - 16 files changed, 38 insertions(+), 134 deletions(-) rename llvm/test/Transforms/{SimpleLoopUnswitch/PGO-nontrivial-unswitch.ll => PhaseOrdering/unswitch-cold-func.ll} (92%) diff --git a/llvm/include/llvm/Analysis/LoopAnalysisManager.h b/llvm/include/llvm/Analysis/LoopAnalysisManager.h index fc69cb03849c6..1755257fe6c89 100644 --- a/llvm/include/llvm/Analysis/LoopAnalysisManager.h +++ b/llvm/include/llvm/Analysis/LoopAnalysisManager.h @@ 
-36,7 +36,6 @@ namespace llvm { class AAResults; class AssumptionCache; -class BlockFrequencyInfo; class DominatorTree; class Function; class Loop; @@ -58,7 +57,6 @@ struct LoopStandardAnalysisResults { ScalarEvolution &SE; TargetLibraryInfo &TLI; TargetTransformInfo &TTI; - BlockFrequencyInfo *BFI; MemorySSA *MSSA; }; diff --git a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h index 750f9546625a2..1842d2dc5f05a 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h @@ -404,10 +404,8 @@ class FunctionToLoopPassAdaptor explicit FunctionToLoopPassAdaptor(std::unique_ptr Pass, bool UseMemorySSA = false, - bool UseBlockFrequencyInfo = false, bool LoopNestMode = false) : Pass(std::move(Pass)), UseMemorySSA(UseMemorySSA), - UseBlockFrequencyInfo(UseBlockFrequencyInfo), LoopNestMode(LoopNestMode) { LoopCanonicalizationFPM.addPass(LoopSimplifyPass()); LoopCanonicalizationFPM.addPass(LCSSAPass()); @@ -429,7 +427,6 @@ class FunctionToLoopPassAdaptor FunctionPassManager LoopCanonicalizationFPM; bool UseMemorySSA = false; - bool UseBlockFrequencyInfo = false; const bool LoopNestMode; }; @@ -442,8 +439,7 @@ class FunctionToLoopPassAdaptor /// \c LoopPassManager and the returned adaptor will be in loop-nest mode. 
template inline FunctionToLoopPassAdaptor -createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA = false, - bool UseBlockFrequencyInfo = false) { +createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA = false) { if constexpr (is_detected::value) { using PassModelT = detail::PassModel( new PassModelT(std::forward(Pass))), - UseMemorySSA, UseBlockFrequencyInfo, false); + UseMemorySSA, false); } else { LoopPassManager LPM; LPM.addPass(std::forward(Pass)); @@ -465,7 +461,7 @@ createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA = false, return FunctionToLoopPassAdaptor( std::unique_ptr( new PassModelT(std::move(LPM))), - UseMemorySSA, UseBlockFrequencyInfo, true); + UseMemorySSA, true); } } @@ -474,8 +470,7 @@ createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA = false, template <> inline FunctionToLoopPassAdaptor createFunctionToLoopPassAdaptor(LoopPassManager &&LPM, - bool UseMemorySSA, - bool UseBlockFrequencyInfo) { + bool UseMemorySSA) { // Check if LPM contains any loop pass and if it does not, returns an adaptor // in loop-nest mode. using PassModelT = @@ -487,7 +482,7 @@ createFunctionToLoopPassAdaptor(LoopPassManager &&LPM, return FunctionToLoopPassAdaptor( std::unique_ptr( new PassModelT(std::move(LPM))), - UseMemorySSA, UseBlockFrequencyInfo, LoopNestMode); + UseMemorySSA, LoopNestMode); } /// Pass for printing a loop's contents as textual IR. 
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 4a47dac66a097..d5b44a013d4a7 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1931,13 +1931,13 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM, #define LOOPNEST_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ MPM.addPass(createModuleToFunctionPassAdaptor( \ - createFunctionToLoopPassAdaptor(CREATE_PASS, false, false))); \ + createFunctionToLoopPassAdaptor(CREATE_PASS, false))); \ return Error::success(); \ } #define LOOP_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ MPM.addPass(createModuleToFunctionPassAdaptor( \ - createFunctionToLoopPassAdaptor(CREATE_PASS, false, false))); \ + createFunctionToLoopPassAdaptor(CREATE_PASS, false))); \ return Error::success(); \ } #define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ @@ -1945,9 +1945,8 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM, auto Params = parsePassParameters(PARSER, Name, NAME); \ if (!Params) \ return Params.takeError(); \ - MPM.addPass( \ - createModuleToFunctionPassAdaptor(createFunctionToLoopPassAdaptor( \ - CREATE_PASS(Params.get()), false, false))); \ + MPM.addPass(createModuleToFunctionPassAdaptor( \ + createFunctionToLoopPassAdaptor(CREATE_PASS(Params.get()), false))); \ return Error::success(); \ } #include "PassRegistry.def" @@ -2046,13 +2045,13 @@ Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM, #define LOOPNEST_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ CGPM.addPass(createCGSCCToFunctionPassAdaptor( \ - createFunctionToLoopPassAdaptor(CREATE_PASS, false, false))); \ + createFunctionToLoopPassAdaptor(CREATE_PASS, false))); \ return Error::success(); \ } #define LOOP_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ CGPM.addPass(createCGSCCToFunctionPassAdaptor( \ - createFunctionToLoopPassAdaptor(CREATE_PASS, false, false))); \ + createFunctionToLoopPassAdaptor(CREATE_PASS, false))); \ return Error::success(); \ } 
#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ @@ -2060,9 +2059,8 @@ Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM, auto Params = parsePassParameters(PARSER, Name, NAME); \ if (!Params) \ return Params.takeError(); \ - CGPM.addPass( \ - createCGSCCToFunctionPassAdaptor(createFunctionToLoopPassAdaptor( \ - CREATE_PASS(Params.get()), false, false))); \ + CGPM.addPass(createCGSCCToFunctionPassAdaptor( \ + createFunctionToLoopPassAdaptor(CREATE_PASS(Params.get()), false))); \ return Error::success(); \ } #include "PassRegistry.def" @@ -2095,11 +2093,8 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM, return Err; // Add the nested pass manager with the appropriate adaptor. bool UseMemorySSA = (Name == "loop-mssa"); - bool UseBFI = llvm::any_of(InnerPipeline, [](auto Pipeline) { - return Pipeline.Name.contains("simple-loop-unswitch"); - }); - FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), UseMemorySSA, - UseBFI)); + FPM.addPass( + createFunctionToLoopPassAdaptor(std::move(LPM), UseMemorySSA)); return Error::success(); } if (Name == "machine-function") { @@ -2152,12 +2147,12 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM, // The risk is that it may become obsolete if we're not careful. 
#define LOOPNEST_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ - FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS, false, false)); \ + FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS, false)); \ return Error::success(); \ } #define LOOP_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ - FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS, false, false)); \ + FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS, false)); \ return Error::success(); \ } #define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ @@ -2165,8 +2160,8 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM, auto Params = parsePassParameters(PARSER, Name, NAME); \ if (!Params) \ return Params.takeError(); \ - FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS(Params.get()), \ - false, false)); \ + FPM.addPass( \ + createFunctionToLoopPassAdaptor(CREATE_PASS(Params.get()), false)); \ return Error::success(); \ } #include "PassRegistry.def" diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index c3f35f0f5e7fa..3dfe9cf51865c 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -517,16 +517,14 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level, invokeLoopOptimizerEndEPCallbacks(LPM2, Level); FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), - /*UseMemorySSA=*/true, - /*UseBlockFrequencyInfo=*/true)); + /*UseMemorySSA=*/true)); FPM.addPass( SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); FPM.addPass(InstCombinePass()); // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA. // *All* loop passes must preserve it, in order to be able to use it. FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2), - /*UseMemorySSA=*/false, - /*UseBlockFrequencyInfo=*/false)); + /*UseMemorySSA=*/false)); // Delete small array after loop unroll. 
FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); @@ -706,8 +704,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, invokeLoopOptimizerEndEPCallbacks(LPM2, Level); FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), - /*UseMemorySSA=*/true, - /*UseBlockFrequencyInfo=*/true)); + /*UseMemorySSA=*/true)); FPM.addPass( SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); FPM.addPass(InstCombinePass()); @@ -715,8 +712,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA. // *All* loop passes must preserve it, in order to be able to use it. FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2), - /*UseMemorySSA=*/false, - /*UseBlockFrequencyInfo=*/false)); + /*UseMemorySSA=*/false)); // Delete small array after loop unroll. FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); @@ -769,7 +765,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, FPM.addPass(createFunctionToLoopPassAdaptor( LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, /*AllowSpeculation=*/true), - /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false)); + /*UseMemorySSA=*/true)); FPM.addPass(CoroElidePass()); @@ -837,8 +833,7 @@ void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM, createFunctionToLoopPassAdaptor( LoopRotatePass(EnableLoopHeaderDuplication || Level != OptimizationLevel::Oz), - /*UseMemorySSA=*/false, - /*UseBlockFrequencyInfo=*/false), + /*UseMemorySSA=*/false), PTO.EagerlyInvalidateAnalyses)); } } @@ -1354,8 +1349,7 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level, LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3)); ExtraPasses.addPass( - createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true, - /*UseBlockFrequencyInfo=*/true)); + createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true)); ExtraPasses.addPass( 
SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); ExtraPasses.addPass(InstCombinePass()); @@ -1433,7 +1427,7 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level, FPM.addPass(createFunctionToLoopPassAdaptor( LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, /*AllowSpeculation=*/true), - /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false)); + /*UseMemorySSA=*/true)); // Now that we've vectorized and unrolled loops, we may have more refined // alignment information, try to re-derive it here. @@ -1510,7 +1504,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, OptimizePM.addPass(createFunctionToLoopPassAdaptor( LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, /*AllowSpeculation=*/true), - /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false)); + /*USeMemorySSA=*/true)); } OptimizePM.addPass(Float2IntPass()); @@ -1550,8 +1544,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, if (PTO.LoopInterchange) LPM.addPass(LoopInterchangePass()); - OptimizePM.addPass(createFunctionToLoopPassAdaptor( - std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false)); + OptimizePM.addPass( + createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false)); // FIXME: This may not be the right place in the pipeline. // We need to have the data to support the right place. @@ -2100,7 +2094,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, MainFPM.addPass(createFunctionToLoopPassAdaptor( LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, /*AllowSpeculation=*/true), - /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false)); + /*USeMemorySSA=*/true)); if (RunNewGVN) MainFPM.addPass(NewGVNPass()); @@ -2130,8 +2124,8 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, PTO.ForgetAllSCEVInLoopUnroll)); // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA. 
// *All* loop passes must preserve it, in order to be able to use it. - MainFPM.addPass(createFunctionToLoopPassAdaptor( - std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true)); + MainFPM.addPass( + createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false)); MainFPM.addPass(LoopDistributePass()); diff --git a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp index 32078b1720508..47a1b95339186 100644 --- a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp +++ b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp @@ -8,7 +8,6 @@ #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -219,9 +218,6 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F, // Get the analysis results needed by loop passes. MemorySSA *MSSA = UseMemorySSA ? (&AM.getResult(F).getMSSA()) : nullptr; - BlockFrequencyInfo *BFI = UseBlockFrequencyInfo && F.hasProfileData() - ? (&AM.getResult(F)) - : nullptr; LoopStandardAnalysisResults LAR = {AM.getResult(F), AM.getResult(F), AM.getResult(F), @@ -229,7 +225,6 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F, AM.getResult(F), AM.getResult(F), AM.getResult(F), - BFI, MSSA}; // Setup the loop analysis manager from its proxy. 
It is important that diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index e4ba70d1bce16..5af6c96c56a06 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -27,7 +27,6 @@ #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/MustExecute.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -3611,8 +3610,7 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, bool Trivial, bool NonTrivial, ScalarEvolution *SE, - MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI, - BlockFrequencyInfo *BFI, LPMUpdater &LoopUpdater) { + MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater) { assert(L.isRecursivelyLCSSAForm(DT, LI) && "Loops must be in LCSSA form before unswitching."); @@ -3652,35 +3650,6 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, if (F->hasOptSize()) return false; - // Returns true if Loop L's loop nest is cold, i.e. if the headers of L, - // of the loops L is nested in, and of the loops nested in L are all cold. - auto IsLoopNestCold = [&](const Loop *L) { - // Check L and all of its parent loops. - auto *Parent = L; - while (Parent) { - if (!PSI->isColdBlock(Parent->getHeader(), BFI)) - return false; - Parent = Parent->getParentLoop(); - } - // Next check all loops nested within L. 
- SmallVector Worklist; - llvm::append_range(Worklist, L->getSubLoops()); - while (!Worklist.empty()) { - auto *CurLoop = Worklist.pop_back_val(); - if (!PSI->isColdBlock(CurLoop->getHeader(), BFI)) - return false; - llvm::append_range(Worklist, CurLoop->getSubLoops()); - } - return true; - }; - - // Skip cold loops in cold loop nests, as unswitching them brings little - // benefit but increases the code size - if (PSI && PSI->hasProfileSummary() && BFI && IsLoopNestCold(&L)) { - LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n"); - return false; - } - // Perform legality checks. if (!isSafeForNoNTrivialUnswitching(L, LI)) return false; @@ -3705,11 +3674,6 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM, LPMUpdater &U) { Function &F = *L.getHeader()->getParent(); (void)F; - ProfileSummaryInfo *PSI = nullptr; - if (auto OuterProxy = - AM.getResult(L, AR) - .getCachedResult(F)) - PSI = OuterProxy->getCachedResult(*F.getParent()); LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L << "\n"); @@ -3720,7 +3684,7 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM, AR.MSSA->verifyMemorySSA(); } if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial, - &AR.SE, MSSAU ? &*MSSAU : nullptr, PSI, AR.BFI, U)) + &AR.SE, MSSAU ? 
&*MSSAU : nullptr, U)) return PreservedAnalyses::all(); if (AR.MSSA && VerifyMemorySSA) diff --git a/llvm/test/Other/loop-pm-invalidation.ll b/llvm/test/Other/loop-pm-invalidation.ll index 4bead0b734eae..25552f7f139fd 100644 --- a/llvm/test/Other/loop-pm-invalidation.ll +++ b/llvm/test/Other/loop-pm-invalidation.ll @@ -16,11 +16,6 @@ ; RUN: opt -disable-output -disable-verify -verify-analysis-invalidation=0 -debug-pass-manager %s -aa-pipeline= 2>&1 \ ; RUN: -passes='loop(no-op-loop,loop-deletion),invalidate,loop(no-op-loop)' \ ; RUN: | FileCheck %s --check-prefix=CHECK-SCEV-INV-AFTER-DELETE -; -; Test that BFI is invalidated after the loop adapter if any of the loop passes -; invalidated it. -; RUN: opt -disable-output -disable-verify -verify-analysis-invalidation=0 -debug-pass-manager %s -aa-pipeline= 2>&1 \ -; RUN: -O1 | FileCheck %s --check-prefix=CHECK-BFI-INV define void @no_loops() { ; CHECK-LOOP-INV: Running pass: LoopSimplifyPass @@ -247,28 +242,3 @@ l0.header: exit: ret void } - -; CHECK-BFI-INV-LABEL: Running analysis: OuterAnalysisManagerProxy<{{.*}}> on loop %l0.header in function simplifiable_loop -; CHECK-BFI-INV-NEXT: Running pass: LoopInstSimplifyPass on loop %l0.header in function simplifiable_loop -; CHECK-BFI-INV-NEXT: Running pass: LoopSimplifyCFGPass on loop %l0.header in function simplifiable_loop -; CHECK-BFI-INV-NEXT: Running pass: LICMPass on loop %l0.header in function simplifiable_loop -; CHECK-BFI-INV-NEXT: Running pass: LoopRotatePass on loop %l0.header in function simplifiable_loop -; CHECK-BFI-INV-NEXT: Running pass: LICMPass on loop %l0.header in function simplifiable_loop -; CHECK-BFI-INV-NEXT: Running pass: SimpleLoopUnswitchPass on loop %l0.header in function simplifiable_loop -; CHECK-BFI-INV-NEXT: Invalidating analysis: PostDominatorTreeAnalysis on simplifiable_loop -; CHECK-BFI-INV-NEXT: Invalidating analysis: BranchProbabilityAnalysis on simplifiable_loop -; CHECK-BFI-INV-NEXT: Invalidating analysis: BlockFrequencyAnalysis on 
simplifiable_loop -; CHECK-BFI-INV-NEXT: Running pass: SimplifyCFGPass on simplifiable_loop (5 instructions) - -define void @simplifiable_loop(i1 %c) !prof !0 { -entry: - br label %l0.header - -l0.header: - br label %l0.latch - -l0.latch: - br i1 %c, label %l0.header, label %l0.latch -} - -!0 = !{!"function_entry_count", i64 1} diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll index c554fdbf4c799..3d9d9027bea51 100644 --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -186,7 +186,6 @@ ; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass -; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running pass: LoopSimplifyPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll index 62bb02d9b3c40..0f91cf1a1515d 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll @@ -114,7 +114,6 @@ ; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass -; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running pass: LoopSimplifyPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll index 0da7a9f73bdce..bdf661911f186 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -100,7 +100,6 @@ ; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass -; CHECK-O-NEXT: Running 
analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running pass: LoopSimplifyPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll index 38b7890682783..7fad82248eb2d 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -109,7 +109,6 @@ ; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass -; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running pass: LoopSimplifyPass diff --git a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll index 5aacd26def2be..dd6acd2c51ee7 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll @@ -146,7 +146,6 @@ ; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass -; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running pass: LoopSimplifyPass diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll index f6a9406596803..ee054527e20bd 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll @@ -149,7 +149,6 @@ ; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass -; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running 
pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running pass: LoopSimplifyPass diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll index 48a9433d24999..fd95e94f3c8b9 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll @@ -114,7 +114,6 @@ ; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LICM ; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass -; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running pass: LoopSimplifyPass diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch.ll b/llvm/test/Transforms/PhaseOrdering/unswitch-cold-func.ll similarity index 92% rename from llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch.ll rename to llvm/test/Transforms/PhaseOrdering/unswitch-cold-func.ll index f1ffcc788a019..039445c04a614 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch.ll +++ b/llvm/test/Transforms/PhaseOrdering/unswitch-cold-func.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 -; RUN: opt < %s -passes='require,function(loop-mssa(simple-loop-unswitch))' -S | FileCheck %s +; RUN: opt < %s -passes='pgo-force-function-attrs,function(loop-mssa(simple-loop-unswitch))' -pgo-kind=pgo-instr-use-pipeline -pgo-cold-func-opt=optsize -S | FileCheck %s +; RUN: opt < %s -passes='pgo-force-function-attrs,function(loop-mssa(simple-loop-unswitch))' -pgo-kind=pgo-instr-use-pipeline -pgo-cold-func-opt=minsize -S | FileCheck %s ;; Check that non-trivial loop unswitching is not applied to a cold loop in a ;; cold loop nest. 
@@ -17,7 +18,7 @@ define void @_Z11hotFunctionbiiPiS_S_(i1 %cond, i32 %M, i32 %N, ptr %A, ptr %B, ptr %C) !prof !36 { ; CHECK-LABEL: define void @_Z11hotFunctionbiiPiS_S_ -; CHECK-SAME: (i1 [[COND:%.*]], i32 [[M:%.*]], i32 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) !prof [[PROF16:![0-9]+]] { +; CHECK-SAME: (i1 [[COND:%.*]], i32 [[M:%.*]], i32 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) #[[ATTR0:[0-9]+]] {{.*}}{ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP19_NOT:%.*]] = icmp eq i32 [[M]], 0 ; CHECK-NEXT: br i1 [[CMP19_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_LR_PH:%.*]], !prof [[PROF17:![0-9]+]] diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-markloopasdeleted.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-markloopasdeleted.ll index 9ab713cc8a4f5..383407b371da0 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-markloopasdeleted.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-markloopasdeleted.ll @@ -18,7 +18,6 @@ ; the analysis caches. 
; ; CHECK: Running pass: SimpleLoopUnswitchPass on loop %loop_begin in function test6 -; CHECK-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-NEXT: Clearing all analysis results for: loop_a_inner From ca43b4bccfbac16a8f57065e63f6c8591f223590 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Thu, 18 Sep 2025 17:13:13 +0800 Subject: [PATCH 2/3] Fix polly --- polly/lib/Transform/Canonicalization.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/polly/lib/Transform/Canonicalization.cpp b/polly/lib/Transform/Canonicalization.cpp index 748d710dd08c1..1be560e64af40 100644 --- a/polly/lib/Transform/Canonicalization.cpp +++ b/polly/lib/Transform/Canonicalization.cpp @@ -104,8 +104,7 @@ polly::buildCanonicalicationPassesForNPM(llvm::ModulePassManager &MPM, LoopPassManager LPM; LPM.addPass(LoopRotatePass(Level != OptimizationLevel::Oz)); FPM.addPass(createFunctionToLoopPassAdaptor( - std::move(LPM), /*UseMemorySSA=*/false, - /*UseBlockFrequencyInfo=*/false)); + std::move(LPM), /*UseMemorySSA=*/false)); } if (PollyInliner) { MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); @@ -121,8 +120,7 @@ polly::buildCanonicalicationPassesForNPM(llvm::ModulePassManager &MPM, LoopPassManager LPM; LPM.addPass(IndVarSimplifyPass()); FPM.addPass(createFunctionToLoopPassAdaptor( - std::move(LPM), /*UseMemorySSA=*/false, - /*UseBlockFrequencyInfo=*/true)); + std::move(LPM), /*UseMemorySSA=*/false)); } return FPM; From 754f24296acec7b97ada10ec2453cff62dc0b635 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Thu, 18 Sep 2025 18:53:57 +0800 Subject: [PATCH 3/3] Remove preserve-bpi.ll test This was added to illustrate the lossy preservation of BPI in 452714f8f8037ff37f9358317651d1652e231db2 But it fails now because we don't run OuterAnalysisManagerProxy beforehand. 
I'm not sure if it's still a useful thing to test given that #159516 removes BPI from the adaptor, so I've gone ahead and deleted the test --- .../LoopPredication/preserve-bpi.ll | 60 ------------------- 1 file changed, 60 deletions(-) delete mode 100644 llvm/test/Transforms/LoopPredication/preserve-bpi.ll diff --git a/llvm/test/Transforms/LoopPredication/preserve-bpi.ll b/llvm/test/Transforms/LoopPredication/preserve-bpi.ll deleted file mode 100644 index 7fbb19783f464..0000000000000 --- a/llvm/test/Transforms/LoopPredication/preserve-bpi.ll +++ /dev/null @@ -1,60 +0,0 @@ -; RUN: opt -mtriple=x86_64 -passes='loop-mssa(loop-predication,licm,simple-loop-unswitch,loop-simplifycfg)' -debug-pass-manager -debug-only=branch-prob -S < %s 2>&1 | FileCheck %s - -; REQUIRES: asserts - -; This test is to solely check that we do not run BPI every single time loop -; predication is invoked (since BPI is preserved as part of -; LoopStandardAnalysisResults). -declare void @llvm.experimental.guard(i1, ...) 
- -; CHECK: Running pass: LoopPredicationPass on loop -; CHECK-NEXT: Running pass: LICMPass on loop -; CHECK-NEXT: Running pass: SimpleLoopUnswitchPass on loop -; CHECK-NEXT: Running analysis: OuterAnalysisManagerProxy -; CHECK-NEXT: Running pass: LoopPredicationPass on loop -; CHECK-NEXT: Running pass: LICMPass on loop -; CHECK-NEXT: Running pass: SimpleLoopUnswitchPass on loop -; CHECK-NEXT: Running pass: LoopSimplifyCFGPass on loop - -define i32 @unsigned_loop_0_to_n_ult_check(ptr %array, i32 %length, i32 %n) { -entry: - %tmp5 = icmp eq i32 %n, 0 - br i1 %tmp5, label %exit, label %loop.preheader - -loop.preheader: ; preds = %entry - br label %loop - -loop: ; preds = %guarded, %loop.preheader - %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ] - %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ] - %within.bounds = icmp ult i32 %i, %length - %widenable_cond = call i1 @llvm.experimental.widenable.condition() - %exiplicit_guard_cond = and i1 %within.bounds, %widenable_cond - br i1 %exiplicit_guard_cond, label %guarded, label %deopt, !prof !0 - -deopt: ; preds = %loop - %deoptcall = call i32 (...) @llvm.experimental.deoptimize.i32(i32 9) [ "deopt"() ] - ret i32 %deoptcall - -guarded: ; preds = %loop - %i.i64 = zext i32 %i to i64 - %array.i.ptr = getelementptr inbounds i32, ptr %array, i64 %i.i64 - %array.i = load i32, ptr %array.i.ptr, align 4 - %loop.acc.next = add i32 %loop.acc, %array.i - %i.next = add nuw i32 %i, 1 - %continue = icmp ult i32 %i.next, %n - br i1 %continue, label %loop, label %exit, !prof !2 - -exit: ; preds = %guarded, %entry - %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %guarded ] - ret i32 %result -} - -declare i32 @llvm.experimental.deoptimize.i32(...) -declare i1 @llvm.experimental.widenable.condition() #0 - -attributes #0 = { inaccessiblememonly nounwind } - -!0 = !{!"branch_weights", i32 1048576, i32 1} -!1 = !{i32 1, i32 -2147483648} -!2 = !{!"branch_weights", i32 1024, i32 1}