Skip to content

Commit df89564

Browse files
authored
[SimpleLoopUnswitch] Don't use BlockFrequencyInfo to skip cold loops (#159522)
In https://reviews.llvm.org/D129599, non-trivial switching was disabled for cold loops in the interest of code size. This added a dependency on BlockFrequencyInfo with PGO, but in loop passes this is only available on a lossy basis: see https://reviews.llvm.org/D86156 LICM moved away from BFI so as of today SimpleLoopUnswitch is the only remaining loop pass that uses BFI, for the sole reason to prevent code size increases in PGO builds. It doesn't use BFI if there's no profile summary available. After some investigation on llvm-test-suite it turns out that the lossy BFI causes very significant deviations in block frequency, since when new loops are deleted/created during the loop pass manager it can return frequencies for different loops altogether. This results in unswitchable loops being mistakenly skipped because they are thought to be cold. This patch removes the use of BFI from SimpleLoopUnswitch and thus the last remaining use of BFI in a loop pass. To recover the original intent of not unswitching cold code, PGOForceFunctionAttrs can be used to annotate functions which can be optimized for code size, since SimpleLoopUnswitch will respect OptSize: https://reviews.llvm.org/D94559 This isn't 100% the same behaviour since the previous behaviour checked for coldness at the loop level and this is now at the function level. We could expand PGOForceFunctionAttrs to be more granular at the loop level, #159595 tracks this idea.
1 parent fe5b72a commit df89564

18 files changed

+42
-200
lines changed

llvm/include/llvm/Analysis/LoopAnalysisManager.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ namespace llvm {
3636

3737
class AAResults;
3838
class AssumptionCache;
39-
class BlockFrequencyInfo;
4039
class DominatorTree;
4140
class Function;
4241
class Loop;
@@ -58,7 +57,6 @@ struct LoopStandardAnalysisResults {
5857
ScalarEvolution &SE;
5958
TargetLibraryInfo &TLI;
6059
TargetTransformInfo &TTI;
61-
BlockFrequencyInfo *BFI;
6260
MemorySSA *MSSA;
6361
};
6462

llvm/include/llvm/Transforms/Scalar/LoopPassManager.h

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -404,10 +404,8 @@ class FunctionToLoopPassAdaptor
404404

405405
explicit FunctionToLoopPassAdaptor(std::unique_ptr<PassConceptT> Pass,
406406
bool UseMemorySSA = false,
407-
bool UseBlockFrequencyInfo = false,
408407
bool LoopNestMode = false)
409408
: Pass(std::move(Pass)), UseMemorySSA(UseMemorySSA),
410-
UseBlockFrequencyInfo(UseBlockFrequencyInfo),
411409
LoopNestMode(LoopNestMode) {
412410
LoopCanonicalizationFPM.addPass(LoopSimplifyPass());
413411
LoopCanonicalizationFPM.addPass(LCSSAPass());
@@ -429,7 +427,6 @@ class FunctionToLoopPassAdaptor
429427
FunctionPassManager LoopCanonicalizationFPM;
430428

431429
bool UseMemorySSA = false;
432-
bool UseBlockFrequencyInfo = false;
433430
const bool LoopNestMode;
434431
};
435432

@@ -442,8 +439,7 @@ class FunctionToLoopPassAdaptor
442439
/// \c LoopPassManager and the returned adaptor will be in loop-nest mode.
443440
template <typename LoopPassT>
444441
inline FunctionToLoopPassAdaptor
445-
createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA = false,
446-
bool UseBlockFrequencyInfo = false) {
442+
createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA = false) {
447443
if constexpr (is_detected<HasRunOnLoopT, LoopPassT>::value) {
448444
using PassModelT =
449445
detail::PassModel<Loop, LoopPassT, LoopAnalysisManager,
@@ -453,7 +449,7 @@ createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA = false,
453449
return FunctionToLoopPassAdaptor(
454450
std::unique_ptr<FunctionToLoopPassAdaptor::PassConceptT>(
455451
new PassModelT(std::forward<LoopPassT>(Pass))),
456-
UseMemorySSA, UseBlockFrequencyInfo, false);
452+
UseMemorySSA, false);
457453
} else {
458454
LoopPassManager LPM;
459455
LPM.addPass(std::forward<LoopPassT>(Pass));
@@ -465,7 +461,7 @@ createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA = false,
465461
return FunctionToLoopPassAdaptor(
466462
std::unique_ptr<FunctionToLoopPassAdaptor::PassConceptT>(
467463
new PassModelT(std::move(LPM))),
468-
UseMemorySSA, UseBlockFrequencyInfo, true);
464+
UseMemorySSA, true);
469465
}
470466
}
471467

@@ -474,8 +470,7 @@ createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA = false,
474470
template <>
475471
inline FunctionToLoopPassAdaptor
476472
createFunctionToLoopPassAdaptor<LoopPassManager>(LoopPassManager &&LPM,
477-
bool UseMemorySSA,
478-
bool UseBlockFrequencyInfo) {
473+
bool UseMemorySSA) {
479474
// Check if LPM contains any loop pass and if it does not, returns an adaptor
480475
// in loop-nest mode.
481476
using PassModelT =
@@ -487,7 +482,7 @@ createFunctionToLoopPassAdaptor<LoopPassManager>(LoopPassManager &&LPM,
487482
return FunctionToLoopPassAdaptor(
488483
std::unique_ptr<FunctionToLoopPassAdaptor::PassConceptT>(
489484
new PassModelT(std::move(LPM))),
490-
UseMemorySSA, UseBlockFrequencyInfo, LoopNestMode);
485+
UseMemorySSA, LoopNestMode);
491486
}
492487

493488
/// Pass for printing a loop's contents as textual IR.

llvm/lib/Passes/PassBuilder.cpp

Lines changed: 14 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2027,23 +2027,22 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
20272027
#define LOOPNEST_PASS(NAME, CREATE_PASS) \
20282028
if (Name == NAME) { \
20292029
MPM.addPass(createModuleToFunctionPassAdaptor( \
2030-
createFunctionToLoopPassAdaptor(CREATE_PASS, false, false))); \
2030+
createFunctionToLoopPassAdaptor(CREATE_PASS, false))); \
20312031
return Error::success(); \
20322032
}
20332033
#define LOOP_PASS(NAME, CREATE_PASS) \
20342034
if (Name == NAME) { \
20352035
MPM.addPass(createModuleToFunctionPassAdaptor( \
2036-
createFunctionToLoopPassAdaptor(CREATE_PASS, false, false))); \
2036+
createFunctionToLoopPassAdaptor(CREATE_PASS, false))); \
20372037
return Error::success(); \
20382038
}
20392039
#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
20402040
if (checkParametrizedPassName(Name, NAME)) { \
20412041
auto Params = parsePassParameters(PARSER, Name, NAME); \
20422042
if (!Params) \
20432043
return Params.takeError(); \
2044-
MPM.addPass( \
2045-
createModuleToFunctionPassAdaptor(createFunctionToLoopPassAdaptor( \
2046-
CREATE_PASS(Params.get()), false, false))); \
2044+
MPM.addPass(createModuleToFunctionPassAdaptor( \
2045+
createFunctionToLoopPassAdaptor(CREATE_PASS(Params.get()), false))); \
20472046
return Error::success(); \
20482047
}
20492048
#include "PassRegistry.def"
@@ -2142,23 +2141,22 @@ Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
21422141
#define LOOPNEST_PASS(NAME, CREATE_PASS) \
21432142
if (Name == NAME) { \
21442143
CGPM.addPass(createCGSCCToFunctionPassAdaptor( \
2145-
createFunctionToLoopPassAdaptor(CREATE_PASS, false, false))); \
2144+
createFunctionToLoopPassAdaptor(CREATE_PASS, false))); \
21462145
return Error::success(); \
21472146
}
21482147
#define LOOP_PASS(NAME, CREATE_PASS) \
21492148
if (Name == NAME) { \
21502149
CGPM.addPass(createCGSCCToFunctionPassAdaptor( \
2151-
createFunctionToLoopPassAdaptor(CREATE_PASS, false, false))); \
2150+
createFunctionToLoopPassAdaptor(CREATE_PASS, false))); \
21522151
return Error::success(); \
21532152
}
21542153
#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
21552154
if (checkParametrizedPassName(Name, NAME)) { \
21562155
auto Params = parsePassParameters(PARSER, Name, NAME); \
21572156
if (!Params) \
21582157
return Params.takeError(); \
2159-
CGPM.addPass( \
2160-
createCGSCCToFunctionPassAdaptor(createFunctionToLoopPassAdaptor( \
2161-
CREATE_PASS(Params.get()), false, false))); \
2158+
CGPM.addPass(createCGSCCToFunctionPassAdaptor( \
2159+
createFunctionToLoopPassAdaptor(CREATE_PASS(Params.get()), false))); \
21622160
return Error::success(); \
21632161
}
21642162
#include "PassRegistry.def"
@@ -2191,11 +2189,8 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
21912189
return Err;
21922190
// Add the nested pass manager with the appropriate adaptor.
21932191
bool UseMemorySSA = (Name == "loop-mssa");
2194-
bool UseBFI = llvm::any_of(InnerPipeline, [](auto Pipeline) {
2195-
return Pipeline.Name.contains("simple-loop-unswitch");
2196-
});
2197-
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), UseMemorySSA,
2198-
UseBFI));
2192+
FPM.addPass(
2193+
createFunctionToLoopPassAdaptor(std::move(LPM), UseMemorySSA));
21992194
return Error::success();
22002195
}
22012196
if (Name == "machine-function") {
@@ -2248,21 +2243,21 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
22482243
// The risk is that it may become obsolete if we're not careful.
22492244
#define LOOPNEST_PASS(NAME, CREATE_PASS) \
22502245
if (Name == NAME) { \
2251-
FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS, false, false)); \
2246+
FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS, false)); \
22522247
return Error::success(); \
22532248
}
22542249
#define LOOP_PASS(NAME, CREATE_PASS) \
22552250
if (Name == NAME) { \
2256-
FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS, false, false)); \
2251+
FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS, false)); \
22572252
return Error::success(); \
22582253
}
22592254
#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
22602255
if (checkParametrizedPassName(Name, NAME)) { \
22612256
auto Params = parsePassParameters(PARSER, Name, NAME); \
22622257
if (!Params) \
22632258
return Params.takeError(); \
2264-
FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS(Params.get()), \
2265-
false, false)); \
2259+
FPM.addPass( \
2260+
createFunctionToLoopPassAdaptor(CREATE_PASS(Params.get()), false)); \
22662261
return Error::success(); \
22672262
}
22682263
#include "PassRegistry.def"

llvm/lib/Passes/PassBuilderPipelines.cpp

Lines changed: 14 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -519,16 +519,14 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
519519
invokeLoopOptimizerEndEPCallbacks(LPM2, Level);
520520

521521
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
522-
/*UseMemorySSA=*/true,
523-
/*UseBlockFrequencyInfo=*/true));
522+
/*UseMemorySSA=*/true));
524523
FPM.addPass(
525524
SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
526525
FPM.addPass(InstCombinePass());
527526
// The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
528527
// *All* loop passes must preserve it, in order to be able to use it.
529528
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
530-
/*UseMemorySSA=*/false,
531-
/*UseBlockFrequencyInfo=*/false));
529+
/*UseMemorySSA=*/false));
532530

533531
// Delete small array after loop unroll.
534532
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
@@ -710,17 +708,15 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
710708
invokeLoopOptimizerEndEPCallbacks(LPM2, Level);
711709

712710
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
713-
/*UseMemorySSA=*/true,
714-
/*UseBlockFrequencyInfo=*/true));
711+
/*UseMemorySSA=*/true));
715712
FPM.addPass(
716713
SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
717714
FPM.addPass(InstCombinePass());
718715
// The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
719716
// LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
720717
// *All* loop passes must preserve it, in order to be able to use it.
721718
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
722-
/*UseMemorySSA=*/false,
723-
/*UseBlockFrequencyInfo=*/false));
719+
/*UseMemorySSA=*/false));
724720

725721
// Delete small array after loop unroll.
726722
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
@@ -773,7 +769,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
773769
FPM.addPass(createFunctionToLoopPassAdaptor(
774770
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
775771
/*AllowSpeculation=*/true),
776-
/*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
772+
/*UseMemorySSA=*/true));
777773

778774
FPM.addPass(CoroElidePass());
779775

@@ -842,8 +838,7 @@ void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
842838
createFunctionToLoopPassAdaptor(
843839
LoopRotatePass(EnableLoopHeaderDuplication ||
844840
Level != OptimizationLevel::Oz),
845-
/*UseMemorySSA=*/false,
846-
/*UseBlockFrequencyInfo=*/false),
841+
/*UseMemorySSA=*/false),
847842
PTO.EagerlyInvalidateAnalyses));
848843
}
849844
}
@@ -1358,8 +1353,7 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
13581353
LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
13591354
OptimizationLevel::O3));
13601355
ExtraPasses.addPass(
1361-
createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
1362-
/*UseBlockFrequencyInfo=*/true));
1356+
createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true));
13631357
ExtraPasses.addPass(
13641358
SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
13651359
ExtraPasses.addPass(InstCombinePass());
@@ -1438,7 +1432,7 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
14381432
FPM.addPass(createFunctionToLoopPassAdaptor(
14391433
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
14401434
/*AllowSpeculation=*/true),
1441-
/*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1435+
/*UseMemorySSA=*/true));
14421436

14431437
// Now that we've vectorized and unrolled loops, we may have more refined
14441438
// alignment information, try to re-derive it here.
@@ -1520,7 +1514,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
15201514
OptimizePM.addPass(createFunctionToLoopPassAdaptor(
15211515
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
15221516
/*AllowSpeculation=*/true),
1523-
/*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1517+
/*USeMemorySSA=*/true));
15241518
}
15251519

15261520
OptimizePM.addPass(Float2IntPass());
@@ -1560,8 +1554,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
15601554
if (PTO.LoopInterchange)
15611555
LPM.addPass(LoopInterchangePass());
15621556

1563-
OptimizePM.addPass(createFunctionToLoopPassAdaptor(
1564-
std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
1557+
OptimizePM.addPass(
1558+
createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false));
15651559

15661560
// FIXME: This may not be the right place in the pipeline.
15671561
// We need to have the data to support the right place.
@@ -2133,7 +2127,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
21332127
MainFPM.addPass(createFunctionToLoopPassAdaptor(
21342128
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
21352129
/*AllowSpeculation=*/true),
2136-
/*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
2130+
/*USeMemorySSA=*/true));
21372131

21382132
if (RunNewGVN)
21392133
MainFPM.addPass(NewGVNPass());
@@ -2163,8 +2157,8 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
21632157
PTO.ForgetAllSCEVInLoopUnroll));
21642158
// The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
21652159
// *All* loop passes must preserve it, in order to be able to use it.
2166-
MainFPM.addPass(createFunctionToLoopPassAdaptor(
2167-
std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
2160+
MainFPM.addPass(
2161+
createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false));
21682162

21692163
MainFPM.addPass(LoopDistributePass());
21702164

llvm/lib/Transforms/Scalar/LoopPassManager.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
#include "llvm/Transforms/Scalar/LoopPassManager.h"
1010
#include "llvm/Analysis/AssumptionCache.h"
11-
#include "llvm/Analysis/BlockFrequencyInfo.h"
1211
#include "llvm/Analysis/MemorySSA.h"
1312
#include "llvm/Analysis/ScalarEvolution.h"
1413
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -217,17 +216,13 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
217216
// Get the analysis results needed by loop passes.
218217
MemorySSA *MSSA =
219218
UseMemorySSA ? (&AM.getResult<MemorySSAAnalysis>(F).getMSSA()) : nullptr;
220-
BlockFrequencyInfo *BFI = UseBlockFrequencyInfo && F.hasProfileData()
221-
? (&AM.getResult<BlockFrequencyAnalysis>(F))
222-
: nullptr;
223219
LoopStandardAnalysisResults LAR = {AM.getResult<AAManager>(F),
224220
AM.getResult<AssumptionAnalysis>(F),
225221
AM.getResult<DominatorTreeAnalysis>(F),
226222
AM.getResult<LoopAnalysis>(F),
227223
AM.getResult<ScalarEvolutionAnalysis>(F),
228224
AM.getResult<TargetLibraryAnalysis>(F),
229225
AM.getResult<TargetIRAnalysis>(F),
230-
BFI,
231226
MSSA};
232227

233228
// Setup the loop analysis manager from its proxy. It is important that

0 commit comments

Comments
 (0)