Skip to content

Commit

Permalink
[LICM][PhaseOrder] Don't speculate in LICM until after running loop r…
Browse files Browse the repository at this point in the history
…otate

LICM will speculatively hoist code outside of loops. Doing so requires discarding information attached to the hoisted instructions, such as alias analysis information (#53794) and range information (https://bugs.llvm.org/show_bug.cgi?id=50550), among others. Prior to https://reviews.llvm.org/D99249, LICM would only be run after LoopRotate. Running LoopRotate prior to LICM prevents an instruction hoist from being speculative if the instruction was conditionally executed by the iteration (as is commonly emitted by clang and other frontends). Adding the additional LICM pass before LoopRotate, however, forces all of these instructions to be considered speculative, even if they would not be speculative after LoopRotate. This destroys information, and discarding this additional information results in performance losses.

This PR modifies LICM to accept a "speculative" parameter (AllowSpeculation), which controls whether LICM is allowed to perform information-losing speculative hoists. Phase ordering is then modified so that the information-losing speculative hoists are not performed until after loop rotation has run, preserving this additional information.

Reviewed By: lebedev.ri

Differential Revision: https://reviews.llvm.org/D119965
  • Loading branch information
wsmoses committed Feb 18, 2022
1 parent f0dd818 commit d9da6a5
Show file tree
Hide file tree
Showing 11 changed files with 177 additions and 124 deletions.
3 changes: 2 additions & 1 deletion llvm/include/llvm/Transforms/Scalar.h
Expand Up @@ -133,7 +133,8 @@ Pass *createIndVarSimplifyPass();
//
Pass *createLICMPass();
Pass *createLICMPass(unsigned LicmMssaOptCap,
unsigned LicmMssaNoAccForPromotionCap);
unsigned LicmMssaNoAccForPromotionCap,
bool AllowSpeculation);

//===----------------------------------------------------------------------===//
//
Expand Down
20 changes: 14 additions & 6 deletions llvm/include/llvm/Transforms/Scalar/LICM.h
Expand Up @@ -46,14 +46,18 @@ extern cl::opt<unsigned> SetLicmMssaNoAccForPromotionCap;
class LICMPass : public PassInfoMixin<LICMPass> {
unsigned LicmMssaOptCap;
unsigned LicmMssaNoAccForPromotionCap;
bool LicmAllowSpeculation;

public:
LICMPass()
: LicmMssaOptCap(SetLicmMssaOptCap),
LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap) {}
LICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap)
LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap),
LicmAllowSpeculation(true) {}
LICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap,
bool LicmAllowSpeculation)
: LicmMssaOptCap(LicmMssaOptCap),
LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
LicmAllowSpeculation(LicmAllowSpeculation) {}
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR, LPMUpdater &U);
};
Expand All @@ -62,14 +66,18 @@ class LICMPass : public PassInfoMixin<LICMPass> {
class LNICMPass : public PassInfoMixin<LNICMPass> {
unsigned LicmMssaOptCap;
unsigned LicmMssaNoAccForPromotionCap;
bool LicmAllowSpeculation;

public:
LNICMPass()
: LicmMssaOptCap(SetLicmMssaOptCap),
LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap) {}
LNICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap)
LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap),
LicmAllowSpeculation(true) {}
LNICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap,
bool LicmAllowSpeculation)
: LicmMssaOptCap(LicmMssaOptCap),
LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
LicmAllowSpeculation(LicmAllowSpeculation) {}
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR, LPMUpdater &U);
};
Expand Down
9 changes: 7 additions & 2 deletions llvm/include/llvm/Transforms/Utils/LoopUtils.h
Expand Up @@ -171,10 +171,13 @@ bool sinkRegionForLoopNest(DomTreeNode *, AAResults *, LoopInfo *,
/// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all
/// instructions of the loop and loop safety information as arguments.
/// Diagnostics are emitted via \p ORE. It returns changed status.
/// \p AllowSpeculation is whether values should be hoisted even if they are not
/// guaranteed to execute in the loop, but are safe to speculatively execute.
bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
BlockFrequencyInfo *, TargetLibraryInfo *, Loop *,
MemorySSAUpdater *, ScalarEvolution *, ICFLoopSafetyInfo *,
SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool);
SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool,
bool AllowSpeculation);

/// This function deletes dead loops. The caller of this function needs to
/// guarantee that the loop is in fact dead.
Expand Down Expand Up @@ -204,12 +207,14 @@ void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
/// LoopInfo, DominatorTree, Loop, AliasSet information for all instructions
/// of the loop and loop safety information as arguments.
/// Diagnostics are emitted via \p ORE. It returns changed status.
/// \p AllowSpeculation is whether values should be hoisted even if they are not
/// guaranteed to execute in the loop, but are safe to speculatively execute.
bool promoteLoopAccessesToScalars(
const SmallSetVector<Value *, 8> &, SmallVectorImpl<BasicBlock *> &,
SmallVectorImpl<Instruction *> &, SmallVectorImpl<MemoryAccess *> &,
PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *,
Loop *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
OptimizationRemarkEmitter *);
OptimizationRemarkEmitter *, bool AllowSpeculation);

/// Does a BFS from a given node to all of its children inside a given loop.
/// The returned vector of nodes includes the starting point.
Expand Down
34 changes: 24 additions & 10 deletions llvm/lib/Passes/PassBuilderPipelines.cpp
Expand Up @@ -293,14 +293,19 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
LPM1.addPass(LoopSimplifyCFGPass());

// Try to remove as much code from the loop header as possible,
// to reduce amount of IR that will have to be duplicated.
// to reduce amount of IR that will have to be duplicated. However,
// do not perform speculative hoisting the first time as LICM
// will destroy metadata that may not need to be destroyed if run
// after loop rotation.
// TODO: Investigate promotion cap for O1.
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/false));

LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
isLTOPreLink(Phase)));
// TODO: Investigate promotion cap for O1.
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true));
LPM1.addPass(SimpleLoopUnswitchPass());
if (EnableLoopFlatten)
LPM1.addPass(LoopFlattenPass());
Expand Down Expand Up @@ -470,15 +475,20 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
LPM1.addPass(LoopSimplifyCFGPass());

// Try to remove as much code from the loop header as possible,
// to reduce amount of IR that will have to be duplicated.
// to reduce amount of IR that will have to be duplicated. However,
// do not perform speculative hoisting the first time as LICM
// will destroy metadata that may not need to be destroyed if run
// after loop rotation.
// TODO: Investigate promotion cap for O1.
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/false));

// Disable header duplication in loop rotation at -Oz.
LPM1.addPass(
LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase)));
// TODO: Investigate promotion cap for O1.
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true));
LPM1.addPass(
SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3 &&
EnableO3NonTrivialUnswitching));
Expand Down Expand Up @@ -575,7 +585,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,

FPM.addPass(DSEPass());
FPM.addPass(createFunctionToLoopPassAdaptor(
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true),
/*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));

FPM.addPass(CoroElidePass());
Expand Down Expand Up @@ -1019,7 +1030,8 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
ExtraPasses.addPass(CorrelatedValuePropagationPass());
ExtraPasses.addPass(InstCombinePass());
LoopPassManager LPM;
LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true));
LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
OptimizationLevel::O3));
ExtraPasses.addPass(
Expand Down Expand Up @@ -1087,7 +1099,8 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
FPM.addPass(
RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
FPM.addPass(createFunctionToLoopPassAdaptor(
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true),
/*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
}

Expand Down Expand Up @@ -1627,7 +1640,8 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,

FunctionPassManager MainFPM;
MainFPM.addPass(createFunctionToLoopPassAdaptor(
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true),
/*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));

if (RunNewGVN)
Expand Down
26 changes: 18 additions & 8 deletions llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
Expand Up @@ -458,13 +458,18 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createLoopSimplifyCFGPass());
}
// Try to remove as much code from the loop header as possible,
// to reduce amount of IR that will have to be duplicated.
// to reduce amount of IR that will have to be duplicated. However,
// do not perform speculative hoisting the first time as LICM
// will destroy metadata that may not need to be destroyed if run
// after loop rotation.
// TODO: Investigate promotion cap for O1.
MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/false));
// Rotate Loop - disable header duplication at -Oz
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO));
// TODO: Investigate promotion cap for O1.
MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true));
if (EnableSimpleLoopUnswitch)
MPM.add(createSimpleLoopUnswitchLegacyPass());
else
Expand Down Expand Up @@ -529,7 +534,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
// TODO: Investigate if this is too expensive at O1.
if (OptLevel > 1) {
MPM.add(createDeadStoreEliminationPass()); // Delete dead stores
MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true));
}

addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
Expand Down Expand Up @@ -588,7 +594,8 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,
PM.add(createEarlyCSEPass());
PM.add(createCorrelatedValuePropagationPass());
PM.add(createInstructionCombiningPass());
PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true));
PM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
PM.add(createCFGSimplificationPass(
SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
Expand Down Expand Up @@ -651,7 +658,8 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,
// unrolled loop is an inner loop, then the prologue will be inside the
// outer loop. LICM pass can help to promote the runtime check out if the
// checked value is loop invariant.
PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true));
}

PM.add(createWarnMissedTransformationsPass());
Expand Down Expand Up @@ -898,7 +906,8 @@ void PassManagerBuilder::populateModulePassManager(
// later might get benefit of no-alias assumption in clone loop.
if (UseLoopVersioningLICM) {
MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM
MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true));
}

// We add a fresh GlobalsModRef run at this point. This is particularly
Expand Down Expand Up @@ -1133,7 +1142,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// Run a few AA driven optimizations here and now, to cleanup the code.
PM.add(createGlobalsAAWrapperPass()); // IP alias analysis.

PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true));
PM.add(NewGVN ? createNewGVNPass()
: createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
PM.add(createMemCpyOptPass()); // Remove dead memcpys.
Expand Down

0 comments on commit d9da6a5

Please sign in to comment.