Skip to content

Commit

Permalink
Infrastructure for PGO enhancements in inliner
Browse files Browse the repository at this point in the history
This patch provides the following infrastructure for PGO enhancements in the inliner:

- Enable the use of block-level profile information in the inliner.
- Incrementally update block frequency information during inlining.
- Update the function entry counts of callees when they get inlined into callers.

Differential Revision: http://reviews.llvm.org/D16381

llvm-svn: 262636
  • Loading branch information
Easwaran Raman committed Mar 3, 2016
1 parent abcee45 commit 3035719
Show file tree
Hide file tree
Showing 11 changed files with 433 additions and 52 deletions.
25 changes: 23 additions & 2 deletions llvm/include/llvm/Analysis/InlineCost.h
Expand Up @@ -20,6 +20,7 @@

namespace llvm {
class AssumptionCacheTracker;
class BlockFrequencyInfo;
class CallSite;
class DataLayout;
class Function;
Expand All @@ -38,6 +39,21 @@ namespace InlineConstants {
const unsigned TotalAllocaSizeRecursiveCaller = 1024;
}

/// \brief Block frequency analysis for multiple functions.
/// This class mimics block frequency analysis on CGSCC level. Block frequency
/// info is computed on demand and cached unless they are invalidated.
///
/// The cached BlockFrequencyInfo objects are held through raw pointers that
/// the destructor deletes, so the class is deliberately non-copyable: a
/// member-wise copy would make two objects delete the same pointers.
class BlockFrequencyAnalysis {
private:
  /// Per-function cache of block frequency info; the mapped pointers are
  /// released by the destructor.
  DenseMap<Function *, BlockFrequencyInfo *> BFM;

public:
  /// Creates an analysis with an empty cache. Declared explicitly because the
  /// deleted copy constructor below suppresses the implicit default one.
  BlockFrequencyAnalysis() = default;
  /// Copying is disabled to avoid double deletion of the cached entries.
  BlockFrequencyAnalysis(const BlockFrequencyAnalysis &) = delete;
  BlockFrequencyAnalysis &operator=(const BlockFrequencyAnalysis &) = delete;
  ~BlockFrequencyAnalysis();
  /// \brief Returns BlockFrequencyInfo for a function.
  BlockFrequencyInfo *getBlockFrequencyInfo(Function *);
  /// \brief Invalidates block frequency info for a function.
  void invalidateBlockFrequencyInfo(Function *);
};

/// \brief Represents the cost of inlining a function.
///
/// This supports special values for functions which should "always" or
Expand Down Expand Up @@ -111,7 +127,8 @@ class InlineCost {
/// inlining the callsite. It is an expensive, heavyweight call.
InlineCost getInlineCost(CallSite CS, int DefaultThreshold,
TargetTransformInfo &CalleeTTI,
AssumptionCacheTracker *ACT);
AssumptionCacheTracker *ACT,
BlockFrequencyAnalysis *BFA);

/// \brief Get an InlineCost with the callee explicitly specified.
/// This allows you to calculate the cost of inlining a function via a
Expand All @@ -120,7 +137,8 @@ InlineCost getInlineCost(CallSite CS, int DefaultThreshold,
//
InlineCost getInlineCost(CallSite CS, Function *Callee, int DefaultThreshold,
TargetTransformInfo &CalleeTTI,
AssumptionCacheTracker *ACT);
AssumptionCacheTracker *ACT,
BlockFrequencyAnalysis *BFA);

int computeThresholdFromOptLevels(unsigned OptLevel, unsigned SizeOptLevel);

Expand All @@ -129,6 +147,9 @@ int getDefaultInlineThreshold();

/// \brief Minimal filter to detect invalid constructs for inlining.
bool isInlineViable(Function &Callee);

/// \brief Return estimated count of the block \p BB.
Optional<uint64_t> getBlockCount(BasicBlock *BB, BlockFrequencyAnalysis *BFA);
}

#endif
29 changes: 29 additions & 0 deletions llvm/include/llvm/Transforms/IPO/InlinerPass.h
Expand Up @@ -24,8 +24,18 @@ class AssumptionCacheTracker;
class CallSite;
class DataLayout;
class InlineCost;
class BlockFrequencyAnalysis;
template <class PtrType, unsigned SmallSize> class SmallPtrSet;

// Callback fired for each basic block that gets cloned while inlining.
// NOTE(review): presumably called as (original block, cloned block) - confirm
// against the cloning code.
using BlockCloningFunctor =
    std::function<void(const BasicBlock *, const BasicBlock *)>;
// Callback fired once a function has been inlined; it receives the basic
// block containing the call and the function that was inlined there.
using FunctionCloningFunctor = std::function<void(BasicBlock *, Function *)>;
// Callback fired when inlining causes a function to be deleted.
using FunctionDeletedFunctor = std::function<void(Function *)>;

/// Inliner - This class contains all of the helper code which is used to
/// perform the inlining operations that do not depend on the policy.
///
Expand Down Expand Up @@ -69,9 +79,28 @@ struct Inliner : public CallGraphSCCPass {
/// shouldInline - Return true if the inliner should attempt to
/// inline at the given CallSite.
bool shouldInline(CallSite CS);
/// Set the block frequency of \p Dst to be the same as that of \p Src.
void copyBlockFrequency(BasicBlock *Src, BasicBlock *Dst);
/// Invalidates the block frequency info (BFI) for function \p F.
void invalidateBFI(Function *F);
/// Invalidates the block frequency info (BFI) for all functions in \p SCC.
void invalidateBFI(CallGraphSCC &SCC);
/// Update the function entry count for \p Callee, which has been inlined
/// into \p CallBB.
void updateEntryCount(BasicBlock *CallBB, Function *Callee);
/// \brief Update block frequency of an inlined block.
/// This method updates the block frequency of \p NewBB, which is a clone of
/// \p OrigBB, when the callsite \p CS gets inlined. The frequency of \p NewBB
/// is computed as follows:
///   Freq(NewBB) = Freq(OrigBB) * CallSiteFreq / CalleeEntryFreq.
void updateBlockFreq(CallSite &CS, const BasicBlock *OrigBB,
const BasicBlock *NewBB);

protected:
AssumptionCacheTracker *ACT;
std::unique_ptr<BlockFrequencyAnalysis> BFA;
/// Are we using profile guided optimization?
bool HasProfileData;
};

} // End llvm namespace
Expand Down
18 changes: 13 additions & 5 deletions llvm/include/llvm/Transforms/Utils/Cloning.h
Expand Up @@ -48,6 +48,9 @@ class AllocaInst;
class AssumptionCacheTracker;
class DominatorTree;

/// Functor that is invoked when a block is cloned into the new function.
using BlockCloningFunctor =
    std::function<void(const BasicBlock *, const BasicBlock *)>;

/// Return an exact copy of the specified module
///
std::unique_ptr<Module> CloneModule(const Module *M);
Expand Down Expand Up @@ -157,7 +160,8 @@ void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
ValueToValueMapTy &VMap, bool ModuleLevelChanges,
SmallVectorImpl<ReturnInst *> &Returns,
const char *NameSuffix = "",
ClonedCodeInfo *CodeInfo = nullptr);
ClonedCodeInfo *CodeInfo = nullptr,
BlockCloningFunctor Ftor = nullptr);

/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
/// except that it does some simple constant prop and DCE on the fly. The
Expand All @@ -172,23 +176,27 @@ void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
///
void CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
ValueToValueMapTy &VMap, bool ModuleLevelChanges,
SmallVectorImpl<ReturnInst*> &Returns,
SmallVectorImpl<ReturnInst *> &Returns,
const char *NameSuffix = "",
ClonedCodeInfo *CodeInfo = nullptr,
Instruction *TheCall = nullptr);
Instruction *TheCall = nullptr,
BlockCloningFunctor Ftor = nullptr);

/// InlineFunctionInfo - This class captures the data input to the
/// InlineFunction call, and records the auxiliary results produced by it.
class InlineFunctionInfo {
public:
explicit InlineFunctionInfo(CallGraph *cg = nullptr,
AssumptionCacheTracker *ACT = nullptr)
: CG(cg), ACT(ACT) {}
AssumptionCacheTracker *ACT = nullptr,
BlockCloningFunctor Ftor = nullptr)
: CG(cg), ACT(ACT), Ftor(Ftor) {}

/// CG - If non-null, InlineFunction will update the callgraph to reflect the
/// changes it makes.
CallGraph *CG;
AssumptionCacheTracker *ACT;
// Functor that is invoked when a block is cloned into the new function.
BlockCloningFunctor Ftor;

/// StaticAllocas - InlineFunction fills this in with all static allocas that
/// get copied into the caller.
Expand Down
99 changes: 83 additions & 16 deletions llvm/lib/Analysis/InlineCost.cpp
Expand Up @@ -18,13 +18,18 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/InstVisitor.h"
Expand Down Expand Up @@ -85,6 +90,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
// easily cacheable. Instead, use the cover function paramHasAttr.
CallSite CandidateCS;

BlockFrequencyAnalysis *BFA;
int Threshold;
int Cost;

Expand Down Expand Up @@ -153,6 +159,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// passed to support analyzing indirect calls whose target is inferred by
/// analysis.
void updateThreshold(CallSite CS, Function &Callee);
/// Adjust Threshold based on CallSiteCount and return the adjusted threshold.
int getAdjustedThreshold(int Threshold, Optional<uint64_t> CallSiteCount);

// Custom analysis routines.
bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
Expand Down Expand Up @@ -194,17 +202,19 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {

public:
CallAnalyzer(const TargetTransformInfo &TTI, AssumptionCacheTracker *ACT,
Function &Callee, int Threshold, CallSite CSArg)
: TTI(TTI), ACT(ACT), F(Callee), CandidateCS(CSArg), Threshold(Threshold),
Cost(0), IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false),
ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),
NumVectorInstructions(0), FiftyPercentVectorBonus(0),
TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
SROACostSavings(0), SROACostSavingsLost(0) {}
Function &Callee, int Threshold, CallSite CSArg,
BlockFrequencyAnalysis *BFA)
: TTI(TTI), ACT(ACT), F(Callee), CandidateCS(CSArg), BFA(BFA),
Threshold(Threshold), Cost(0), IsCallerRecursive(false),
IsRecursiveCall(false), ExposesReturnsTwice(false),
HasDynamicAlloca(false), ContainsNoDuplicateCall(false),
HasReturn(false), HasIndirectBr(false), HasFrameEscape(false),
AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0),
FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
NumInstructionsSimplified(0), SROACostSavings(0),
SROACostSavingsLost(0) {}

bool analyzeCall(CallSite CS);

Expand Down Expand Up @@ -572,6 +582,15 @@ bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
return false;
}

// Adjust the threshold based on callsite hotness. Currently this is a nop:
// the incoming Threshold is returned unchanged and the callsite count is
// ignored.
//
// The count parameter is intentionally left unnamed. That silences
// unused-parameter warnings portably, without the GCC-specific
// __attribute__((unused)) syntax that not every supported compiler accepts.
int CallAnalyzer::getAdjustedThreshold(int Threshold,
                                       Optional<uint64_t> /*CallSiteCount*/) {
  // FIXME: The new threshold should be computed from the given Threshold and
  // the callsite hotness.
  return Threshold;
}

void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
// If -inline-threshold is not given, listen to the optsize and minsize
// attributes when they would decrease the threshold.
Expand All @@ -596,6 +615,9 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
FunctionCount = Callee.getEntryCount().getValue();
MaxFunctionCount = Callee.getParent()->getMaximumFunctionCount().getValue();
}
Optional<uint64_t> CallSiteCount =
llvm::getBlockCount(CS.getInstruction()->getParent(), BFA);
Threshold = getAdjustedThreshold(Threshold, CallSiteCount);

// Listen to the inlinehint attribute or profile based hotness information
// when it would increase the threshold and the caller does not need to
Expand Down Expand Up @@ -912,7 +934,8 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
// during devirtualization and so we want to give it a hefty bonus for
// inlining, but cap that bonus in the event that inlining wouldn't pan
// out. Pretend to inline the function, with a custom threshold.
CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, CS);
CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, CS,
BFA);
if (CA.analyzeCall(CS)) {
// We were able to inline the indirect call! Subtract the cost from the
// threshold to get the bonus we want to apply, but don't go below zero.
Expand Down Expand Up @@ -1433,9 +1456,10 @@ static bool functionsHaveCompatibleAttributes(Function *Caller,

InlineCost llvm::getInlineCost(CallSite CS, int DefaultThreshold,
TargetTransformInfo &CalleeTTI,
AssumptionCacheTracker *ACT) {
AssumptionCacheTracker *ACT,
BlockFrequencyAnalysis *BFA) {
return getInlineCost(CS, CS.getCalledFunction(), DefaultThreshold, CalleeTTI,
ACT);
ACT, BFA);
}

int llvm::computeThresholdFromOptLevels(unsigned OptLevel,
Expand All @@ -1454,7 +1478,8 @@ int llvm::getDefaultInlineThreshold() { return DefaultInlineThreshold; }
InlineCost llvm::getInlineCost(CallSite CS, Function *Callee,
int DefaultThreshold,
TargetTransformInfo &CalleeTTI,
AssumptionCacheTracker *ACT) {
AssumptionCacheTracker *ACT,
BlockFrequencyAnalysis *BFA) {

// Cannot inline indirect calls.
if (!Callee)
Expand Down Expand Up @@ -1487,7 +1512,7 @@ InlineCost llvm::getInlineCost(CallSite CS, Function *Callee,
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
<< "...\n");

CallAnalyzer CA(CalleeTTI, ACT, *Callee, DefaultThreshold, CS);
CallAnalyzer CA(CalleeTTI, ACT, *Callee, DefaultThreshold, CS, BFA);
bool ShouldInline = CA.analyzeCall(CS);

DEBUG(CA.dump());
Expand Down Expand Up @@ -1535,3 +1560,45 @@ bool llvm::isInlineViable(Function &F) {

return true;
}

/// \brief Compute floor(\p Count * \p Freq / \p EntryFreq) using a 128-bit
/// intermediate product.
///
/// A plain 64-bit multiplication can wrap around when both the entry count and
/// the block frequency are large, which would yield a meaningless count. If
/// the exact quotient does not fit in 64 bits the result saturates to
/// UINT64_MAX. \p EntryFreq must be non-zero.
static uint64_t scaleCountByFrequency(uint64_t Count, uint64_t Freq,
                                      uint64_t EntryFreq) {
  const uint64_t Mask32 = 0xffffffffULL;

  // 64x64 -> 128 bit multiplication built from 32-bit halves.
  const uint64_t CountLo = Count & Mask32, CountHi = Count >> 32;
  const uint64_t FreqLo = Freq & Mask32, FreqHi = Freq >> 32;
  const uint64_t LoLo = CountLo * FreqLo;
  const uint64_t LoHi = CountLo * FreqHi;
  const uint64_t HiLo = CountHi * FreqLo;
  const uint64_t HiHi = CountHi * FreqHi;
  const uint64_t Middle = (LoLo >> 32) + (LoHi & Mask32) + (HiLo & Mask32);
  const uint64_t ProductLo = (LoLo & Mask32) | (Middle << 32);
  const uint64_t ProductHi =
      HiHi + (LoHi >> 32) + (HiLo >> 32) + (Middle >> 32);

  // The quotient needs more than 64 bits exactly when ProductHi >= EntryFreq.
  if (ProductHi >= EntryFreq)
    return UINT64_MAX;

  // Shift-and-subtract long division of ProductHi:ProductLo by EntryFreq.
  // Invariant: Remainder < EntryFreq at the top of every iteration.
  uint64_t Remainder = ProductHi;
  uint64_t Quotient = 0;
  for (unsigned Bit = 64; Bit-- > 0;) {
    // Remember the bit shifted out of the 64-bit remainder; if it is set the
    // true (65-bit) remainder is certainly >= EntryFreq.
    const bool ShiftedOut = (Remainder >> 63) != 0;
    Remainder = (Remainder << 1) | ((ProductLo >> Bit) & 1);
    Quotient <<= 1;
    if (ShiftedOut || Remainder >= EntryFreq) {
      Remainder -= EntryFreq; // Wraps to the correct value when ShiftedOut.
      Quotient |= 1;
    }
  }
  return Quotient;
}

/// \brief Get estimated execution count for \p BB.
///
/// The estimate is EntryCount(F) * Freq(BB) / EntryFreq(F), where F is the
/// function containing \p BB.
///
/// \returns None if \p BFA is null, if the parent function has no entry
/// count, or if the function's entry frequency is zero (the ratio is then
/// undefined); otherwise the estimated count, saturated to UINT64_MAX.
Optional<uint64_t> llvm::getBlockCount(BasicBlock *BB,
                                       BlockFrequencyAnalysis *BFA) {
  if (!BFA)
    return None;
  Function *F = BB->getParent();
  Optional<uint64_t> EntryCount = F->getEntryCount();
  if (!EntryCount)
    return None;
  BlockFrequencyInfo *BFI = BFA->getBlockFrequencyInfo(F);
  uint64_t BBFreq = BFI->getBlockFreq(BB).getFrequency();
  uint64_t FunctionEntryFreq = BFI->getEntryFreq();
  // Guard against dividing by zero.
  if (FunctionEntryFreq == 0)
    return None;
  uint64_t BBCount =
      scaleCountByFrequency(EntryCount.getValue(), BBFreq, FunctionEntryFreq);
  return BBCount;
}

/// Releases every BlockFrequencyInfo object still held in the cache.
BlockFrequencyAnalysis::~BlockFrequencyAnalysis() {
  for (auto It = BFM.begin(), End = BFM.end(); It != End; ++It)
    delete It->second;
}

/// \brief Get BlockFrequencyInfo for a function.
///
/// Returns the cached object when one exists for \p F. Otherwise a new one is
/// built from a freshly computed dominator tree, loop info and branch
/// probability info, stored in the cache, and returned.
BlockFrequencyInfo *BlockFrequencyAnalysis::getBlockFrequencyInfo(Function *F) {
  auto Cached = BFM.find(F);
  if (Cached == BFM.end()) {
    // Cache miss: run the prerequisite analyses for F on the spot.
    // NOTE(review): these analyses are locals that die when this scope ends;
    // confirm the BlockFrequencyInfo never dereferences them afterwards.
    DominatorTree DomTree;
    DomTree.recalculate(*F);
    LoopInfo Loops(DomTree);
    BranchProbabilityInfo BranchProbs(*F, Loops);
    BlockFrequencyInfo *Fresh = new BlockFrequencyInfo(*F, BranchProbs, Loops);
    BFM[F] = Fresh;
    return Fresh;
  }
  return Cached->second;
}

/// \brief Invalidate BlockFrequencyInfo for a function.
///
/// Removes the cache entry for \p F, if any, and frees the
/// BlockFrequencyInfo it referred to. The cache holds raw pointers that are
/// otherwise only deleted by the destructor, so erasing the entry without
/// deleting the object would leak it. Does nothing when \p F is not cached.
void BlockFrequencyAnalysis::invalidateBlockFrequencyInfo(Function *F) {
  auto Iter = BFM.find(F);
  if (Iter == BFM.end())
    return;
  delete Iter->second;
  BFM.erase(Iter);
}
3 changes: 2 additions & 1 deletion llvm/lib/Transforms/IPO/InlineSimple.cpp
Expand Up @@ -59,7 +59,8 @@ class SimpleInliner : public Inliner {
InlineCost getInlineCost(CallSite CS) override {
Function *Callee = CS.getCalledFunction();
TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
return llvm::getInlineCost(CS, DefaultThreshold, TTI, ACT);
return llvm::getInlineCost(CS, DefaultThreshold, TTI, ACT,
HasProfileData ? BFA.get() : nullptr);
}

bool runOnSCC(CallGraphSCC &SCC) override;
Expand Down

0 comments on commit 3035719

Please sign in to comment.