Skip to content

Commit

Permalink
[mlgo] Incrementally update FunctionPropertiesInfo during inlining
Browse files Browse the repository at this point in the history
Re-computing FunctionPropertiesInfo after each inlining may be very
time-consuming in certain cases, e.g. a large caller with lots of callsites,
and when the overall IR doesn't increase (thus not tripping a size bloat
threshold).

This patch addresses this by incrementally updating
FunctionPropertiesInfo.

Differential Revision: https://reviews.llvm.org/D125841
  • Loading branch information
mtrofin committed Jun 1, 2022
1 parent abdf0da commit f46dd19
Show file tree
Hide file tree
Showing 5 changed files with 666 additions and 53 deletions.
38 changes: 38 additions & 0 deletions llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h
Expand Up @@ -14,17 +14,33 @@
#ifndef LLVM_ANALYSIS_FUNCTIONPROPERTIESANALYSIS_H
#define LLVM_ANALYSIS_FUNCTIONPROPERTIESANALYSIS_H

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/PassManager.h"

namespace llvm {
class Function;
class LoopInfo;

class FunctionPropertiesInfo {
friend class FunctionPropertiesUpdater;
void updateForBB(const BasicBlock &BB, int64_t Direction);
void updateAggregateStats(const Function &F, const LoopInfo &LI);
void reIncludeBB(const BasicBlock &BB, const LoopInfo &LI);

public:
static FunctionPropertiesInfo getFunctionPropertiesInfo(const Function &F,
const LoopInfo &LI);

/// Bytewise equality: returns true iff the object representations of *this
/// and \p FPI are identical over sizeof(FunctionPropertiesInfo) bytes.
// NOTE(review): this is only a meaningful value comparison if the class holds
// nothing but plain scalar fields with no padding between them - the fields
// visible here are int64_t counters, but part of the class is not shown;
// confirm before adding members of other types.
// NOTE(review): std::memcmp is declared in <cstring>, which is not among the
// includes visible in this header - presumably it arrives transitively.
bool operator==(const FunctionPropertiesInfo &FPI) const {
return std::memcmp(this, &FPI, sizeof(FunctionPropertiesInfo)) == 0;
}

/// Inequality, defined as the exact negation of operator== so the two can
/// never disagree.
bool operator!=(const FunctionPropertiesInfo &FPI) const {
  const bool Same = (*this == FPI);
  return !Same;
}

void print(raw_ostream &OS) const;

/// Number of basic blocks
Expand Down Expand Up @@ -57,6 +73,9 @@ class FunctionPropertiesInfo {

// Number of Top Level Loops in the Function
int64_t TopLevelLoopCount = 0;

// All non-debug instructions
int64_t TotalInstructionCount = 0;
};

// Analysis pass
Expand All @@ -82,5 +101,24 @@ class FunctionPropertiesPrinterPass
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};

/// Incrementally updates a FunctionPropertiesInfo across the inlining of one
/// call site, instead of recomputing it for the whole caller. A
/// FunctionPropertiesUpdater keeps the state necessary for tracking the changes
/// llvm::InlineFunction makes. The idea is that inlining will at most modify
/// a few BBs of the Caller (maybe the entry BB and definitely the callsite BB)
/// and potentially affect exception handling BBs in the case of invoke
/// inlining.
///
/// Intended use is two-phase: construct the updater before inlining, then call
/// finish() once inlining is done.
class FunctionPropertiesUpdater {
public:
/// Captures the state needed for the update of \p FPI around the call site
/// \p CB. Must be constructed before the call site is inlined, since it reads
/// the parent BB and function of \p CB.
FunctionPropertiesUpdater(FunctionPropertiesInfo &FPI, const CallBase &CB);

/// Completes the update after inlining. \p LI is the caller's loop info.
// NOTE(review): presumably LI must reflect the post-inlining caller - confirm
// against the call sites.
void finish(const LoopInfo &LI);

private:
// The properties being updated in place; owned by the caller of this class.
FunctionPropertiesInfo &FPI;
// The basic block that contained the call site at construction time.
const BasicBlock &CallSiteBB;
// The function containing CallSiteBB. Declared after CallSiteBB on purpose:
// members are initialized in declaration order.
const Function &Caller;

// Set of basic blocks associated with the call site.
// NOTE(review): presumably the successors of CallSiteBB; DenseSet.h is not
// among the includes visible in this header - confirm it arrives transitively.
DenseSet<const BasicBlock *> Successors;
};
} // namespace llvm
#endif // LLVM_ANALYSIS_FUNCTIONPROPERTIESANALYSIS_H
26 changes: 14 additions & 12 deletions llvm/include/llvm/Analysis/MLInlineAdvisor.h
Expand Up @@ -9,6 +9,7 @@
#ifndef LLVM_ANALYSIS_MLINLINEADVISOR_H
#define LLVM_ANALYSIS_MLINLINEADVISOR_H

#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/MLModelRunner.h"
Expand All @@ -33,13 +34,17 @@ class MLInlineAdvisor : public InlineAdvisor {
void onPassEntry() override;
void onPassExit(LazyCallGraph::SCC *SCC) override;

int64_t getIRSize(const Function &F) const { return F.getInstructionCount(); }
/// Returns the IR size of \p F, taken from the TotalInstructionCount field of
/// the function's cached FunctionPropertiesInfo.
int64_t getIRSize(Function &F) const {
  const auto &CachedInfo = getCachedFPI(F);
  return CachedInfo.TotalInstructionCount;
}
void onSuccessfulInlining(const MLInlineAdvice &Advice,
bool CalleeWasDeleted);

/// Returns the current value of the advisor's ForceStop flag.
bool isForcedToStop() const { return ForceStop; }
int64_t getLocalCalls(Function &F);
/// Returns a reference to the model runner held by ModelRunner. The pointer is
/// dereferenced without a null check.
const MLModelRunner &getModelRunner() const { return *ModelRunner.get(); }
FunctionPropertiesInfo &getCachedFPI(Function &) const;
const LoopInfo &getLoopInfo(Function &F) const;

protected:
std::unique_ptr<InlineAdvice> getAdviceImpl(CallBase &CB) override;
Expand Down Expand Up @@ -67,6 +72,8 @@ class MLInlineAdvisor : public InlineAdvisor {
<< "\n";
}

mutable DenseMap<const Function *, FunctionPropertiesInfo> FPICache;

LazyCallGraph &CG;

int64_t NodeCount = 0;
Expand All @@ -86,16 +93,7 @@ class MLInlineAdvisor : public InlineAdvisor {
class MLInlineAdvice : public InlineAdvice {
public:
MLInlineAdvice(MLInlineAdvisor *Advisor, CallBase &CB,
OptimizationRemarkEmitter &ORE, bool Recommendation)
: InlineAdvice(Advisor, CB, ORE, Recommendation),
CallerIRSize(Advisor->isForcedToStop() ? 0
: Advisor->getIRSize(*Caller)),
CalleeIRSize(Advisor->isForcedToStop() ? 0
: Advisor->getIRSize(*Callee)),
CallerAndCalleeEdges(Advisor->isForcedToStop()
? 0
: (Advisor->getLocalCalls(*Caller) +
Advisor->getLocalCalls(*Callee))) {}
OptimizationRemarkEmitter &ORE, bool Recommendation);
virtual ~MLInlineAdvice() = default;

void recordInliningImpl() override;
Expand All @@ -112,10 +110,14 @@ class MLInlineAdvice : public InlineAdvice {

private:
void reportContextForRemark(DiagnosticInfoOptimizationBase &OR);

void updateCachedCallerFPI();
/// Returns the Advisor member downcast to MLInlineAdvisor. The cast is an
/// unchecked static_cast.
// NOTE(review): presumably Advisor is the pointer inherited from InlineAdvice
// and is always an MLInlineAdvisor for this class - confirm.
MLInlineAdvisor *getAdvisor() const {
  return static_cast<MLInlineAdvisor *>(Advisor);
}
// Make a copy of the FPI of the caller right before inlining. If inlining
// fails, we can just update the cache with that value.
const FunctionPropertiesInfo PreInlineCallerFPI;
Optional<FunctionPropertiesUpdater> FPU;
};

} // namespace llvm
Expand Down
150 changes: 117 additions & 33 deletions llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
Expand Up @@ -12,49 +12,75 @@
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Instructions.h"
#include <deque>

using namespace llvm;

FunctionPropertiesInfo
FunctionPropertiesInfo::getFunctionPropertiesInfo(const Function &F,
const LoopInfo &LI) {

FunctionPropertiesInfo FPI;
namespace {
int64_t getNrBlocksFromCond(const BasicBlock &BB) {
int64_t Ret = 0;
if (const auto *BI = dyn_cast<BranchInst>(BB.getTerminator())) {
if (BI->isConditional())
Ret += BI->getNumSuccessors();
} else if (const auto *SI = dyn_cast<SwitchInst>(BB.getTerminator())) {
Ret += (SI->getNumCases() + (nullptr != SI->getDefaultDest()));
}
return Ret;
}

FPI.Uses = ((!F.hasLocalLinkage()) ? 1 : 0) + F.getNumUses();
/// Returns the use count of \p F, plus one when \p F does not have local
/// linkage.
// NOTE(review): the extra use presumably stands in for possible references
// from outside the module - confirm.
int64_t getUses(const Function &F) {
  const unsigned NonLocalBonus = F.hasLocalLinkage() ? 0 : 1;
  return NonLocalBonus + F.getNumUses();
}
} // namespace

for (const auto &BB : F) {
++FPI.BasicBlockCount;
/// Adds the contribution of \p BB to the per-block counters and raises
/// MaxLoopDepth if \p BB sits deeper in a loop nest (per \p LI) than any block
/// seen so far. MaxLoopDepth is never lowered here.
void FunctionPropertiesInfo::reIncludeBB(const BasicBlock &BB,
                                         const LoopInfo &LI) {
  updateForBB(BB, +1);
  const int64_t DepthOfBB = LI.getLoopDepth(&BB);
  if (MaxLoopDepth < DepthOfBB)
    MaxLoopDepth = DepthOfBB;
}

if (const auto *BI = dyn_cast<BranchInst>(BB.getTerminator())) {
if (BI->isConditional())
FPI.BlocksReachedFromConditionalInstruction += BI->getNumSuccessors();
} else if (const auto *SI = dyn_cast<SwitchInst>(BB.getTerminator())) {
FPI.BlocksReachedFromConditionalInstruction +=
(SI->getNumCases() + (nullptr != SI->getDefaultDest()));
void FunctionPropertiesInfo::updateForBB(const BasicBlock &BB,
int64_t Direction) {
assert(Direction == 1 || Direction == -1);
BasicBlockCount += Direction;
BlocksReachedFromConditionalInstruction +=
(Direction * getNrBlocksFromCond(BB));
for (const auto &I : BB) {
if (auto *CS = dyn_cast<CallBase>(&I)) {
const auto *Callee = CS->getCalledFunction();
if (Callee && !Callee->isIntrinsic() && !Callee->isDeclaration())
DirectCallsToDefinedFunctions += Direction;
}

for (const auto &I : BB) {
if (auto *CS = dyn_cast<CallBase>(&I)) {
const auto *Callee = CS->getCalledFunction();
if (Callee && !Callee->isIntrinsic() && !Callee->isDeclaration())
++FPI.DirectCallsToDefinedFunctions;
}
if (I.getOpcode() == Instruction::Load) {
++FPI.LoadInstCount;
} else if (I.getOpcode() == Instruction::Store) {
++FPI.StoreInstCount;
}
if (I.getOpcode() == Instruction::Load) {
LoadInstCount += Direction;
} else if (I.getOpcode() == Instruction::Store) {
StoreInstCount += Direction;
}
// Loop Depth of the Basic Block
int64_t LoopDepth;
LoopDepth = LI.getLoopDepth(&BB);
if (FPI.MaxLoopDepth < LoopDepth)
FPI.MaxLoopDepth = LoopDepth;
}
FPI.TopLevelLoopCount += llvm::size(LI);
TotalInstructionCount += Direction * BB.sizeWithoutDebug();
}

/// Recomputes the function-wide features that are not accumulated per basic
/// block: the number of top-level loops in \p LI and the use count of \p F.
/// Both fields are overwritten, not incremented.
void FunctionPropertiesInfo::updateAggregateStats(const Function &F,
                                                  const LoopInfo &LI) {
  TopLevelLoopCount = llvm::size(LI);
  Uses = getUses(F);
}

/// Computes the properties of \p F from scratch. Only the entry block and
/// blocks that have at least one predecessor are counted; the function-wide
/// aggregates are filled in once all blocks have been visited.
FunctionPropertiesInfo
FunctionPropertiesInfo::getFunctionPropertiesInfo(const Function &F,
                                                  const LoopInfo &LI) {
  FunctionPropertiesInfo Result;
  for (const auto &Block : F) {
    // Skip predecessor-less blocks other than the entry block.
    const bool Counted = Block.isEntryBlock() || !pred_empty(&Block);
    if (!Counted)
      continue;
    Result.reIncludeBB(Block, LI);
  }
  Result.updateAggregateStats(F, LI);
  return Result;
}

Expand All @@ -68,7 +94,8 @@ void FunctionPropertiesInfo::print(raw_ostream &OS) const {
<< "LoadInstCount: " << LoadInstCount << "\n"
<< "StoreInstCount: " << StoreInstCount << "\n"
<< "MaxLoopDepth: " << MaxLoopDepth << "\n"
<< "TopLevelLoopCount: " << TopLevelLoopCount << "\n\n";
<< "TopLevelLoopCount: " << TopLevelLoopCount << "\n"
<< "TotalInstructionCount: " << TotalInstructionCount << "\n\n";
}

AnalysisKey FunctionPropertiesAnalysis::Key;
Expand All @@ -87,3 +114,60 @@ FunctionPropertiesPrinterPass::run(Function &F, FunctionAnalysisManager &AM) {
AM.getResult<FunctionPropertiesAnalysis>(F).print(OS);
return PreservedAnalyses::all();
}

/// Prepares \p FPI for the inlining of \p CB by subtracting the contribution
/// of every caller BB that inlining is likely to change. Features that are not
/// simple sums (e.g. max loop depth, loop counts) are left stale here and are
/// refreshed post-inlining.
FunctionPropertiesUpdater::FunctionPropertiesUpdater(
    FunctionPropertiesInfo &FPI, const CallBase &CB)
    : FPI(FPI), CallSiteBB(*CB.getParent()), Caller(*CallSiteBB.getParent()) {

  // Remember the call site's successors: together with the callsite BB they
  // bound the region into which the callee will be pasted. They may also
  // become unreachable when an `invoke` is inlined.
  Successors.insert(succ_begin(&CallSiteBB), succ_end(&CallSiteBB));

  // Gather the BBs to subtract in a set, so that a BB playing several roles
  // (e.g. the entry BB also being the callsite BB) is subtracted exactly once.
  // `finish` has to honour the same once-only accounting.
  SmallPtrSet<const BasicBlock *, 4> ToSubtract;

  // The callsite BB is either split, or has the callee's single BB pasted in.
  ToSubtract.insert(&CallSiteBB);

  // The caller's entry BB may receive new alloca instructions.
  ToSubtract.insert(&*Caller.begin());

  ToSubtract.insert(Successors.begin(), Successors.end());

  for (const auto *Affected : ToSubtract)
    FPI.updateForBB(*Affected, -1);
}

void FunctionPropertiesUpdater::finish(const LoopInfo &LI) {
DenseSet<const BasicBlock *> ReIncluded;
std::deque<const BasicBlock *> Worklist;

if (&CallSiteBB != &*Caller.begin()) {
FPI.reIncludeBB(*Caller.begin(), LI);
ReIncluded.insert(&*Caller.begin());
}

// Update feature values from the BBs that were copied from the callee, or
// might have been modified because of inlining. The latter have been
// subtracted in the FunctionPropertiesUpdater ctor.
Worklist.push_back(&CallSiteBB);
while (!Worklist.empty()) {
const auto *BB = Worklist.front();
Worklist.pop_front();
if (!ReIncluded.insert(BB).second)
continue;
FPI.reIncludeBB(*BB, LI);
if (!Successors.contains(BB))
for (const auto *Succ : successors(BB))
Worklist.push_back(Succ);
}
FPI.updateAggregateStats(Caller, LI);
assert(FPI == FunctionPropertiesInfo::getFunctionPropertiesInfo(Caller, LI));
}

0 comments on commit f46dd19

Please sign in to comment.