Skip to content

Commit

Permalink
Add dyno stats to BOLT.
Browse files Browse the repository at this point in the history
Summary:
Add "-dyno-stats" option that prints instruction stats based on
the execution profile similar to below:

BOLT-INFO: program-wide dynostats after optimizations:
  executed forward branches : 109706407 (+8.1%)
  taken forward branches : 13769074 (-55.5%)
  executed backward branches : 24517582 (-25.0%)
  taken backward branches : 15330256 (-27.2%)
  executed unconditional branches : 6009826 (-35.5%)
  function calls : 17192114 (+0.0%)
  executed instructions : 837733057 (-0.4%)
  total branches : 140233815 (-2.3%)
  taken branches : 35109156 (-42.8%)

Also fixed pseudo instruction discrepancies and added assertions
for BinaryBasicBlock::getNumPseudos() to make sure the number is
synchronized with real number of pseudo instructions.

(cherry picked from FBD3826995)
  • Loading branch information
maksfb committed Aug 30, 2016
1 parent 17e6919 commit 6bef336
Show file tree
Hide file tree
Showing 8 changed files with 303 additions and 13 deletions.
19 changes: 19 additions & 0 deletions bolt/BinaryBasicBlock.cpp
Expand Up @@ -114,6 +114,25 @@ void BinaryBasicBlock::addBranchInstruction(const BinaryBasicBlock *Successor) {
Instructions.emplace_back(std::move(NewInst));
}

/// Return the cached number of pseudo instructions in this basic block.
///
/// When NDEBUG is not defined, the instructions are recounted first and any
/// disagreement with the cached NumPseudos value is reported to errs() before
/// llvm_unreachable() is hit.
uint32_t BinaryBasicBlock::getNumPseudos() const {
#ifndef NDEBUG
  auto &BC = Function->getBinaryContext();

  // Recount pseudo instructions straight from the instruction list.
  uint32_t Recounted = 0;
  for (auto &Inst : Instructions)
    if (BC.MII->get(Inst.getOpcode()).isPseudo())
      ++Recounted;

  const bool InSync = Recounted == NumPseudos;
  if (!InSync) {
    errs() << "BOLT-ERROR: instructions for basic block " << getName()
           << " in function " << *Function << ": calculated pseudos "
           << Recounted << ", set pseudos " << NumPseudos << ", size "
           << size() << '\n';
    llvm_unreachable("pseudos mismatch");
  }
#endif
  return NumPseudos;
}

void BinaryBasicBlock::dump(BinaryContext& BC) const {
if (Label) outs() << Label->getName() << ":\n";
BC.printInstructions(outs(), Instructions.begin(), Instructions.end(), Offset);
Expand Down
28 changes: 24 additions & 4 deletions bolt/BinaryBasicBlock.h
Expand Up @@ -296,16 +296,22 @@ class BinaryBasicBlock {

/// If the basic block ends with a conditional branch (possibly followed by
/// an unconditional branch) and thus has 2 successors, return the successor
/// corresponding to the jump condition being \p Condition: the first
/// successor for true, the second one for false.
/// Return nullptr if the basic block does not have exactly 2 successors.
const BinaryBasicBlock *getConditionalSuccessor(bool Condition) const {
  if (succ_size() == 2)
    return Successors[Condition ? 0 : 1];
  return nullptr;
}

/// Return the branch info entry selected by \p Condition: entry 0 for true,
/// entry 1 for false (the same index mapping getConditionalSuccessor() uses
/// for successors). Must only be called when exactly 2 entries are recorded.
const BinaryBranchInfo &getBranchInfo(bool Condition) const {
  assert(BranchInfo.size() == 2 &&
         "could only be called for blocks with 2 successors");
  const unsigned Slot = Condition ? 0 : 1;
  return BranchInfo[Slot];
}

/// If the basic block ends with a conditional branch (possibly followed by
/// an unconditonal branch) and thus has 2 successor, revese the order of
/// an unconditional branch) and thus has 2 successors, reverse the order of
/// its successors in CFG, update branch info, and return true. If the basic
/// block does not have 2 successors return false.
bool swapConditionalSuccessors();
Expand Down Expand Up @@ -346,12 +352,19 @@ class BinaryBasicBlock {
}

/// Add instruction before Pos in this basic block.
const_iterator insertPseudoInstr(const_iterator Pos, MCInst &Instr) {
template <typename Itr>
Itr insertPseudoInstr(Itr Pos, MCInst &Instr) {
  // Bump the cached pseudo count first, then place the instruction before Pos
  // and hand back the position of the newly inserted element.
  ++NumPseudos;
  Itr Inserted = Instructions.emplace(Pos, Instr);
  return Inserted;
}

uint32_t getNumPseudos() const { return NumPseudos; }
/// Return the number of pseudo instructions in the basic block.
uint32_t getNumPseudos() const;

/// Return how many instructions in this basic block are not pseudo
/// instructions, i.e. size() minus getNumPseudos().
uint32_t getNumNonPseudos() const {
  const uint32_t PseudoCount = getNumPseudos();
  return size() - PseudoCount;
}

/// Set minimum alignment for the basic block.
void setAlignment(uint64_t Align) {
Expand Down Expand Up @@ -433,6 +446,13 @@ class BinaryBasicBlock {
return CanOutline;
}

/// Remove the pseudo instruction that \p II points to, decrementing the
/// cached pseudo count, and return whatever Instructions.erase() returns.
iterator erasePseudoInstruction(iterator II) {
  --NumPseudos;
  iterator Next = Instructions.erase(II);
  return Next;
}

/// Erase the given (non-pseudo) instruction if found.
/// Implemented by replacing \p Inst with an empty instruction sequence; the
/// result of replaceInstruction() is returned unchanged.
bool eraseInstruction(MCInst *Inst) {
  const bool Erased = replaceInstruction(Inst, std::vector<MCInst>{});
  return Erased;
}
Expand Down
135 changes: 134 additions & 1 deletion bolt/BinaryFunction.cpp
Expand Up @@ -39,6 +39,7 @@ using namespace llvm;
namespace opts {

extern cl::opt<unsigned> Verbosity;
extern cl::opt<bool> PrintDynoStats;

static cl::opt<bool>
AgressiveSplitting("split-all-cold",
Expand All @@ -51,6 +52,12 @@ DotToolTipCode("dot-tooltip-code",
cl::ZeroOrMore,
cl::Hidden);

// Multiplier applied to each counter value when dyno stats are printed.
// Defaults to 1, i.e. values are reported unscaled.
static cl::opt<uint32_t>
DynoStatsScale("dyno-stats-scale",
  cl::init(1),
  cl::Optional,
  cl::desc("scale to be applied while reporting dyno stats"));

} // namespace opts

namespace llvm {
Expand All @@ -62,6 +69,8 @@ namespace bolt {
// using it.
constexpr unsigned NoRegister = 0;

// Out-of-class definition of the static constexpr description table that is
// declared and initialized inside DynoStats (required when the array is
// odr-used under pre-C++17 rules).
constexpr const char *DynoStats::Desc[];

namespace {

/// Gets debug line information for the instruction located at the given
Expand Down Expand Up @@ -199,6 +208,11 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
if (IdenticalFunctionAddress != Address)
OS << "\n Id Fun Addr : 0x" << Twine::utohexstr(IdenticalFunctionAddress);

if (opts::PrintDynoStats && !BasicBlocksLayout.empty()) {
DynoStats dynoStats = getDynoStats();
OS << dynoStats;
}

OS << "\n}\n";

if (!PrintInstructions || !BC.InstPrinter)
Expand Down Expand Up @@ -2006,7 +2020,7 @@ void BinaryFunction::propagateGnuArgsSizeInfo() {
// Delete DW_CFA_GNU_args_size instructions and only regenerate
// during the final code emission. The information is embedded
// inside call instructions.
II = BB->Instructions.erase(II);
II = BB->erasePseudoInstruction(II);
} else {
++II;
}
Expand Down Expand Up @@ -2575,5 +2589,124 @@ void BinaryFunction::printLoopInfo(raw_ostream &OS) const {
OS << "Maximum nested loop depth: " << BLI->MaximumDepth << "\n\n";
}

/// Compute profile-weighted execution statistics for this function.
///
/// Walks the basic blocks in layout order and, using the recorded branch
/// counts, accumulates: executed instructions, calls (plus the subset of
/// calls that have a memory operand, reported as indirect calls),
/// unconditional branches, and forward/backward conditional branches together
/// with their taken counts.
///
/// \return zero-initialized stats if the function is not simple; otherwise
/// the accumulated counters. Blocks whose total branch count is zero
/// contribute nothing.
DynoStats BinaryFunction::getDynoStats() const {
  DynoStats Stats;

  // Report empty stats for functions we don't completely understand.
  if (!isSimple())
    return Stats;

  // 1-based position of every basic block in the current layout, used to
  // tell forward branches from backward ones.
  std::unordered_map<const BinaryBasicBlock *, unsigned>
      BBToIndexMap(layout_size());
  unsigned Index = 0;
  for (const auto &BB : layout())
    BBToIndexMap[BB] = ++Index;

  // A branch is "forward" when its target is laid out after its source.
  auto isForwardBranch = [&](const BinaryBasicBlock *From,
                             const BinaryBasicBlock *To) {
    return BBToIndexMap[To] > BBToIndexMap[From];
  };

  for (const auto &BB : layout()) {
    // Approximate the block execution count by the sum of the profiled
    // counts in the block's own BranchInfo, i.e. its outgoing branches.
    // NOTE(review): this can deviate from the true execution count (the sum
    // of incoming branch frequencies) when the block contains a call that
    // does not return or that throws an exception.
    uint64_t BBExecutionCount = 0;
    for (const auto &BI : BB->BranchInfo)
      if (BI.Count != BinaryBasicBlock::COUNT_NO_PROFILE)
        BBExecutionCount += BI.Count;

    // Ignore blocks that were not executed.
    if (BBExecutionCount == 0)
      continue;

    // Count the number of calls by iterating through all instructions; each
    // call is charged once per execution of the block.
    for (const auto &Instr : *BB) {
      if (!BC.MIA->isCall(Instr))
        continue;
      Stats[DynoStats::FUNCTION_CALLS] += BBExecutionCount;
      if (BC.MIA->getMemoryOperandNo(Instr) != -1)
        Stats[DynoStats::INDIRECT_CALLS] += BBExecutionCount;
    }

    Stats[DynoStats::INSTRUCTIONS] +=
        BB->getNumNonPseudos() * BBExecutionCount;

    // Update stats for branches.
    const MCSymbol *TBB = nullptr;
    const MCSymbol *FBB = nullptr;
    MCInst *CondBranch = nullptr;
    MCInst *UncondBranch = nullptr;
    if (!BC.MIA->analyzeBranch(BB->Instructions, TBB, FBB, CondBranch,
                               UncondBranch))
      continue;

    // Block ends without any branch instruction.
    if (!CondBranch && !UncondBranch)
      continue;

    // Simple unconditional branch.
    if (!CondBranch) {
      Stats[DynoStats::UNCOND_BRANCHES] += BBExecutionCount;
      continue;
    }

    // Conditional branch that could be followed by an unconditional branch.
    // The counts come from the block's branch info, so compare against the
    // same BinaryBasicBlock sentinel that was used when summing them above.
    uint64_t TakenCount = BB->getBranchInfo(true).Count;
    if (TakenCount == BinaryBasicBlock::COUNT_NO_PROFILE)
      TakenCount = 0;
    uint64_t NonTakenCount = BB->getBranchInfo(false).Count;
    if (NonTakenCount == BinaryBasicBlock::COUNT_NO_PROFILE)
      NonTakenCount = 0;

    assert(TakenCount + NonTakenCount == BBExecutionCount &&
           "internal calculation error");

    if (isForwardBranch(BB, BB->getConditionalSuccessor(true))) {
      Stats[DynoStats::FORWARD_COND_BRANCHES] += BBExecutionCount;
      Stats[DynoStats::FORWARD_COND_BRANCHES_TAKEN] += TakenCount;
    } else {
      Stats[DynoStats::BACKWARD_COND_BRANCHES] += BBExecutionCount;
      Stats[DynoStats::BACKWARD_COND_BRANCHES_TAKEN] += TakenCount;
    }

    // The trailing unconditional branch only runs when the conditional one
    // is not taken.
    if (UncondBranch)
      Stats[DynoStats::UNCOND_BRANCHES] += NonTakenCount;
  }

  return Stats;
}

/// Print every statistic, one per line, as "<scaled value> : <description>".
///
/// \param OS     stream to write to.
/// \param Other  optional baseline; when non-null each line additionally
///               shows the relative change of this object's value against the
///               baseline value as a signed percentage.
///
/// Values are read through the const operator[] for both this object and the
/// baseline, so derived statistics (the ones defined as sums/differences of
/// other counters) are computed consistently on both sides instead of being
/// taken from raw storage, where they may never have been written.
void DynoStats::print(raw_ostream &OS, const DynoStats *Other) const {
  auto printStatWithDelta = [&](const char *Name, uint64_t Stat,
                                uint64_t OtherStat) {
    // The value is unsigned: use an unsigned conversion and pass exactly the
    // type the format specifier expects.
    OS << format("%'20llu : ",
                 static_cast<unsigned long long>(Stat * opts::DynoStatsScale))
       << Name;
    if (Other) {
      // Do the arithmetic in double; the +1 in the denominator avoids a
      // division by zero when the baseline counter is 0.
      const double Delta =
          static_cast<double>(Stat) - static_cast<double>(OtherStat);
      OS << format(" (%+.1f%%)",
                   Delta * 100.0 / (static_cast<double>(OtherStat) + 1.0));
    }
    OS << '\n';
  };

  for (auto Stat = DynoStats::FIRST_DYNO_STAT + 1;
       Stat < DynoStats::LAST_DYNO_STAT;
       ++Stat) {
    printStatWithDelta(Desc[Stat], (*this)[Stat],
                       Other ? (*Other)[Stat] : 0);
  }
}

/// Add every statistic of \p Other into this object. The right-hand side is
/// read through the const operator[], the left-hand side is updated directly
/// in the Stats array.
void DynoStats::operator+=(const DynoStats &Other) {
  auto Idx = DynoStats::FIRST_DYNO_STAT + 1;
  while (Idx < DynoStats::LAST_DYNO_STAT) {
    Stats[Idx] += Other[Idx];
    ++Idx;
  }
}

} // namespace bolt
} // namespace llvm
93 changes: 91 additions & 2 deletions bolt/BinaryFunction.h
Expand Up @@ -52,6 +52,88 @@ namespace bolt {
using DWARFUnitLineTable = std::pair<DWARFCompileUnit *,
const DWARFDebugLine::LineTable *>;

/// Class encapsulating runtime statistics about an execution unit.
class DynoStats {

#define DYNO_STATS\
D(FIRST_DYNO_STAT, "<reserved>", Fn)\
D(FORWARD_COND_BRANCHES, "executed forward branches", Fn)\
D(FORWARD_COND_BRANCHES_TAKEN, "taken forward branches", Fn)\
D(BACKWARD_COND_BRANCHES, "executed backward branches", Fn)\
D(BACKWARD_COND_BRANCHES_TAKEN, "taken backward branches", Fn)\
D(UNCOND_BRANCHES, "executed unconditional branches", Fn)\
D(FUNCTION_CALLS, "all function calls", Fn)\
D(INDIRECT_CALLS, "indirect calls", Fn)\
D(INSTRUCTIONS, "executed instructions", Fn)\
D(ALL_BRANCHES, "total branches",\
Fadd(ALL_CONDITIONAL, UNCOND_BRANCHES))\
D(ALL_TAKEN, "taken branches",\
Fadd(TAKEN_CONDITIONAL, UNCOND_BRANCHES))\
D(NONTAKEN_CONDITIONAL, "non-taken conditional branches",\
Fsub(ALL_CONDITIONAL, TAKEN_CONDITIONAL))\
D(TAKEN_CONDITIONAL, "taken conditional branches",\
Fadd(FORWARD_COND_BRANCHES_TAKEN, BACKWARD_COND_BRANCHES_TAKEN))\
D(ALL_CONDITIONAL, "all conditional branches",\
Fadd(FORWARD_COND_BRANCHES, BACKWARD_COND_BRANCHES))\
D(LAST_DYNO_STAT, "<reserved>", Fn)

public:
#define D(name, ...) name,
enum : uint8_t { DYNO_STATS };
#undef D


private:
uint64_t Stats[LAST_DYNO_STAT];

#define D(name, desc, ...) desc,
static constexpr const char *Desc[] = { DYNO_STATS };
#undef D

public:
DynoStats() {
for (auto Stat = FIRST_DYNO_STAT + 0; Stat < LAST_DYNO_STAT; ++Stat)
Stats[Stat] = 0;
}

uint64_t &operator[](size_t I) {
assert(I > FIRST_DYNO_STAT && I < LAST_DYNO_STAT &&
"index out of bounds");
return Stats[I];
}

uint64_t operator[](size_t I) const {
switch (I) {
#define D(name, desc, func) \
case name: \
return func;
#define Fn Stats[I]
#define Fadd(a, b) operator[](a) + operator[](b)
#define Fsub(a, b) operator[](a) - operator[](b)
#define F(a) operator[](a)
#define Radd(a, b) (a + b)
#define Rsub(a, b) (a - b)
DYNO_STATS
#undef Fn
#undef D
default:
llvm_unreachable("index out of bounds");
}
return 0;
}

void print(raw_ostream &OS, const DynoStats *Other = nullptr) const;

void operator+=(const DynoStats &Other);
};

/// Stream \p Stats to \p OS by calling DynoStats::print() without a baseline
/// object, and return the stream to allow chaining.
inline raw_ostream &operator<<(raw_ostream &OS, const DynoStats &Stats) {
  const DynoStats *NoBaseline = nullptr;
  Stats.print(OS, NoBaseline);
  return OS;
}

DynoStats operator+(const DynoStats &A, const DynoStats &B);

/// BinaryFunction is a representation of machine-level function.
//
/// We use the term "Binary" as "Machine" was already taken.
Expand Down Expand Up @@ -460,7 +542,7 @@ class BinaryFunction : public AddressRangesOwner {
/// end of basic blocks.
void modifyLayout(LayoutType Type, bool MinBranchClusters, bool Split);

/// Find the loops in the CFG of the function and store infromation about
/// Find the loops in the CFG of the function and store information about
/// them.
void calculateLoopInfo();

Expand All @@ -469,7 +551,7 @@ class BinaryFunction : public AddressRangesOwner {
return BLI != nullptr;
}

/// Print loop inforamtion about the function.
/// Print loop information about the function.
void printLoopInfo(raw_ostream &OS) const;

/// View CFG in graphviz program
Expand All @@ -491,6 +573,13 @@ class BinaryFunction : public AddressRangesOwner {
return BC;
}

/// Return dynostats for the function.
///
/// The function relies on branch instructions being in-sync with CFG for
/// branch instructions stats. Thus it is better to call it after
/// fixBranches().
DynoStats getDynoStats() const;

/// Get basic block index assuming it belongs to this function.
unsigned getIndex(const BinaryBasicBlock *BB) const {
assert(BB->Index < BasicBlocks.size());
Expand Down
4 changes: 2 additions & 2 deletions bolt/BinaryPasses.cpp
Expand Up @@ -31,12 +31,12 @@ PrintReordered("print-reordered",
cl::ZeroOrMore,
cl::Hidden);

cl::opt<bool>
// File-local hidden flag "-print-after-branch-fixup".
static cl::opt<bool>
PrintAfterBranchFixup("print-after-branch-fixup",
  cl::Hidden,
  cl::desc("print function after fixing local branches"));

cl::opt<bool>
// File-local hidden flag "-print-after-fixup".
static cl::opt<bool>
PrintAfterFixup("print-after-fixup",
  cl::Hidden,
  cl::desc("print function after fixup"));
Expand Down

0 comments on commit 6bef336

Please sign in to comment.