Skip to content

Commit

Permalink
[BOLT-HEATMAP] Initial heat map implementation
Browse files Browse the repository at this point in the history
Summary:
Add heatmap subcommand to produce heatmaps based on perf.data with LBR.
The output is produced in colored ASCII format.

  llvm-bolt heatmap -p perf.data <executable>

    -block-size=<uint> - size of a heat map block in bytes (default 64)
    -line-size=<uint>  - number of entries per line (default 256)
    -max-address=<uint> - maximum address considered valid for heatmap
                          (default 4GB)
    -o=<string>        - heatmap output file (default stdout)

(cherry picked from FBD13969992)
  • Loading branch information
maksfb committed Feb 5, 2019
1 parent ff6e212 commit 0c704eb
Show file tree
Hide file tree
Showing 9 changed files with 501 additions and 46 deletions.
2 changes: 2 additions & 0 deletions bolt/src/CMakeLists.txt
Expand Up @@ -77,6 +77,7 @@ add_llvm_tool(llvm-bolt
DebugData.cpp
DWARFRewriter.cpp
Exceptions.cpp
Heatmap.cpp
JumpTable.cpp
MCPlusBuilder.cpp
ProfileReader.cpp
Expand All @@ -90,3 +91,4 @@ add_llvm_tool(llvm-bolt

add_llvm_tool_symlink(perf2bolt llvm-bolt)
add_llvm_tool_symlink(llvm-boltdiff llvm-bolt)
add_llvm_tool_symlink(llvm-bolt-heatmap llvm-bolt)
110 changes: 103 additions & 7 deletions bolt/src/DataAggregator.cpp
Expand Up @@ -15,6 +15,7 @@
#include "BinaryContext.h"
#include "BinaryFunction.h"
#include "DataAggregator.h"
#include "Heatmap.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Options.h"
Expand All @@ -37,6 +38,8 @@ using namespace bolt;
namespace opts {

extern cl::OptionCategory AggregatorCategory;
extern bool HeatmapMode;
extern cl::SubCommand HeatmapCommand;

static cl::opt<bool>
BasicAggregation("nl",
Expand All @@ -46,12 +49,31 @@ BasicAggregation("nl",
cl::cat(AggregatorCategory));

static cl::opt<bool>
WriteAutoFDOData("autofdo",
cl::desc("generate autofdo textual data instead of bolt data"),
IgnoreBuildID("ignore-build-id",
cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
cl::init(false),
cl::ZeroOrMore,
cl::cat(AggregatorCategory));

// -block-size: size in bytes of a single heat map block. The value is passed
// as the first constructor argument of the Heatmap built in
// DataAggregator::printLBRHeatMap(). Only registered for the heatmap
// subcommand (HeatmapCommand).
static cl::opt<unsigned>
HeatmapBlock("block-size",
cl::desc("size of a heat map block in bytes (default 64)"),
cl::init(64),
cl::sub(HeatmapCommand));

// -o: destination handed to Heatmap::print() by
// DataAggregator::printLBRHeatMap(). The default value "-" is described as
// stdout by the option text. Only registered for the heatmap subcommand.
static cl::opt<std::string>
HeatmapFile("o",
cl::init("-"),
cl::desc("heatmap output file (default stdout)"),
cl::Optional,
cl::sub(HeatmapCommand));

// -max-address: upper address bound passed as the second constructor argument
// of the Heatmap built in DataAggregator::printLBRHeatMap(). Defaults to
// 0xffffffff. Only registered for the heatmap subcommand.
static cl::opt<unsigned long long>
HeatmapMaxAddress("max-address",
cl::init(0xffffffff),
cl::desc("maximum address considered valid for heatmap (default 4GB)"),
cl::Optional,
cl::sub(HeatmapCommand));

static cl::opt<bool>
ReadPreAggregated("pa",
cl::desc("skip perf and read data from a pre-aggregated file format"),
Expand All @@ -60,14 +82,15 @@ ReadPreAggregated("pa",
cl::cat(AggregatorCategory));

static cl::opt<bool>
IgnoreBuildID("ignore-build-id",
cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
TimeAggregator("time-aggr",
cl::desc("time BOLT aggregator"),
cl::init(false),
cl::ZeroOrMore,
cl::cat(AggregatorCategory));

static cl::opt<bool>
TimeAggregator("time-aggr",
cl::desc("time BOLT aggregator"),
WriteAutoFDOData("autofdo",
cl::desc("generate autofdo textual data instead of bolt data"),
cl::init(false),
cl::ZeroOrMore,
cl::cat(AggregatorCategory));
Expand Down Expand Up @@ -459,6 +482,15 @@ void DataAggregator::parseProfile(
}

prepareToParse("events", MainEventsPPI);

if (opts::HeatmapMode) {
if (auto EC = printLBRHeatMap()) {
errs() << "ERROR: failed to print heat map: " << EC.message() << '\n';
exit(1);
}
exit(0);
}

if ((!opts::BasicAggregation && parseBranchEvents()) ||
(opts::BasicAggregation && parseBasicEvents())) {
errs() << "PERF2BOLT: failed to parse samples\n";
Expand Down Expand Up @@ -967,6 +999,70 @@ bool DataAggregator::hasData() {
return true;
}

std::error_code DataAggregator::printLBRHeatMap() {
outs() << "PERF2BOLT: parse branch events...\n";
NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
TimerGroupDesc, opts::TimeAggregator);

Heatmap HM(opts::HeatmapBlock, opts::HeatmapMaxAddress);
uint64_t NumTotalSamples{0};

while (hasData()) {
auto SampleRes = parseBranchSample();
if (std::error_code EC = SampleRes.getError())
return EC;

auto &Sample = SampleRes.get();

// LBRs are stored in reverse execution order. NextLBR refers to the next
// executed branch record.
const LBREntry *NextLBR{nullptr};
for (const auto &LBR : Sample.LBR) {
if (NextLBR) {
// Record fall-through trace.
const auto TraceFrom = LBR.To;
const auto TraceTo = NextLBR->From;
++FallthroughLBRs[Trace(TraceFrom, TraceTo)].InternCount;
}
NextLBR = &LBR;
}
if (!Sample.LBR.empty()) {
HM.registerAddress(Sample.LBR.front().To);
HM.registerAddress(Sample.LBR.back().From);
}
NumTotalSamples += Sample.LBR.size();
}

if (!NumTotalSamples) {
errs() << "HEATMAP-ERROR: no LBR traces detected in profile. "
"Cannot build heatmap.\n";
exit(1);
}

outs() << "HEATMAP: read " << NumTotalSamples << " LBR samples\n";
outs() << "HEATMAP: " << FallthroughLBRs.size() << " unique traces\n";

outs() << "HEATMAP: building heat map...\n";

for (const auto &LBR : FallthroughLBRs) {
const auto &Trace = LBR.first;
const auto &Info = LBR.second;
HM.registerAddressRange(Trace.From, Trace.To, Info.InternCount);
}

if (HM.getNumInvalidRanges())
outs() << "HEATMAP: invalid traces: " << HM.getNumInvalidRanges() << '\n';

if (!HM.size()) {
errs() << "HEATMAP-ERROR: no valid traces registered\n";
exit(1);
}

HM.print(opts::HeatmapFile);

return std::error_code();
}

std::error_code DataAggregator::parseBranchEvents() {
outs() << "PERF2BOLT: parse branch events...\n";
NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
Expand Down
3 changes: 3 additions & 0 deletions bolt/src/DataAggregator.h
Expand Up @@ -222,6 +222,9 @@ class DataAggregator : public DataReader {
/// everything
bool hasData();

/// Print heat map based on LBR samples.
std::error_code printLBRHeatMap();

/// Parse a single perf sample containing a PID associated with a sequence of
/// LBR entries
ErrorOr<PerfBranchSample> parseBranchSample();
Expand Down
28 changes: 28 additions & 0 deletions bolt/src/Exceptions.cpp
Expand Up @@ -661,6 +661,34 @@ void BinaryFunction::emitLSDA(MCStreamer *Streamer, bool EmitColdPart) {

const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0;

/// Index every FDE found in \p EHFrame by its initial location so that later
/// lookups by function address are fast. CIEs are skipped.
CFIReaderWriter::CFIReaderWriter(const DWARFDebugFrame &EHFrame) {
  for (const auto &Entry : EHFrame.entries()) {
    const auto *Candidate = dyn_cast<dwarf::FDE>(&Entry);
    // Anything that is not an FDE (i.e. a CIE) is of no interest here.
    if (!Candidate)
      continue;

    const auto StartAddress = Candidate->getInitialLocation();
    auto Pos = FDEs.lower_bound(StartAddress);

    // First FDE seen for this address: record it and move on.
    if (Pos == FDEs.end() || Pos->first != StartAddress) {
      FDEs.emplace_hint(Pos, StartAddress, Candidate);
      continue;
    }

    // There could be multiple FDEs with the same initial address, and perhaps
    // different sizes (address ranges). Use the first entry with non-zero
    // size: a zero-sized duplicate never displaces what is already recorded.
    if (!Candidate->getAddressRange())
      continue;

    // The recorded entry is zero-sized; prefer the one that has a size.
    if (Pos->second->getAddressRange() == 0) {
      Pos->second = Candidate;
      continue;
    }

    // Both entries have a size: keep the recorded one, optionally warn.
    if (opts::Verbosity > 0) {
      errs() << "BOLT-WARNING: different FDEs for function at 0x"
             << Twine::utohexstr(Pos->first)
             << " detected; sizes: "
             << Pos->second->getAddressRange() << " and "
             << Candidate->getAddressRange() << '\n';
    }
  }
}

bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
uint64_t Address = Function.getAddress();
auto I = FDEs.find(Address);
Expand Down
30 changes: 1 addition & 29 deletions bolt/src/Exceptions.h
Expand Up @@ -28,35 +28,7 @@ class RewriteInstance;
/// BinaryFunction, as well as rewriting CFI sections.
class CFIReaderWriter {
public:
/// Build the FDE lookup map from the entries of \p EHFrame, keyed by each
/// FDE's initial location.
// NOTE(review): an out-of-line declaration of this same constructor follows
// directly below; this inline body appears to be the pre-change version shown
// by the diff - confirm before relying on it.
explicit CFIReaderWriter(const DWARFDebugFrame &EHFrame) {
// Prepare FDEs for fast lookup
for (const auto &Entry : EHFrame.entries()) {
const auto *CurFDE = dyn_cast<dwarf::FDE>(&Entry);
// Skip CIEs.
if (!CurFDE)
continue;
// There could be multiple FDEs with the same initial address, but
// different size (address range). Make sure the sizes match if they
// are non-zero. Ignore zero-sized ones.
auto FDEI = FDEs.lower_bound(CurFDE->getInitialLocation());
if (FDEI != FDEs.end() &&
FDEI->first == CurFDE->getInitialLocation()) {
// An FDE for this address is already recorded. Two non-zero sizes that
// disagree are only reported; the recorded entry is left unchanged.
if (FDEI->second->getAddressRange() != 0 &&
CurFDE->getAddressRange() != 0 &&
CurFDE->getAddressRange() != FDEI->second->getAddressRange()) {
errs() << "BOLT-ERROR: input FDEs for function at 0x"
<< Twine::utohexstr(FDEI->first)
<< " have conflicting sizes: "
<< FDEI->second->getAddressRange() << " and "
<< CurFDE->getAddressRange() << '\n';
} else if (FDEI->second->getAddressRange() == 0) {
// The recorded FDE is zero-sized: replace it with the current one.
FDEI->second = CurFDE;
}
continue;
}
// No FDE recorded for this address yet; insert using the lookup position
// as a hint.
FDEs.emplace_hint(FDEI, CurFDE->getInitialLocation(), CurFDE);
}
}
explicit CFIReaderWriter(const DWARFDebugFrame &EHFrame);

bool fillCFIInfoFor(BinaryFunction &Function) const;

Expand Down

0 comments on commit 0c704eb

Please sign in to comment.