Skip to content

Commit

Permalink
[PERF2BOLT] Add support for non-LBR aggregation
Browse files Browse the repository at this point in the history
Summary:
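Previously, we depended on the Python script perf2bolt.py whenever
operating with non-LBR data. This change adds the `-nl` option so that the
C++ data aggregator can parse and aggregate basic (non-LBR) samples itself.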

(cherry picked from FBD7620125)
  • Loading branch information
rafaelauler authored and maksfb committed Apr 13, 2018
1 parent a30fff6 commit db949fc
Show file tree
Hide file tree
Showing 8 changed files with 232 additions and 62 deletions.
5 changes: 3 additions & 2 deletions bolt/BinaryFunctionProfile.cpp
Expand Up @@ -594,7 +594,7 @@ void BinaryFunction::readSampleData() {
if (!SampleDataOrErr)
return;

// Non-LBR mode territory
// Basic samples mode territory (without LBR info)
// First step is to assign BB execution count based on samples from perf
ProfileMatchRatio = 1.0f;
removeTagsFromProfile();
Expand All @@ -603,7 +603,8 @@ void BinaryFunction::readSampleData() {
bool NormalizeByCalls = BC.DR.usesEvent("branches");
static bool NagUser{true};
if (NagUser) {
outs() << "BOLT-INFO: operating with non-LBR profiling data.\n";
outs()
<< "BOLT-INFO: operating with basic samples profiling data (no LBR).\n";
if (NormalizeByInsnCount) {
outs() << "BOLT-INFO: normalizing samples by instruction count.\n";
} else if (NormalizeByCalls) {
Expand Down
214 changes: 176 additions & 38 deletions bolt/DataAggregator.cpp
Expand Up @@ -42,6 +42,13 @@ TimeAggregator("time-aggr",
cl::ZeroOrMore,
cl::cat(AggregatorCategory));

// Command-line switch "-nl". When set, the aggregator asks perf script for
// "pid,event,ip" fields instead of "pid,brstack", parses the output with
// parseBasicEvents() and writes the aggregated file in "no_lbr" format.
// Defaults to false, i.e. LBR (branch stack) aggregation.
static llvm::cl::opt<bool>
BasicAggregation("nl",
cl::desc("aggregate basic samples (without LBR info)"),
cl::init(false),
cl::ZeroOrMore,
cl::cat(AggregatorCategory));

}

namespace {
Expand Down Expand Up @@ -85,11 +92,18 @@ void DataAggregator::abort() {
bool DataAggregator::launchPerfBranchEventsNoWait() {
SmallVector<const char*, 4> Argv;

outs() << "PERF2BOLT: Spawning perf-script job to read branch events\n";
if (opts::BasicAggregation)
outs()
<< "PERF2BOLT: Spawning perf-script job to read events without LBR\n";
else
outs() << "PERF2BOLT: Spawning perf-script job to read branch events\n";
Argv.push_back(PerfPath.data());
Argv.push_back("script");
Argv.push_back("-F");
Argv.push_back("pid,brstack");
if (opts::BasicAggregation)
Argv.push_back("pid,event,ip");
else
Argv.push_back("pid,brstack");
Argv.push_back("-i");
Argv.push_back(PerfDataFilename.data());
Argv.push_back(nullptr);
Expand Down Expand Up @@ -396,8 +410,9 @@ bool DataAggregator::aggregate(BinaryContext &BC,
ParsingBuf = FileBuf->getBuffer();
Col = 0;
Line = 1;
if (parseBranchEvents()) {
outs() << "PERF2BOLT: Failed to parse branch events\n";
if ((!opts::BasicAggregation && parseBranchEvents()) ||
(opts::BasicAggregation && parseBasicEvents())) {
outs() << "PERF2BOLT: Failed to parse samples\n";
}

// Mark all functions with registered events as having a valid profile.
Expand Down Expand Up @@ -463,16 +478,30 @@ DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) {
}

bool
DataAggregator::doIntraBranch(BinaryFunction *Func, const LBREntry &Branch) {
FuncBranchData *AggrData = Func->getBranchData();
DataAggregator::doSample(BinaryFunction &Func, uint64_t Address) {
  // Register one basic (non-LBR) sample hit at \p Address inside \p Func.
  // Samples are grouped per function under the function's first name and are
  // recorded as an offset from the function's start address. Always returns
  // true.
  //
  // Bind the name list once; this is valid whether getNames() returns a
  // reference or a temporary (whose lifetime the const reference extends).
  const auto &Names = Func.getNames();

  auto I = FuncsToSamples.find(Names[0]);
  if (I == FuncsToSamples.end()) {
    // First sample seen for this function: create an empty record for it.
    // Only the iterator half of the insert() result is needed.
    I = FuncsToSamples
            .insert(std::make_pair(
                Names[0],
                FuncSampleData(Names[0], FuncSampleData::ContainerTy())))
            .first;
  }

  I->second.bumpCount(Address - Func.getAddress());
  return true;
}

bool
DataAggregator::doIntraBranch(BinaryFunction &Func, const LBREntry &Branch) {
FuncBranchData *AggrData = Func.getBranchData();
if (!AggrData) {
AggrData = &FuncsToBranches[Func->getNames()[0]];
AggrData->Name = Func->getNames()[0];
Func->setBranchData(AggrData);
AggrData = &FuncsToBranches[Func.getNames()[0]];
AggrData->Name = Func.getNames()[0];
Func.setBranchData(AggrData);
}

AggrData->bumpBranchCount(Branch.From - Func->getAddress(),
Branch.To - Func->getAddress(),
AggrData->bumpBranchCount(Branch.From - Func.getAddress(),
Branch.To - Func.getAddress(),
Branch.Mispred);
return true;
}
Expand Down Expand Up @@ -531,7 +560,7 @@ bool DataAggregator::doBranch(const LBREntry &Branch) {
Branch.To - FromFunc->getAddress(),
1,
Branch.Mispred);
return doIntraBranch(FromFunc, Branch);
return doIntraBranch(*FromFunc, Branch);
}

return doInterBranch(FromFunc, ToFunc, Branch);
Expand Down Expand Up @@ -562,7 +591,7 @@ bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second) {
}

for (const auto &Pair : *FTs) {
doIntraBranch(FromFunc,
doIntraBranch(*FromFunc,
LBREntry{Pair.first + FromFunc->getAddress(),
Pair.second + FromFunc->getAddress(),
false});
Expand Down Expand Up @@ -663,6 +692,38 @@ ErrorOr<PerfBranchSample> DataAggregator::parseBranchSample() {
return Res;
}

/// Parse one line of perf script output produced with "-F pid,event,ip".
///
/// Returns the parsed event name and sample address on success. If a PID
/// filter is active and the sample's PID is not in it, the rest of the line
/// is discarded and an empty sample (empty event name, PC == 0) is returned.
/// Returns an error code if any field fails to parse, or io_error if the line
/// does not end right after the address field.
ErrorOr<PerfBasicSample> DataAggregator::parseBasicSample() {
  // Each field may be preceded by any number of field separators.
  const auto SkipSeparators = [this] {
    while (checkAndConsumeFS()) {
    }
  };

  SkipSeparators();
  auto ProcessID = parseNumberField(FieldSeparator, true);
  if (std::error_code EC = ProcessID.getError())
    return EC;

  // An empty PID set means "accept every process".
  const bool FilteredOut = !PIDs.empty() && PIDs.count(ProcessID.get()) == 0;
  if (FilteredOut) {
    consumeRestOfLine();
    return PerfBasicSample{StringRef(), 0};
  }

  SkipSeparators();
  auto EventName = parseString(FieldSeparator);
  if (std::error_code EC = EventName.getError())
    return EC;

  SkipSeparators();
  auto SampleAddr = parseHexField(FieldSeparator, true);
  if (std::error_code EC = SampleAddr.getError())
    return EC;

  // The address must be the last field on the line.
  if (checkAndConsumeNewLine())
    return PerfBasicSample{EventName.get(), SampleAddr.get()};

  reportError("expected end of line");
  return make_error_code(llvm::errc::io_error);
}

ErrorOr<PerfMemSample> DataAggregator::parseMemSample() {
PerfMemSample Res{0,0};

Expand Down Expand Up @@ -701,7 +762,10 @@ ErrorOr<PerfMemSample> DataAggregator::parseMemSample() {
return EC;
}

checkAndConsumeNewLine();
if (!checkAndConsumeNewLine()) {
reportError("expected end of line");
return make_error_code(llvm::errc::io_error);
}

return PerfMemSample{PCRes.get(), AddrRes.get()};
}
Expand Down Expand Up @@ -784,6 +848,64 @@ std::error_code DataAggregator::parseBranchEvents() {
return std::error_code();
}

std::error_code DataAggregator::parseBasicEvents() {
outs() << "PERF2BOLT: Aggregating basic events (without LBR)...\n";
NamedRegionTimer T("parseBasic", "Perf samples parsing", TimerGroupName,
TimerGroupDesc, opts::TimeAggregator);
uint64_t NumSamples{0};
uint64_t OutOfRangeSamples{0};
while (hasData()) {
auto SampleRes = parseBasicSample();
if (std::error_code EC = SampleRes.getError())
return EC;

auto &Sample = SampleRes.get();
if (!Sample.PC)
continue;

++NumSamples;
auto *Func = getBinaryFunctionContainingAddress(Sample.PC);
if (!Func) {
++OutOfRangeSamples;
continue;
}

doSample(*Func, Sample.PC);
EventNames.insert(Sample.EventName);
}
outs() << "PERF2BOLT: Read " << NumSamples << " samples\n";

outs() << "PERF2BOLT: Out of range samples recorded in unknown regions: "
<< OutOfRangeSamples;
float Perc{0.0f};
if (NumSamples > 0) {
outs() << " (";
Perc = OutOfRangeSamples * 100.0f / NumSamples;
if (outs().has_colors()) {
if (Perc > 60.0f) {
outs().changeColor(raw_ostream::RED);
} else if (Perc > 40.0f) {
outs().changeColor(raw_ostream::YELLOW);
} else {
outs().changeColor(raw_ostream::GREEN);
}
}
outs() << format("%.1f%%", Perc);
if (outs().has_colors())
outs().resetColor();
outs() << ")";
}
outs() << "\n";
if (Perc > 80.0f) {
outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
"binary is probably not the same binary used during profiling "
"collection. The generated data may be ineffective for improving "
"performance.\n\n";
}

return std::error_code();
}

std::error_code DataAggregator::parseMemEvents() {
outs() << "PERF2BOLT: Aggregating memory events...\n";
NamedRegionTimer T("memevents", "Mem samples parsing", TimerGroupName,
Expand Down Expand Up @@ -950,36 +1072,52 @@ std::error_code DataAggregator::writeAggregatedFile() const {
uint64_t BranchValues{0};
uint64_t MemValues{0};

for (const auto &Func : FuncsToBranches) {
for (const auto &BI : Func.getValue().Data) {
writeLocation(BI.From);
writeLocation(BI.To);
OutFile << BI.Mispreds << " " << BI.Branches << "\n";
++BranchValues;
if (opts::BasicAggregation) {
OutFile << "no_lbr";
for (const auto &Entry : EventNames) {
OutFile << " " << Entry.getKey();
}
OutFile << "\n";

for (const auto &Func : FuncsToSamples) {
for (const auto &SI : Func.getValue().Data) {
writeLocation(SI.Loc);
OutFile << SI.Hits << "\n";
++BranchValues;
}
}
for (const auto &BI : Func.getValue().EntryData) {
// Do not output if source is a known symbol, since this was already
// accounted for in the source function
if (BI.From.IsSymbol)
continue;
writeLocation(BI.From);
writeLocation(BI.To);
OutFile << BI.Mispreds << " " << BI.Branches << "\n";
++BranchValues;
} else {
for (const auto &Func : FuncsToBranches) {
for (const auto &BI : Func.getValue().Data) {
writeLocation(BI.From);
writeLocation(BI.To);
OutFile << BI.Mispreds << " " << BI.Branches << "\n";
++BranchValues;
}
for (const auto &BI : Func.getValue().EntryData) {
// Do not output if source is a known symbol, since this was already
// accounted for in the source function
if (BI.From.IsSymbol)
continue;
writeLocation(BI.From);
writeLocation(BI.To);
OutFile << BI.Mispreds << " " << BI.Branches << "\n";
++BranchValues;
}
}
}

WriteMemLocs = true;
for (const auto &Func : FuncsToMemEvents) {
for (const auto &MemEvent : Func.getValue().Data) {
writeLocation(MemEvent.Offset);
writeLocation(MemEvent.Addr);
OutFile << MemEvent.Count << "\n";
++MemValues;
WriteMemLocs = true;
for (const auto &Func : FuncsToMemEvents) {
for (const auto &MemEvent : Func.getValue().Data) {
writeLocation(MemEvent.Offset);
writeLocation(MemEvent.Addr);
OutFile << MemEvent.Count << "\n";
++MemValues;
}
}
}

outs() << "PERF2BOLT: Wrote " << BranchValues << " branch objects and "
outs() << "PERF2BOLT: Wrote " << BranchValues << " objects and "
<< MemValues << " memory objects to " << OutputFDataName << "\n";

return std::error_code();
Expand Down
17 changes: 16 additions & 1 deletion bolt/DataAggregator.h
Expand Up @@ -32,6 +32,11 @@ struct PerfBranchSample {
SmallVector<LBREntry, 16> LBR;
};

/// A single basic (non-LBR) perf sample: which event fired and the address
/// at which it was recorded. Field order matters: the struct is built with
/// aggregate initialization as {EventName, PC}.
struct PerfBasicSample {
/// Name of the perf event, as parsed from the event field of the sample line.
/// NOTE(review): StringRef is non-owning; presumably it refers to the
/// aggregator's parse buffer - confirm the buffer outlives the sample.
StringRef EventName;
/// Address of the sample. parseBasicSample() also returns 0 here (with an
/// empty EventName) for samples dropped by the PID filter, and
/// parseBasicEvents() skips such samples.
uint64_t PC;
};

struct PerfMemSample {
uint64_t PC;
uint64_t Addr;
Expand Down Expand Up @@ -119,8 +124,11 @@ class DataAggregator : public DataReader {
BinaryFunction *getBinaryFunctionContainingAddress(uint64_t Address);

/// Semantic actions - parser hooks to interpret parsed perf samples
/// Register a sample (non-LBR mode), i.e. a new hit at \p Address
bool doSample(BinaryFunction &Func, const uint64_t Address);

/// Register an intraprocedural branch \p Branch.
bool doIntraBranch(BinaryFunction *Func, const LBREntry &Branch);
bool doIntraBranch(BinaryFunction &Func, const LBREntry &Branch);

/// Register an interprocedural branch from \p FromFunc to \p ToFunc with
/// offsets \p From and \p To, respectively.
Expand All @@ -142,6 +150,10 @@ class DataAggregator : public DataReader {
/// LBR entries
ErrorOr<PerfBranchSample> parseBranchSample();

/// Parse a single perf sample containing a PID associated with an event name
/// and a PC
ErrorOr<PerfBasicSample> parseBasicSample();

/// Parse a single perf sample containing a PID associated with an IP and
/// address.
ErrorOr<PerfMemSample> parseMemSample();
Expand All @@ -159,6 +171,9 @@ class DataAggregator : public DataReader {
/// Parse the full output generated by perf script to report LBR samples.
std::error_code parseBranchEvents();

/// Parse the full output generated by perf script to report non-LBR samples.
std::error_code parseBasicEvents();

/// Parse the full output generated by perf script to report memory events.
std::error_code parseMemEvents();

Expand Down

0 comments on commit db949fc

Please sign in to comment.