Skip to content

Commit

Permalink
[memprof] Symbolize and cache stack frames.
Browse files Browse the repository at this point in the history
Currently, symbolization of stack frames occurs on demand when the instrprof
writer iterates over all the records in the raw memprof reader. With this
change, we symbolize and cache the frames immediately after reading the
raw profiles. For a large internal binary, this results in a runtime
reduction of ~50% (2m -> 48s) when merging a memprof raw profile with a
raw instr profile to generate an indexed profile. This change also makes
it simpler in the future to generate additional calling context
metadata to attach to each memprof record.

Differential Revision: https://reviews.llvm.org/D120430
  • Loading branch information
snehasish committed Mar 3, 2022
1 parent 569d0cc commit dda7b74
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 29 deletions.
2 changes: 1 addition & 1 deletion llvm/include/llvm/ProfileData/MemProf.h
Expand Up @@ -196,7 +196,7 @@ struct MemProfRecord {
};

// The dynamic calling context for the allocation.
std::vector<Frame> CallStack;
llvm::SmallVector<Frame> CallStack;
// The statistics obtained from the runtime for the allocation.
PortableMemInfoBlock Info;

Expand Down
17 changes: 15 additions & 2 deletions llvm/include/llvm/ProfileData/RawMemProfReader.h
Expand Up @@ -32,7 +32,7 @@ namespace memprof {

// Map from id (recorded from sanitizer stack depot) to virtual addresses for
// each program counter address in the callstack.
using CallStackMap = llvm::DenseMap<uint64_t, llvm::SmallVector<uint64_t, 32>>;
using CallStackMap = llvm::DenseMap<uint64_t, llvm::SmallVector<uint64_t>>;

class RawMemProfReader {
public:
Expand Down Expand Up @@ -75,14 +75,23 @@ class RawMemProfReader {
llvm::MapVector<uint64_t, MemInfoBlock> &Prof,
CallStackMap &SM)
: Symbolizer(std::move(Sym)), SegmentInfo(Seg.begin(), Seg.end()),
ProfileData(Prof), StackMap(SM) {}
ProfileData(Prof), StackMap(SM) {
// We don't call initialize here since there is no raw profile to read. The
// test should pass in the raw profile as structured data.

// If there is an error here then the mock symbolizer has not been
// initialized properly.
if (Error E = symbolizeStackFrames())
report_fatal_error(std::move(E));
}

private:
RawMemProfReader(std::unique_ptr<MemoryBuffer> DataBuffer,
object::OwningBinary<object::Binary> &&Bin)
: DataBuffer(std::move(DataBuffer)), Binary(std::move(Bin)) {}
Error initialize();
Error readRawProfile();
Error symbolizeStackFrames();

object::SectionedAddress getModuleOffset(uint64_t VirtualAddress);
Error fillRecord(const uint64_t Id, const MemInfoBlock &MIB,
Expand All @@ -102,6 +111,10 @@ class RawMemProfReader {
llvm::MapVector<uint64_t, MemInfoBlock> ProfileData;
CallStackMap StackMap;

// Cached symbolization from PC to Frame.
llvm::DenseMap<uint64_t, llvm::SmallVector<MemProfRecord::Frame>>
SymbolizedFrame;

// Iterator to read from the ProfileData MapVector.
llvm::MapVector<uint64_t, MemInfoBlock>::iterator Iter = ProfileData.end();
};
Expand Down
69 changes: 44 additions & 25 deletions llvm/lib/ProfileData/RawMemProfReader.cpp
Expand Up @@ -132,7 +132,7 @@ CallStackMap readStackInfo(const char *Ptr) {
const uint64_t StackId = endian::readNext<uint64_t, little, unaligned>(Ptr);
const uint64_t NumPCs = endian::readNext<uint64_t, little, unaligned>(Ptr);

SmallVector<uint64_t, 32> CallStack;
SmallVector<uint64_t> CallStack;
for (uint64_t J = 0; J < NumPCs; J++) {
CallStack.push_back(endian::readNext<uint64_t, little, unaligned>(Ptr));
}
Expand Down Expand Up @@ -273,7 +273,46 @@ Error RawMemProfReader::initialize() {
return report(SOFOr.takeError(), FileName);
Symbolizer = std::move(SOFOr.get());

return readRawProfile();
if (Error E = readRawProfile())
return E;

return symbolizeStackFrames();
}

/// Symbolizes every program counter address recorded in StackMap and caches
/// the resulting frames in SymbolizedFrame, keyed by virtual address.
///
/// Each distinct address is handed to the symbolizer at most once; addresses
/// that already have a cache entry are skipped.
///
/// \returns the error produced by the symbolizer if a lookup fails, otherwise
/// Error::success().
Error RawMemProfReader::symbolizeStackFrames() {
  // The specifier to use when symbolization is requested.
  const DILineInfoSpecifier Specifier(
      DILineInfoSpecifier::FileLineInfoKind::RawValue,
      DILineInfoSpecifier::FunctionNameKind::LinkageName);

  for (const auto &Entry : StackMap) {
    for (const uint64_t VAddr : Entry.getSecond()) {
      // Check if we have already symbolized and cached the result.
      if (SymbolizedFrame.count(VAddr) > 0)
        continue;

      Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode(
          getModuleOffset(VAddr), Specifier, /*UseSymbolTable=*/false);
      if (!DIOr)
        return DIOr.takeError();
      // Bind by reference; the inlining info is only read below, so there is
      // no need to copy it out of the Expected.
      const DIInliningInfo &DI = DIOr.get();

      // Create the cache entry before walking the frames. This way an address
      // for which the symbolizer reports zero frames still gets an (empty)
      // entry, so it is not symbolized again for other call stacks and the
      // cache-presence assert in fillRecord holds for every address in
      // StackMap. Nothing else inserts into SymbolizedFrame until the next
      // address is processed, so the reference stays valid for the loop.
      auto &CachedFrames = SymbolizedFrame[VAddr];

      const size_t NumFrames = DI.getNumberOfFrames();
      CachedFrames.reserve(NumFrames);
      for (size_t I = 0; I < NumFrames; I++) {
        const auto &Frame = DI.getFrame(I);
        CachedFrames.emplace_back(
            // We use the function guid which we expect to be a uint64_t. At
            // this time, it is the lower 64 bits of the md5 of the function
            // name. Any suffix with .llvm. is trimmed since these are added by
            // thinLTO global promotion. At the time the profile is consumed,
            // these suffixes will not be present.
            Function::getGUID(trimSuffix(Frame.FunctionName)),
            // Line offset relative to the start line reported for the frame.
            Frame.Line - Frame.StartLine, Frame.Column,
            // Only the first entry is not an inlined location.
            I != 0);
      }
    }
  }
  return Error::success();
}

Error RawMemProfReader::readRawProfile() {
Expand Down Expand Up @@ -347,30 +386,10 @@ RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
Error RawMemProfReader::fillRecord(const uint64_t Id, const MemInfoBlock &MIB,
MemProfRecord &Record) {
auto &CallStack = StackMap[Id];
DILineInfoSpecifier Specifier(
DILineInfoSpecifier::FileLineInfoKind::RawValue,
DILineInfoSpecifier::FunctionNameKind::LinkageName);
for (const uint64_t Address : CallStack) {
Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode(
getModuleOffset(Address), Specifier, /*UseSymbolTable=*/false);

if (!DIOr)
return DIOr.takeError();
DIInliningInfo DI = DIOr.get();

for (size_t I = 0; I < DI.getNumberOfFrames(); I++) {
const auto &Frame = DI.getFrame(I);
Record.CallStack.emplace_back(
// We use the function guid which we expect to be a uint64_t. At this
// time, it is the lower 64 bits of the md5 of the function name. Any
// suffix with .llvm. is trimmed since these are added by thinLTO
// global promotion. At the time the profile is consumed, these
// suffixes will not be present.
Function::getGUID(trimSuffix(Frame.FunctionName)),
Frame.Line - Frame.StartLine, Frame.Column,
// Only the first entry is not an inlined location.
I != 0);
}
assert(SymbolizedFrame.count(Address) &&
"Address not found in symbolized frame cache.");
Record.CallStack.append(SymbolizedFrame[Address]);
}
Record.Info = PortableMemInfoBlock(MIB);
return Error::success();
Expand Down
2 changes: 1 addition & 1 deletion llvm/unittests/ProfileData/MemProfTest.cpp
Expand Up @@ -132,7 +132,7 @@ TEST(MemProf, FillsValue) {

EXPECT_CALL(*Symbolizer, symbolizeInlinedCode(SectionedAddress{0x2000},
specifier(), false))
.Times(2)
.Times(1) // Only once since we cache the result for future lookups.
.WillRepeatedly(Return(makeInliningInfo({
{"foo", 10, 5, 30},
{"bar", 201, 150, 20},
Expand Down

0 comments on commit dda7b74

Please sign in to comment.