Skip to content

Commit

Permalink
[memprof] Add Version2 of IndexedMemProfRecord serialization (#87455)
Browse files Browse the repository at this point in the history
I'm currently developing a new version of the indexed memprof format
where we deduplicate call stacks in IndexedAllocationInfo::CallStack
and IndexedMemProfRecord::CallSites.  We refer to call stacks with
integer IDs, namely CallStackId, just as we refer to Frame with
FrameId.  The deduplication will cut down the profile file size by 80%
in a large memprof file of mine.

As a step toward the goal, this patch teaches
IndexedMemProfRecord::{serialize,deserialize} to speak Version2.  A
subsequent patch will add Version2 support to llvm-profdata.

The essence of the patch is to replace the serialization of a call
stack, a vector of FrameIDs, with that of a CallStackId.  That is:

  const IndexedAllocationInfo &N = ...;
  ...
  LE.write<uint64_t>(N.CallStack.size());
  for (const FrameId &Id : N.CallStack)
    LE.write<FrameId>(Id);

becomes:

  LE.write<CallStackId>(N.CSId);
  • Loading branch information
kazutakahirata committed Apr 4, 2024
1 parent 3a7b522 commit d89914f
Show file tree
Hide file tree
Showing 5 changed files with 226 additions and 54 deletions.
61 changes: 22 additions & 39 deletions llvm/include/llvm/ProfileData/MemProf.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ enum IndexedVersion : uint64_t {
Version0 = 0,
// Version 1: Added a version field to the header.
Version1 = 1,
// Version 2: Added a call stack table. Under development.
Version2 = 2,
};

constexpr uint64_t MinimumSupportedVersion = Version0;
Expand Down Expand Up @@ -289,23 +291,14 @@ struct IndexedAllocationInfo {
: CallStack(CS.begin(), CS.end()), CSId(CSId), Info(MB) {}

// Returns the size in bytes when this allocation info struct is serialized.
size_t serializedSize() const {
return sizeof(uint64_t) + // The number of frames to serialize.
sizeof(FrameId) * CallStack.size() + // The callstack frame ids.
PortableMemInfoBlock::serializedSize(); // The size of the payload.
}
size_t serializedSize(IndexedVersion Version) const;

bool operator==(const IndexedAllocationInfo &Other) const {
if (Other.Info != Info)
return false;

if (Other.CallStack.size() != CallStack.size())
if (Other.CSId != CSId)
return false;

for (size_t J = 0; J < Other.CallStack.size(); J++) {
if (Other.CallStack[J] != CallStack[J])
return false;
}
return true;
}

Expand Down Expand Up @@ -357,6 +350,9 @@ struct IndexedMemProfRecord {
// inline location list may include additional entries, users should pick
// the last entry in the list with the same function GUID.
llvm::SmallVector<llvm::SmallVector<FrameId>> CallSites;
// Conceptually the same as above. We are going to keep both CallSites and
// CallSiteIds while we are transitioning from CallSites to CallSiteIds.
llvm::SmallVector<CallStackId> CallSiteIds;

void clear() {
AllocSites.clear();
Expand All @@ -370,47 +366,31 @@ struct IndexedMemProfRecord {
CallSites.append(Other.CallSites);
}

size_t serializedSize() const {
size_t Result = sizeof(GlobalValue::GUID);
for (const IndexedAllocationInfo &N : AllocSites)
Result += N.serializedSize();

// The number of callsites we have information for.
Result += sizeof(uint64_t);
for (const auto &Frames : CallSites) {
// The number of frame ids to serialize.
Result += sizeof(uint64_t);
Result += Frames.size() * sizeof(FrameId);
}
return Result;
}
size_t serializedSize(IndexedVersion Version) const;

bool operator==(const IndexedMemProfRecord &Other) const {
if (Other.AllocSites.size() != AllocSites.size())
return false;

if (Other.CallSites.size() != CallSites.size())
return false;

for (size_t I = 0; I < AllocSites.size(); I++) {
if (AllocSites[I] != Other.AllocSites[I])
return false;
}

for (size_t I = 0; I < CallSites.size(); I++) {
if (CallSites[I] != Other.CallSites[I])
return false;
}
if (Other.CallSiteIds != CallSiteIds)
return false;
return true;
}

// Serializes the memprof records in \p Records to the ostream \p OS based
// on the schema provided in \p Schema.
void serialize(const MemProfSchema &Schema, raw_ostream &OS);
void serialize(const MemProfSchema &Schema, raw_ostream &OS,
IndexedVersion Version);

// Deserializes memprof records from the Buffer.
static IndexedMemProfRecord deserialize(const MemProfSchema &Schema,
const unsigned char *Buffer);
const unsigned char *Buffer,
IndexedVersion Version);

// Returns the GUID for the function name after canonicalization. For
// memprof, we remove any .llvm suffix added by LTO. MemProfRecords are
Expand Down Expand Up @@ -480,7 +460,8 @@ class RecordLookupTrait {
using offset_type = uint64_t;

RecordLookupTrait() = delete;
RecordLookupTrait(const MemProfSchema &S) : Schema(S) {}
RecordLookupTrait(IndexedVersion V, const MemProfSchema &S)
: Version(V), Schema(S) {}

static bool EqualKey(uint64_t A, uint64_t B) { return A == B; }
static uint64_t GetInternalKey(uint64_t K) { return K; }
Expand All @@ -507,19 +488,21 @@ class RecordLookupTrait {

data_type ReadData(uint64_t K, const unsigned char *D,
offset_type /*Unused*/) {
Record = IndexedMemProfRecord::deserialize(Schema, D);
Record = IndexedMemProfRecord::deserialize(Schema, D, Version);
return Record;
}

private:
// Holds the MemProf version.
IndexedVersion Version;
// Holds the memprof schema used to deserialize records.
MemProfSchema Schema;
// Holds the records from one function deserialized from the indexed format.
IndexedMemProfRecord Record;
};

// Trait for writing IndexedMemProfRecord data to the on-disk hash table.
class RecordWriterTrait {
template <IndexedVersion Version> class RecordWriterTrait {
public:
using key_type = uint64_t;
using key_type_ref = uint64_t;
Expand All @@ -546,7 +529,7 @@ class RecordWriterTrait {
endian::Writer LE(Out, llvm::endianness::little);
offset_type N = sizeof(K);
LE.write<offset_type>(N);
offset_type M = V.serializedSize();
offset_type M = V.serializedSize(Version);
LE.write<offset_type>(M);
return std::make_pair(N, M);
}
Expand All @@ -560,7 +543,7 @@ class RecordWriterTrait {
void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
offset_type /*Unused*/) {
assert(Schema != nullptr && "MemProf schema is not initialized!");
V.serialize(*Schema, Out);
V.serialize(*Schema, Out, Version);
// Clear the IndexedMemProfRecord which results in clearing/freeing its
// vectors of allocs and callsites. This is owned by the associated on-disk
// hash table, but unused after this point. See also the comment added to
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/ProfileData/InstrProfReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1303,7 +1303,7 @@ Error IndexedInstrProfReader::readHeader() {
MemProfRecordTable.reset(MemProfRecordHashTable::Create(
/*Buckets=*/Start + RecordTableOffset,
/*Payload=*/Ptr,
/*Base=*/Start, memprof::RecordLookupTrait(Schema)));
/*Base=*/Start, memprof::RecordLookupTrait(memprof::Version1, Schema)));

// Initialize the frame table reader with the payload and bucket offsets.
MemProfFrameTable.reset(MemProfFrameHashTable::Create(
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/ProfileData/InstrProfWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -557,9 +557,11 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
OS.write(static_cast<uint64_t>(Id));
}

auto RecordWriter = std::make_unique<memprof::RecordWriterTrait>();
auto RecordWriter =
std::make_unique<memprof::RecordWriterTrait<memprof::Version1>>();
RecordWriter->Schema = &Schema;
OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait>
OnDiskChainedHashTableGenerator<
memprof::RecordWriterTrait<memprof::Version1>>
RecordTableGenerator;
for (auto &I : MemProfRecordData) {
// Insert the key (func hash) and value (memprof record).
Expand Down
170 changes: 161 additions & 9 deletions llvm/lib/ProfileData/MemProf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,33 +10,139 @@

namespace llvm {
namespace memprof {
namespace {
// Returns the number of bytes one IndexedAllocationInfo occupies in the
// Version0/Version1 layout: a frame count, the frame ids of the call stack,
// and the memory info payload.
size_t serializedSizeV0(const IndexedAllocationInfo &IAI) {
  // Length prefix holding the number of frames in the call stack.
  const size_t FrameCountBytes = sizeof(uint64_t);
  // One FrameId per call stack entry.
  const size_t FrameIdBytes = sizeof(FrameId) * IAI.CallStack.size();
  // The memory info block payload.
  const size_t PayloadBytes = PortableMemInfoBlock::serializedSize();
  return FrameCountBytes + FrameIdBytes + PayloadBytes;
}

void IndexedMemProfRecord::serialize(const MemProfSchema &Schema,
raw_ostream &OS) {
// Returns the number of bytes one IndexedAllocationInfo occupies in the
// Version2 layout, where a single CallStackId replaces the inline list of
// frame ids. The size does not depend on the contents of IAI.
size_t serializedSizeV2(const IndexedAllocationInfo &IAI) {
  // The CallStackId, followed by the memory info block payload.
  return sizeof(CallStackId) + PortableMemInfoBlock::serializedSize();
}
} // namespace

// Returns the size in bytes of this allocation info when serialized in the
// indexed MemProf format identified by Version. Version0 and Version1 share
// the same layout for this struct; Version2 has its own.
size_t IndexedAllocationInfo::serializedSize(IndexedVersion Version) const {
switch (Version) {
case Version0:
case Version1:
return serializedSizeV0(*this);
case Version2:
return serializedSizeV2(*this);
}
// Reached only when Version is none of the enumerators handled above.
llvm_unreachable("unsupported MemProf version");
}

namespace {
// Returns the number of bytes Record occupies when written in the
// Version0/Version1 layout. The result must match, field for field, what the
// Version0/Version1 serializer emits: an alloc-site count, each alloc site,
// a call-site count, and each call site as a length-prefixed frame id list.
size_t serializedSizeV0(const IndexedMemProfRecord &Record) {
  // The number of alloc sites to serialize. Note that the function GUID is
  // the hash table key and is not part of the record data, so it must not be
  // what is accounted for here.
  size_t Result = sizeof(uint64_t);
  for (const IndexedAllocationInfo &N : Record.AllocSites)
    Result += N.serializedSize(Version0);

  // The number of callsites we have information for.
  Result += sizeof(uint64_t);
  for (const auto &Frames : Record.CallSites) {
    // The number of frame ids to serialize.
    Result += sizeof(uint64_t);
    // The frame ids themselves.
    Result += Frames.size() * sizeof(FrameId);
  }
  return Result;
}

// Returns the number of bytes Record occupies when written in the Version2
// layout. The result must match, field for field, what the Version2
// serializer emits: an alloc-site count, each alloc site, a call-site count,
// and one CallStackId per call site.
size_t serializedSizeV2(const IndexedMemProfRecord &Record) {
  // The number of alloc sites to serialize. Note that the function GUID is
  // the hash table key and is not part of the record data, so it must not be
  // what is accounted for here.
  size_t Result = sizeof(uint64_t);
  for (const IndexedAllocationInfo &N : Record.AllocSites)
    Result += N.serializedSize(Version2);

  // The number of callsites we have information for.
  Result += sizeof(uint64_t);
  // One CallStackId per call site.
  Result += Record.CallSiteIds.size() * sizeof(CallStackId);
  return Result;
}
} // namespace

// Returns the size in bytes of this record when serialized in the indexed
// MemProf format identified by Version. Version0 and Version1 share the same
// record layout; Version2 has its own.
size_t IndexedMemProfRecord::serializedSize(IndexedVersion Version) const {
switch (Version) {
case Version0:
case Version1:
return serializedSizeV0(*this);
case Version2:
return serializedSizeV2(*this);
}
// Reached only when Version is none of the enumerators handled above.
llvm_unreachable("unsupported MemProf version");
}

namespace {
void serializeV0(const IndexedMemProfRecord &Record,
const MemProfSchema &Schema, raw_ostream &OS) {
using namespace support;

endian::Writer LE(OS, llvm::endianness::little);

LE.write<uint64_t>(AllocSites.size());
for (const IndexedAllocationInfo &N : AllocSites) {
LE.write<uint64_t>(Record.AllocSites.size());
for (const IndexedAllocationInfo &N : Record.AllocSites) {
LE.write<uint64_t>(N.CallStack.size());
for (const FrameId &Id : N.CallStack)
LE.write<FrameId>(Id);
N.Info.serialize(Schema, OS);
}

// Related contexts.
LE.write<uint64_t>(CallSites.size());
for (const auto &Frames : CallSites) {
LE.write<uint64_t>(Record.CallSites.size());
for (const auto &Frames : Record.CallSites) {
LE.write<uint64_t>(Frames.size());
for (const FrameId &Id : Frames)
LE.write<FrameId>(Id);
}
}

IndexedMemProfRecord
IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
const unsigned char *Ptr) {
void serializeV2(const IndexedMemProfRecord &Record,
const MemProfSchema &Schema, raw_ostream &OS) {
using namespace support;

endian::Writer LE(OS, llvm::endianness::little);

LE.write<uint64_t>(Record.AllocSites.size());
for (const IndexedAllocationInfo &N : Record.AllocSites) {
LE.write<CallStackId>(N.CSId);
N.Info.serialize(Schema, OS);
}

// Related contexts.
LE.write<uint64_t>(Record.CallSiteIds.size());
for (const auto &CSId : Record.CallSiteIds)
LE.write<CallStackId>(CSId);
}
} // namespace

// Serializes this record to OS using Schema, in the indexed MemProf format
// identified by Version. Version0 and Version1 share the same record layout;
// Version2 has its own.
void IndexedMemProfRecord::serialize(const MemProfSchema &Schema,
raw_ostream &OS, IndexedVersion Version) {
switch (Version) {
case Version0:
case Version1:
serializeV0(*this, Schema, OS);
return;
case Version2:
serializeV2(*this, Schema, OS);
return;
}
// Reached only when Version is none of the enumerators handled above.
llvm_unreachable("unsupported MemProf version");
}

namespace {
IndexedMemProfRecord deserializeV0(const MemProfSchema &Schema,
const unsigned char *Ptr) {
using namespace support;

IndexedMemProfRecord Record;
Expand Down Expand Up @@ -73,11 +179,57 @@ IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
Frames.push_back(Id);
}
Record.CallSites.push_back(Frames);
Record.CallSiteIds.push_back(hashCallStack(Frames));
}

return Record;
}

// Reads one record in the Version2 layout starting at Ptr and returns it.
// Only the CallStackId of each alloc site and call site is populated; the
// CallStack and CallSites members of the result are left empty.
IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema,
                                   const unsigned char *Ptr) {
  using namespace support;

  IndexedMemProfRecord Result;

  // Alloc sites: a count, then (CallStackId, memory info block) pairs.
  const uint64_t AllocSiteCount =
      endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
  for (uint64_t Idx = 0; Idx != AllocSiteCount; ++Idx) {
    IndexedAllocationInfo Alloc;
    Alloc.CSId =
        endian::readNext<CallStackId, llvm::endianness::little, unaligned>(Ptr);
    Alloc.Info.deserialize(Schema, Ptr);
    // The cursor is advanced past the memory info block explicitly here.
    Ptr += PortableMemInfoBlock::serializedSize();
    Result.AllocSites.push_back(Alloc);
  }

  // Call sites: a count, then one CallStackId each.
  const uint64_t CallSiteCount =
      endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
  for (uint64_t Idx = 0; Idx != CallSiteCount; ++Idx) {
    const CallStackId Id =
        endian::readNext<CallStackId, llvm::endianness::little, unaligned>(Ptr);
    Result.CallSiteIds.push_back(Id);
  }

  return Result;
}
} // namespace

// Deserializes one record from the buffer at Ptr using Schema, interpreting
// the bytes in the indexed MemProf format identified by Version. Version0 and
// Version1 share the same record layout; Version2 has its own.
IndexedMemProfRecord
IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
const unsigned char *Ptr,
IndexedVersion Version) {
switch (Version) {
case Version0:
case Version1:
return deserializeV0(Schema, Ptr);
case Version2:
return deserializeV2(Schema, Ptr);
}
// Reached only when Version is none of the enumerators handled above.
llvm_unreachable("unsupported MemProf version");
}

GlobalValue::GUID IndexedMemProfRecord::getGUID(const StringRef FunctionName) {
// Canonicalize the function name to drop suffixes such as ".llvm.". Note
// we do not drop any ".__uniq." suffixes, as getCanonicalFnName does not drop
Expand Down
Loading

0 comments on commit d89914f

Please sign in to comment.