Skip to content

Commit

Permalink
[memprof] Reduce schema for Version2 (#89876)
Browse files Browse the repository at this point in the history
Curently, the compiler only uses several fields of MemoryInfoBlock.
Serializing all fields into the indexed MemProf file simply wastes
storage.

This patch limits the schema down to four fields for Version2 by
default.  It retains the old behavior of serializing all fields via:

  llvm-profdata merge --memprof-version=2 --memprof-full-schema

This patch reduces the size of the indexed MemProf profile I have by
40% (1.6GB down to 1.0GB).
  • Loading branch information
kazutakahirata committed Apr 24, 2024
1 parent a10d67f commit 4c8ec8f
Show file tree
Hide file tree
Showing 6 changed files with 145 additions and 45 deletions.
7 changes: 6 additions & 1 deletion llvm/include/llvm/ProfileData/InstrProfWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,15 @@ class InstrProfWriter {
// The MemProf version we should write.
memprof::IndexedVersion MemProfVersionRequested;

// Whether to serialize the full schema.
bool MemProfFullSchema;

public:
InstrProfWriter(
bool Sparse = false, uint64_t TemporalProfTraceReservoirSize = 0,
uint64_t MaxTemporalProfTraceLength = 0, bool WritePrevVersion = false,
memprof::IndexedVersion MemProfVersionRequested = memprof::Version0);
memprof::IndexedVersion MemProfVersionRequested = memprof::Version0,
bool MemProfFullSchema = false);
~InstrProfWriter();

StringMap<ProfilingData> &getProfileData() { return FunctionData; }
Expand Down Expand Up @@ -203,6 +207,7 @@ class InstrProfWriter {
void setMemProfVersionRequested(memprof::IndexedVersion Version) {
MemProfVersionRequested = Version;
}
void setMemProfFullSchema(bool Full) { MemProfFullSchema = Full; }
// Compute the overlap b/w this object and Other. Program level result is
// stored in Overlap and function level result is stored in FuncLevelOverlap.
void overlapRecord(NamedInstrProfRecord &&Other, OverlapStats &Overlap,
Expand Down
9 changes: 8 additions & 1 deletion llvm/include/llvm/ProfileData/MemProf.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,14 +117,21 @@ struct PortableMemInfoBlock {
void clear() { *this = PortableMemInfoBlock(); }

// Returns the full schema currently in use.
static MemProfSchema getSchema() {
static MemProfSchema getFullSchema() {
MemProfSchema List;
#define MIBEntryDef(NameTag, Name, Type) List.push_back(Meta::Name);
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
return List;
}

// Returns the schema consisting of the fields currently consumed by the
// compiler.
static MemProfSchema getHotColdSchema() {
return {Meta::AllocCount, Meta::TotalSize, Meta::TotalLifetime,
Meta::TotalLifetimeAccessDensity};
}

bool operator==(const PortableMemInfoBlock &Other) const {
#define MIBEntryDef(NameTag, Name, Type) \
if (Other.get##Name() != get##Name()) \
Expand Down
23 changes: 14 additions & 9 deletions llvm/lib/ProfileData/InstrProfWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,12 +184,13 @@ class InstrProfRecordWriterTrait {
InstrProfWriter::InstrProfWriter(
bool Sparse, uint64_t TemporalProfTraceReservoirSize,
uint64_t MaxTemporalProfTraceLength, bool WritePrevVersion,
memprof::IndexedVersion MemProfVersionRequested)
memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema)
: Sparse(Sparse), MaxTemporalProfTraceLength(MaxTemporalProfTraceLength),
TemporalProfTraceReservoirSize(TemporalProfTraceReservoirSize),
InfoObj(new InstrProfRecordWriterTrait()),
WritePrevVersion(WritePrevVersion),
MemProfVersionRequested(MemProfVersionRequested) {}
MemProfVersionRequested(MemProfVersionRequested),
MemProfFullSchema(MemProfFullSchema) {}

InstrProfWriter::~InstrProfWriter() { delete InfoObj; }

Expand Down Expand Up @@ -507,7 +508,7 @@ static Error writeMemProfV0(
OS.write(0ULL); // Reserve space for the memprof frame payload offset.
OS.write(0ULL); // Reserve space for the memprof frame table offset.

auto Schema = memprof::PortableMemInfoBlock::getSchema();
auto Schema = memprof::PortableMemInfoBlock::getFullSchema();
writeMemProfSchema(OS, Schema);

uint64_t RecordTableOffset =
Expand All @@ -533,7 +534,7 @@ static Error writeMemProfV1(
OS.write(0ULL); // Reserve space for the memprof frame payload offset.
OS.write(0ULL); // Reserve space for the memprof frame table offset.

auto Schema = memprof::PortableMemInfoBlock::getSchema();
auto Schema = memprof::PortableMemInfoBlock::getFullSchema();
writeMemProfSchema(OS, Schema);

uint64_t RecordTableOffset =
Expand All @@ -554,7 +555,8 @@ static Error writeMemProfV2(
&MemProfRecordData,
llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData,
llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
&MemProfCallStackData) {
&MemProfCallStackData,
bool MemProfFullSchema) {
OS.write(memprof::Version2);
uint64_t HeaderUpdatePos = OS.tell();
OS.write(0ULL); // Reserve space for the memprof record table offset.
Expand All @@ -563,7 +565,9 @@ static Error writeMemProfV2(
OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
OS.write(0ULL); // Reserve space for the memprof call stack table offset.

auto Schema = memprof::PortableMemInfoBlock::getSchema();
auto Schema = memprof::PortableMemInfoBlock::getHotColdSchema();
if (MemProfFullSchema)
Schema = memprof::PortableMemInfoBlock::getFullSchema();
writeMemProfSchema(OS, Schema);

uint64_t RecordTableOffset =
Expand Down Expand Up @@ -605,7 +609,7 @@ static Error writeMemProf(
llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData,
llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
&MemProfCallStackData,
memprof::IndexedVersion MemProfVersionRequested) {
memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema) {

switch (MemProfVersionRequested) {
case memprof::Version0:
Expand All @@ -614,7 +618,7 @@ static Error writeMemProf(
return writeMemProfV1(OS, MemProfRecordData, MemProfFrameData);
case memprof::Version2:
return writeMemProfV2(OS, MemProfRecordData, MemProfFrameData,
MemProfCallStackData);
MemProfCallStackData, MemProfFullSchema);
}

return make_error<InstrProfError>(
Expand Down Expand Up @@ -733,7 +737,8 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf)) {
MemProfSectionStart = OS.tell();
if (auto E = writeMemProf(OS, MemProfRecordData, MemProfFrameData,
MemProfCallStackData, MemProfVersionRequested))
MemProfCallStackData, MemProfVersionRequested,
MemProfFullSchema))
return E;
}

Expand Down
3 changes: 3 additions & 0 deletions llvm/test/tools/llvm-profdata/memprof-merge-v0.test
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ RUN: llvm-profdata show %t.prof.v1 | FileCheck %s
RUN: llvm-profdata merge %t.proftext %p/Inputs/basic.memprofraw --memprof-version=2 --profiled-binary %p/Inputs/basic.memprofexe -o %t.prof.v2
RUN: llvm-profdata show %t.prof.v2 | FileCheck %s

RUN: llvm-profdata merge %t.proftext %p/Inputs/basic.memprofraw --memprof-version=2 --memprof-full-schema --profiled-binary %p/Inputs/basic.memprofexe -o %t.prof.v2
RUN: llvm-profdata show %t.prof.v2 | FileCheck %s

For now we only check the validity of the instrumented profile since we don't
have a way to display the contents of the memprof indexed format yet.

Expand Down
6 changes: 5 additions & 1 deletion llvm/tools/llvm-profdata/llvm-profdata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,10 @@ cl::opt<memprof::IndexedVersion> MemProfVersionRequested(
clEnumValN(memprof::Version1, "1", "version 1"),
clEnumValN(memprof::Version2, "2", "version 2")));

cl::opt<bool> MemProfFullSchema(
"memprof-full-schema", cl::Hidden, cl::sub(MergeSubcommand),
cl::desc("Use the full schema for serialization"), cl::init(false));

// Options specific to overlap subcommand.
cl::opt<std::string> BaseFilename(cl::Positional, cl::Required,
cl::desc("<base profile file>"),
Expand Down Expand Up @@ -600,7 +604,7 @@ struct WriterContext {
SmallSet<instrprof_error, 4> &WriterErrorCodes,
uint64_t ReservoirSize = 0, uint64_t MaxTraceLength = 0)
: Writer(IsSparse, ReservoirSize, MaxTraceLength, DoWritePrevVersion,
MemProfVersionRequested),
MemProfVersionRequested, MemProfFullSchema),
ErrLock(ErrLock), WriterErrorCodes(WriterErrorCodes) {}
};

Expand Down
142 changes: 109 additions & 33 deletions llvm/unittests/ProfileData/InstrProfTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -370,12 +370,31 @@ static CallStackIdMapTy getCallStackMapping() {
return Mapping;
}

// Populate all of the fields of MIB.
MemInfoBlock makeFullMIB() {
MemInfoBlock MIB;
#define MIBEntryDef(NameTag, Name, Type) MIB.NameTag;
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
return MIB;
}

// Populate those fields returned by getHotColdSchema.
MemInfoBlock makePartialMIB() {
MemInfoBlock MIB;
MIB.AllocCount = 1;
MIB.TotalSize = 5;
MIB.TotalLifetime = 10;
MIB.TotalLifetimeAccessDensity = 23;
return MIB;
}

IndexedMemProfRecord makeRecord(
std::initializer_list<std::initializer_list<::llvm::memprof::FrameId>>
AllocFrames,
std::initializer_list<std::initializer_list<::llvm::memprof::FrameId>>
CallSiteFrames,
const MemInfoBlock &Block = MemInfoBlock()) {
const MemInfoBlock &Block = makeFullMIB()) {
llvm::memprof::IndexedMemProfRecord MR;
for (const auto &Frames : AllocFrames)
MR.AllocSites.emplace_back(Frames, llvm::memprof::hashCallStack(Frames),
Expand All @@ -388,7 +407,7 @@ IndexedMemProfRecord makeRecord(
IndexedMemProfRecord
makeRecordV2(std::initializer_list<::llvm::memprof::CallStackId> AllocFrames,
std::initializer_list<::llvm::memprof::CallStackId> CallSiteFrames,
const MemInfoBlock &Block = MemInfoBlock()) {
const MemInfoBlock &Block) {
llvm::memprof::IndexedMemProfRecord MR;
for (const auto &CSId : AllocFrames)
// We don't populate IndexedAllocationInfo::CallStack because we use it only
Expand Down Expand Up @@ -476,15 +495,56 @@ TEST_F(InstrProfTest, test_memprof_v0) {
EXPECT_THAT(WantRecord, EqualsRecord(Record));
}

TEST_F(InstrProfTest, test_memprof_v2) {
struct CallStackIdConverter {
std::optional<memprof::FrameId> LastUnmappedFrameId;
std::optional<memprof::CallStackId> LastUnmappedCSId;

const FrameIdMapTy &IdToFrameMap;
const CallStackIdMapTy &CSIdToCallStackMap;

CallStackIdConverter() = delete;
CallStackIdConverter(const FrameIdMapTy &IdToFrameMap,
const CallStackIdMapTy &CSIdToCallStackMap)
: IdToFrameMap(IdToFrameMap), CSIdToCallStackMap(CSIdToCallStackMap) {}

llvm::SmallVector<memprof::Frame>
operator()(::llvm::memprof::CallStackId CSId) {
auto IdToFrameCallback = [&](const memprof::FrameId Id) {
auto Iter = IdToFrameMap.find(Id);
if (Iter == IdToFrameMap.end()) {
LastUnmappedFrameId = Id;
return memprof::Frame(0, 0, 0, false);
}
return Iter->second;
};

llvm::SmallVector<memprof::Frame> Frames;
auto CSIter = CSIdToCallStackMap.find(CSId);
if (CSIter == CSIdToCallStackMap.end()) {
LastUnmappedCSId = CSId;
} else {
const ::llvm::SmallVector<::llvm::memprof::FrameId> &CS =
CSIter->getSecond();
Frames.reserve(CS.size());
for (::llvm::memprof::FrameId Id : CS)
Frames.push_back(IdToFrameCallback(Id));
}
return Frames;
}
};

TEST_F(InstrProfTest, test_memprof_v2_full_schema) {
const MemInfoBlock MIB = makeFullMIB();

Writer.setMemProfVersionRequested(memprof::Version2);
Writer.setMemProfFullSchema(true);

ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf),
Succeeded());

const IndexedMemProfRecord IndexedMR = makeRecordV2(
/*AllocFrames=*/{0x111, 0x222},
/*CallSiteFrames=*/{0x333});
/*CallSiteFrames=*/{0x333}, MIB);
const FrameIdMapTy IdToFrameMap = getFrameMapping();
const auto CSIdToCallStackMap = getCallStackMapping();
for (const auto &I : IdToFrameMap) {
Expand All @@ -502,38 +562,54 @@ TEST_F(InstrProfTest, test_memprof_v2) {
ASSERT_THAT_ERROR(RecordOr.takeError(), Succeeded());
const memprof::MemProfRecord &Record = RecordOr.get();

std::optional<memprof::FrameId> LastUnmappedFrameId;
auto IdToFrameCallback = [&](const memprof::FrameId Id) {
auto Iter = IdToFrameMap.find(Id);
if (Iter == IdToFrameMap.end()) {
LastUnmappedFrameId = Id;
return memprof::Frame(0, 0, 0, false);
}
return Iter->second;
};
CallStackIdConverter CSIdConv(IdToFrameMap, CSIdToCallStackMap);

std::optional<::llvm::memprof::CallStackId> LastUnmappedCSId;
auto CSIdToCallStackCallback = [&](::llvm::memprof::CallStackId CSId) {
llvm::SmallVector<memprof::Frame> Frames;
auto CSIter = CSIdToCallStackMap.find(CSId);
if (CSIter == CSIdToCallStackMap.end()) {
LastUnmappedCSId = CSId;
} else {
const ::llvm::SmallVector<::llvm::memprof::FrameId> &CS =
CSIter->getSecond();
Frames.reserve(CS.size());
for (::llvm::memprof::FrameId Id : CS)
Frames.push_back(IdToFrameCallback(Id));
}
return Frames;
};
const ::llvm::memprof::MemProfRecord WantRecord =
IndexedMR.toMemProfRecord(CSIdConv);
ASSERT_EQ(CSIdConv.LastUnmappedFrameId, std::nullopt)
<< "could not map frame id: " << *CSIdConv.LastUnmappedFrameId;
ASSERT_EQ(CSIdConv.LastUnmappedCSId, std::nullopt)
<< "could not map call stack id: " << *CSIdConv.LastUnmappedCSId;
EXPECT_THAT(WantRecord, EqualsRecord(Record));
}

TEST_F(InstrProfTest, test_memprof_v2_partial_schema) {
const MemInfoBlock MIB = makePartialMIB();

Writer.setMemProfVersionRequested(memprof::Version2);
Writer.setMemProfFullSchema(false);

ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf),
Succeeded());

const IndexedMemProfRecord IndexedMR = makeRecordV2(
/*AllocFrames=*/{0x111, 0x222},
/*CallSiteFrames=*/{0x333}, MIB);
const FrameIdMapTy IdToFrameMap = getFrameMapping();
const auto CSIdToCallStackMap = getCallStackMapping();
for (const auto &I : IdToFrameMap) {
Writer.addMemProfFrame(I.first, I.getSecond(), Err);
}
for (const auto &I : CSIdToCallStackMap) {
Writer.addMemProfCallStack(I.first, I.getSecond(), Err);
}
Writer.addMemProfRecord(/*Id=*/0x9999, IndexedMR);

auto Profile = Writer.writeBuffer();
readProfile(std::move(Profile));

auto RecordOr = Reader->getMemProfRecord(0x9999);
ASSERT_THAT_ERROR(RecordOr.takeError(), Succeeded());
const memprof::MemProfRecord &Record = RecordOr.get();

CallStackIdConverter CSIdConv(IdToFrameMap, CSIdToCallStackMap);

const ::llvm::memprof::MemProfRecord WantRecord =
IndexedMR.toMemProfRecord(CSIdToCallStackCallback);
ASSERT_EQ(LastUnmappedFrameId, std::nullopt)
<< "could not map frame id: " << *LastUnmappedFrameId;
ASSERT_EQ(LastUnmappedCSId, std::nullopt)
<< "could not map call stack id: " << *LastUnmappedCSId;
IndexedMR.toMemProfRecord(CSIdConv);
ASSERT_EQ(CSIdConv.LastUnmappedFrameId, std::nullopt)
<< "could not map frame id: " << *CSIdConv.LastUnmappedFrameId;
ASSERT_EQ(CSIdConv.LastUnmappedCSId, std::nullopt)
<< "could not map call stack id: " << *CSIdConv.LastUnmappedCSId;
EXPECT_THAT(WantRecord, EqualsRecord(Record));
}

Expand Down

0 comments on commit 4c8ec8f

Please sign in to comment.