Skip to content

Commit

Permalink
Support for remapping profile data when symbols change, for
Browse files Browse the repository at this point in the history
instrumentation-based profiling.

Reviewers: davidxl, tejohnson, dlj, erik.pilkington

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D51247

llvm-svn: 344184
  • Loading branch information
zygoloid committed Oct 10, 2018
1 parent c0b28d5 commit ceed4eb
Show file tree
Hide file tree
Showing 3 changed files with 223 additions and 15 deletions.
30 changes: 26 additions & 4 deletions llvm/include/llvm/ProfileData/InstrProfReader.h
Expand Up @@ -348,13 +348,18 @@ struct InstrProfReaderIndexBase {
using OnDiskHashTableImplV3 =
OnDiskIterableChainedHashTable<InstrProfLookupTrait>;

template <typename HashTableImpl>
class InstrProfReaderItaniumRemapper;

template <typename HashTableImpl>
class InstrProfReaderIndex : public InstrProfReaderIndexBase {
private:
std::unique_ptr<HashTableImpl> HashTable;
typename HashTableImpl::data_iterator RecordIterator;
uint64_t FormatVersion;

friend class InstrProfReaderItaniumRemapper<HashTableImpl>;

public:
InstrProfReaderIndex(const unsigned char *Buckets,
const unsigned char *const Payload,
Expand Down Expand Up @@ -386,13 +391,26 @@ class InstrProfReaderIndex : public InstrProfReaderIndexBase {
}
};

/// Name matcher supporting fuzzy matching of symbol names to names in profiles.
class InstrProfReaderRemapper {
public:
virtual ~InstrProfReaderRemapper() {}
virtual Error populateRemappings() { return Error::success(); }
virtual Error getRecords(StringRef FuncName,
ArrayRef<NamedInstrProfRecord> &Data) = 0;
};

/// Reader for the indexed binary instrprof format.
class IndexedInstrProfReader : public InstrProfReader {
private:
/// The profile data file contents.
std::unique_ptr<MemoryBuffer> DataBuffer;
/// The profile remapping file contents.
std::unique_ptr<MemoryBuffer> RemappingBuffer;
/// The index into the profile data.
std::unique_ptr<InstrProfReaderIndexBase> Index;
/// The profile remapping file contents.
std::unique_ptr<InstrProfReaderRemapper> Remapper;
/// Profile summary data.
std::unique_ptr<ProfileSummary> Summary;
// Index to the current record in the record array.
Expand All @@ -404,8 +422,11 @@ class IndexedInstrProfReader : public InstrProfReader {
const unsigned char *Cur);

public:
IndexedInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
: DataBuffer(std::move(DataBuffer)), RecordIndex(0) {}
IndexedInstrProfReader(
std::unique_ptr<MemoryBuffer> DataBuffer,
std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr)
: DataBuffer(std::move(DataBuffer)),
RemappingBuffer(std::move(RemappingBuffer)), RecordIndex(0) {}
IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;

Expand Down Expand Up @@ -434,10 +455,11 @@ class IndexedInstrProfReader : public InstrProfReader {

/// Factory method to create an indexed reader.
static Expected<std::unique_ptr<IndexedInstrProfReader>>
create(const Twine &Path);
create(const Twine &Path, const Twine &RemappingPath = "");

static Expected<std::unique_ptr<IndexedInstrProfReader>>
create(std::unique_ptr<MemoryBuffer> Buffer);
create(std::unique_ptr<MemoryBuffer> Buffer,
std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr);

// Used for testing purpose only.
void setValueProfDataEndianness(support::endianness Endianness) {
Expand Down
164 changes: 155 additions & 9 deletions llvm/lib/ProfileData/InstrProfReader.cpp
Expand Up @@ -14,6 +14,7 @@

#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/ProfileSummary.h"
Expand All @@ -23,6 +24,7 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SymbolRemappingReader.h"
#include "llvm/Support/SwapByteOrder.h"
#include <algorithm>
#include <cctype>
Expand Down Expand Up @@ -88,24 +90,38 @@ InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
}

Expected<std::unique_ptr<IndexedInstrProfReader>>
IndexedInstrProfReader::create(const Twine &Path) {
IndexedInstrProfReader::create(const Twine &Path, const Twine &RemappingPath) {
// Set up the buffer to read.
auto BufferOrError = setupMemoryBuffer(Path);
if (Error E = BufferOrError.takeError())
return std::move(E);
return IndexedInstrProfReader::create(std::move(BufferOrError.get()));

// Set up the remapping buffer if requested.
std::unique_ptr<MemoryBuffer> RemappingBuffer;
std::string RemappingPathStr = RemappingPath.str();
if (!RemappingPathStr.empty()) {
auto RemappingBufferOrError = setupMemoryBuffer(RemappingPathStr);
if (Error E = RemappingBufferOrError.takeError())
return std::move(E);
RemappingBuffer = std::move(RemappingBufferOrError.get());
}

return IndexedInstrProfReader::create(std::move(BufferOrError.get()),
std::move(RemappingBuffer));
}

Expected<std::unique_ptr<IndexedInstrProfReader>>
IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
std::unique_ptr<MemoryBuffer> RemappingBuffer) {
// Sanity check the buffer.
if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<unsigned>::max())
return make_error<InstrProfError>(instrprof_error::too_large);

// Create the reader.
if (!IndexedInstrProfReader::hasFormat(*Buffer))
return make_error<InstrProfError>(instrprof_error::bad_magic);
auto Result = llvm::make_unique<IndexedInstrProfReader>(std::move(Buffer));
auto Result = llvm::make_unique<IndexedInstrProfReader>(
std::move(Buffer), std::move(RemappingBuffer));

// Initialize the reader and return the result.
if (Error E = initializeReader(*Result))
Expand Down Expand Up @@ -587,6 +603,124 @@ InstrProfReaderIndex<HashTableImpl>::InstrProfReaderIndex(
RecordIterator = HashTable->data_begin();
}

namespace {
/// A remapper that does not apply any remappings.
class InstrProfReaderNullRemapper : public InstrProfReaderRemapper {
InstrProfReaderIndexBase &Underlying;

public:
InstrProfReaderNullRemapper(InstrProfReaderIndexBase &Underlying)
: Underlying(Underlying) {}

Error getRecords(StringRef FuncName,
ArrayRef<NamedInstrProfRecord> &Data) override {
return Underlying.getRecords(FuncName, Data);
}
};
}

/// A remapper that applies remappings based on a symbol remapping file.
template <typename HashTableImpl>
class llvm::InstrProfReaderItaniumRemapper
: public InstrProfReaderRemapper {
public:
InstrProfReaderItaniumRemapper(
std::unique_ptr<MemoryBuffer> RemapBuffer,
InstrProfReaderIndex<HashTableImpl> &Underlying)
: RemapBuffer(std::move(RemapBuffer)), Underlying(Underlying) {
}

/// Extract the original function name from a PGO function name.
static StringRef extractName(StringRef Name) {
// We can have multiple :-separated pieces; there can be pieces both
// before and after the mangled name. Find the first part that starts
// with '_Z'; we'll assume that's the mangled name we want.
std::pair<StringRef, StringRef> Parts = {StringRef(), Name};
while (true) {
Parts = Parts.second.split(':');
if (Parts.first.startswith("_Z"))
return Parts.first;
if (Parts.second.empty())
return Name;
}
}

/// Given a mangled name extracted from a PGO function name, and a new
/// form for that mangled name, reconstitute the name.
static void reconstituteName(StringRef OrigName, StringRef ExtractedName,
StringRef Replacement,
SmallVectorImpl<char> &Out) {
Out.reserve(OrigName.size() + Replacement.size() - ExtractedName.size());
Out.insert(Out.end(), OrigName.begin(), ExtractedName.begin());
Out.insert(Out.end(), Replacement.begin(), Replacement.end());
Out.insert(Out.end(), ExtractedName.end(), OrigName.end());
}

Error populateRemappings() override {
if (Error E = Remappings.read(*RemapBuffer))
return E;
for (StringRef Name : Underlying.HashTable->keys()) {
StringRef RealName = extractName(Name);
if (auto Key = Remappings.insert(RealName)) {
// FIXME: We could theoretically map the same equivalence class to
// multiple names in the profile data. If that happens, we should
// return NamedInstrProfRecords from all of them.
MappedNames.insert({Key, RealName});
}
}
return Error::success();
}

Error getRecords(StringRef FuncName,
ArrayRef<NamedInstrProfRecord> &Data) override {
StringRef RealName = extractName(FuncName);
if (auto Key = Remappings.lookup(RealName)) {
StringRef Remapped = MappedNames.lookup(Key);
if (!Remapped.empty()) {
if (RealName.begin() == FuncName.begin() &&
RealName.end() == FuncName.end())
FuncName = Remapped;
else {
// Try rebuilding the name from the given remapping.
SmallString<256> Reconstituted;
reconstituteName(FuncName, RealName, Remapped, Reconstituted);
Error E = Underlying.getRecords(Reconstituted, Data);
if (!E)
return E;

// If we failed because the name doesn't exist, fall back to asking
// about the original name.
if (Error Unhandled = handleErrors(
std::move(E), [](std::unique_ptr<InstrProfError> Err) {
return Err->get() == instrprof_error::unknown_function
? Error::success()
: Error(std::move(Err));
}))
return Unhandled;
}
}
}
return Underlying.getRecords(FuncName, Data);
}

private:
/// The memory buffer containing the remapping configuration. Remappings
/// holds pointers into this buffer.
std::unique_ptr<MemoryBuffer> RemapBuffer;

/// The mangling remapper.
SymbolRemappingReader Remappings;

/// Mapping from mangled name keys to the name used for the key in the
/// profile data.
/// FIXME: Can we store a location within the on-disk hash table instead of
/// redoing lookup?
DenseMap<SymbolRemappingReader::Key, StringRef> MappedNames;

/// The real profile data reader.
InstrProfReaderIndex<HashTableImpl> &Underlying;
};

bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
using namespace support;

Expand Down Expand Up @@ -683,10 +817,22 @@ Error IndexedInstrProfReader::readHeader() {
uint64_t HashOffset = endian::byte_swap<uint64_t, little>(Header->HashOffset);

// The rest of the file is an on disk hash table.
InstrProfReaderIndexBase *IndexPtr = nullptr;
IndexPtr = new InstrProfReaderIndex<OnDiskHashTableImplV3>(
Start + HashOffset, Cur, Start, HashType, FormatVersion);
Index.reset(IndexPtr);
auto IndexPtr =
llvm::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>(
Start + HashOffset, Cur, Start, HashType, FormatVersion);

// Load the remapping table now if requested.
if (RemappingBuffer) {
Remapper = llvm::make_unique<
InstrProfReaderItaniumRemapper<OnDiskHashTableImplV3>>(
std::move(RemappingBuffer), *IndexPtr);
if (Error E = Remapper->populateRemappings())
return E;
} else {
Remapper = llvm::make_unique<InstrProfReaderNullRemapper>(*IndexPtr);
}
Index = std::move(IndexPtr);

return success();
}

Expand All @@ -707,7 +853,7 @@ Expected<InstrProfRecord>
IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName,
uint64_t FuncHash) {
ArrayRef<NamedInstrProfRecord> Data;
Error Err = Index->getRecords(FuncName, Data);
Error Err = Remapper->getRecords(FuncName, Data);
if (Err)
return std::move(Err);
// Found it. Look for counters with the right hash.
Expand Down
44 changes: 42 additions & 2 deletions llvm/unittests/ProfileData/InstrProfTest.cpp
Expand Up @@ -42,8 +42,10 @@ struct InstrProfTest : ::testing::Test {

void SetUp() { Writer.setOutputSparse(false); }

void readProfile(std::unique_ptr<MemoryBuffer> Profile) {
auto ReaderOrErr = IndexedInstrProfReader::create(std::move(Profile));
void readProfile(std::unique_ptr<MemoryBuffer> Profile,
std::unique_ptr<MemoryBuffer> Remapping = nullptr) {
auto ReaderOrErr = IndexedInstrProfReader::create(std::move(Profile),
std::move(Remapping));
EXPECT_THAT_ERROR(ReaderOrErr.takeError(), Succeeded());
Reader = std::move(ReaderOrErr.get());
}
Expand Down Expand Up @@ -990,6 +992,44 @@ TEST_P(MaybeSparseInstrProfTest, instr_prof_symtab_compression_test) {
}
}

TEST_P(MaybeSparseInstrProfTest, remapping_test) {
Writer.addRecord({"_Z3fooi", 0x1234, {1, 2, 3, 4}}, Err);
Writer.addRecord({"file:_Z3barf", 0x567, {5, 6, 7}}, Err);
auto Profile = Writer.writeBuffer();
readProfile(std::move(Profile), llvm::MemoryBuffer::getMemBuffer(R"(
type i l
name 3bar 4quux
)"));

std::vector<uint64_t> Counts;
for (StringRef FooName : {"_Z3fooi", "_Z3fool"}) {
EXPECT_THAT_ERROR(Reader->getFunctionCounts(FooName, 0x1234, Counts),
Succeeded());
ASSERT_EQ(4u, Counts.size());
EXPECT_EQ(1u, Counts[0]);
EXPECT_EQ(2u, Counts[1]);
EXPECT_EQ(3u, Counts[2]);
EXPECT_EQ(4u, Counts[3]);
}

for (StringRef BarName : {"file:_Z3barf", "file:_Z4quuxf"}) {
EXPECT_THAT_ERROR(Reader->getFunctionCounts(BarName, 0x567, Counts),
Succeeded());
ASSERT_EQ(3u, Counts.size());
EXPECT_EQ(5u, Counts[0]);
EXPECT_EQ(6u, Counts[1]);
EXPECT_EQ(7u, Counts[2]);
}

for (StringRef BadName : {"_Z3foof", "_Z4quuxi", "_Z3barl", "", "_ZZZ",
"_Z3barf", "otherfile:_Z4quuxf"}) {
EXPECT_THAT_ERROR(Reader->getFunctionCounts(BadName, 0x1234, Counts),
Failed());
EXPECT_THAT_ERROR(Reader->getFunctionCounts(BadName, 0x567, Counts),
Failed());
}
}

TEST_F(SparseInstrProfTest, preserve_no_records) {
Writer.addRecord({"foo", 0x1234, {0}}, Err);
Writer.addRecord({"bar", 0x4321, {0, 0}}, Err);
Expand Down

0 comments on commit ceed4eb

Please sign in to comment.