Skip to content

Commit

Permalink
[BOLT] Get rid of Names in BinaryData
Browse files Browse the repository at this point in the history
Summary:
For BinaryData, we used to maintain a vector of StringRef names and also
a vector of pointers to MCSymbol's associated with the data. There was
an unnecessary duplication of information and an associated overhead of
keeping it in sync. Fix it by removing Names and using Symbols wherever
Names were used.

Also merge two variants of registerNameAtAddress() and remove
unreachable/dead code in the process.

(cherry picked from FBD19359123)
  • Loading branch information
maksfb committed Jan 11, 2020
1 parent 088e3c0 commit 45b27d7
Show file tree
Hide file tree
Showing 9 changed files with 88 additions and 100 deletions.
81 changes: 23 additions & 58 deletions bolt/src/BinaryContext.cpp
Expand Up @@ -524,15 +524,14 @@ BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
const auto EntrySize = getJumpTableEntrySize(Type);
if (!JTLabel) {
const auto JumpTableName = generateJumpTableName(Function, Address);
JTLabel = Ctx->getOrCreateSymbol(JumpTableName);
registerNameAtAddress(JTLabel->getName(), Address, 0, EntrySize);
JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize);
}

DEBUG(dbgs() << "BOLT-DEBUG: creating jump table "
<< JTLabel->getName()
<< " in function " << Function << 'n');

auto *JT = new JumpTable(JTLabel->getName(),
auto *JT = new JumpTable(*JTLabel,
Address,
EntrySize,
Type,
Expand Down Expand Up @@ -562,7 +561,7 @@ BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
}
assert(Found && "Label not found");
auto *NewLabel = Ctx->createTempSymbol("duplicatedJT", true);
auto *NewJT = new JumpTable(NewLabel->getName(),
auto *NewJT = new JumpTable(*NewLabel,
JT->getAddress(),
JT->EntrySize,
JT->Type,
Expand Down Expand Up @@ -706,59 +705,31 @@ MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name,
uint64_t Size,
uint16_t Alignment,
unsigned Flags) {
auto SectionOrErr = getSectionForAddress(Address);
auto &Section = SectionOrErr ? SectionOrErr.get() : absoluteSection();
// Register the name with MCContext.
auto *Symbol = Ctx->getOrCreateSymbol(Name);

auto GAI = BinaryDataMap.find(Address);
BinaryData *BD;
if (GAI == BinaryDataMap.end()) {
BD = new BinaryData(Name,
auto SectionOrErr = getSectionForAddress(Address);
auto &Section = SectionOrErr ? SectionOrErr.get() : absoluteSection();
BD = new BinaryData(*Symbol,
Address,
Size,
Alignment ? Alignment : 1,
Section,
Flags);
} else {
BD = GAI->second;
}
return registerNameAtAddress(Name, Address, BD);
}

MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name,
uint64_t Address,
BinaryData *BD) {
auto GAI = BinaryDataMap.find(Address);
if (GAI != BinaryDataMap.end()) {
if (BD != GAI->second) {
// Note: this could be a source of bugs if client code holds
// on to BinaryData*'s in data structures for any length of time.
auto *OldBD = GAI->second;
BD->merge(GAI->second);
delete OldBD;
GAI->second = BD;
for (auto &Name : BD->names()) {
GlobalSymbols[Name] = BD;
}
updateObjectNesting(GAI);
BD = nullptr;
} else if (!GAI->second->hasName(Name)) {
GAI->second->Names.push_back(Name);
GlobalSymbols[Name] = GAI->second;
} else {
BD = nullptr;
}
} else {
GAI = BinaryDataMap.emplace(Address, BD).first;
GlobalSymbols[Name] = BD;
updateObjectNesting(GAI);
} else {
BD = GAI->second;
if (!BD->hasName(Name)) {
GlobalSymbols[Name] = BD;
BD->Symbols.push_back(Symbol);
}
}

// Register the name with MCContext.
auto *Symbol = Ctx->getOrCreateSymbol(Name);
if (BD) {
BD->Symbols.push_back(Symbol);
assert(BD->Symbols.size() == BD->Names.size() &&
"there should be a 1:1 mapping between names and symbols");
}
return Symbol;
}

Expand Down Expand Up @@ -832,18 +803,15 @@ void BinaryContext::generateSymbolHashes() {
continue;

// First check if a non-anonymous alias exists and move it to the front.
if (BD.getNames().size() > 1) {
auto Itr = std::find_if(BD.Names.begin(),
BD.Names.end(),
[&](const StringRef Name) {
return !isInternalSymbolName(Name);
if (BD.getSymbols().size() > 1) {
auto Itr = std::find_if(BD.getSymbols().begin(),
BD.getSymbols().end(),
[&](const MCSymbol *Symbol) {
return !isInternalSymbolName(Symbol->getName());
});
if (Itr != BD.Names.end()) {
assert(BD.Names.size() == BD.Symbols.size() &&
"there should be a 1:1 mapping between names and symbols");
auto Idx = std::distance(BD.Names.begin(), Itr);
std::swap(BD.Names[0], *Itr);
std::swap(BD.Symbols[0], BD.Symbols[Idx]);
if (Itr != BD.getSymbols().end()) {
auto Idx = std::distance(BD.getSymbols().begin(), Itr);
std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]);
continue;
}
}
Expand All @@ -869,11 +837,8 @@ void BinaryContext::generateSymbolHashes() {
}
continue;
}
BD.Names.insert(BD.Names.begin(), NewName);
BD.Symbols.insert(BD.Symbols.begin(),
Ctx->getOrCreateSymbol(NewName));
assert(BD.Names.size() == BD.Symbols.size() &&
"there should be a 1:1 mapping between names and symbols");
GlobalSymbols[NewName] = &BD;
}
if (NumCollisions) {
Expand Down
10 changes: 3 additions & 7 deletions bolt/src/BinaryContext.h
Expand Up @@ -592,13 +592,9 @@ class BinaryContext {
uint16_t Alignment = 0,
unsigned Flags = 0);

/// Register a symbol with \p Name at a given \p Address and \p Size.
MCSymbol *registerNameAtAddress(StringRef Name,
uint64_t Address,
BinaryData* BD);

/// Register a symbol with \p Name at a given \p Address, \p Size and
/// /p Flags. See llvm::SymbolRef::Flags for definition of /p Flags.
/// Register a symbol with \p Name at a given \p Address using \p Size,
/// \p Alignment, and \p Flags. See llvm::SymbolRef::Flags for the definition
/// of \p Flags.
MCSymbol *registerNameAtAddress(StringRef Name,
uint64_t Address,
uint64_t Size,
Expand Down
37 changes: 27 additions & 10 deletions bolt/src/BinaryData.cpp
Expand Up @@ -48,19 +48,35 @@ void BinaryData::merge(const BinaryData *Other) {
assert(*Section == *Other->Section);
assert(OutputOffset == Other->OutputOffset);
assert(OutputSection == Other->OutputSection);
Names.insert(Names.end(), Other->Names.begin(), Other->Names.end());
Symbols.insert(Symbols.end(), Other->Symbols.begin(), Other->Symbols.end());
MemData.insert(MemData.end(), Other->MemData.begin(), Other->MemData.end());
Flags |= Other->Flags;
if (!Size)
Size = Other->Size;
}

bool BinaryData::hasName(StringRef Name) const {
for (const auto *Symbol : Symbols) {
if (Name == Symbol->getName())
return true;
}
return false;
}

bool BinaryData::hasNameRegex(StringRef NameRegex) const {
Regex MatchName(NameRegex);
for (auto &Name : Names)
if (MatchName.match(Name))
for (const auto *Symbol : Symbols) {
if (MatchName.match(Symbol->getName()))
return true;
}
return false;
}

bool BinaryData::nameStartsWith(StringRef Prefix) const {
for (const auto *Symbol : Symbols) {
if (Symbol->getName().startswith(Prefix))
return true;
}
return false;
}

Expand Down Expand Up @@ -105,10 +121,10 @@ void BinaryData::printBrief(raw_ostream &OS) const {

OS << getName();

if ((opts::PrintSymbolAliases || opts::Verbosity > 1) && Names.size() > 1) {
if ((opts::PrintSymbolAliases || opts::Verbosity > 1) && Symbols.size() > 1) {
OS << ", aliases:";
for (unsigned I = 1u; I < Names.size(); ++I) {
OS << (I == 1 ? " (" : ", ") << Names[I];
for (unsigned I = 1u; I < Symbols.size(); ++I) {
OS << (I == 1 ? " (" : ", ") << Symbols[I]->getName();
}
OS << ")";
}
Expand All @@ -133,18 +149,19 @@ void BinaryData::printBrief(raw_ostream &OS) const {
OS << ")";
}

BinaryData::BinaryData(StringRef Name,
BinaryData::BinaryData(MCSymbol &Symbol,
uint64_t Address,
uint64_t Size,
uint16_t Alignment,
BinarySection &Section,
unsigned Flags)
: Names({Name}),
Section(&Section),
: Section(&Section),
Address(Address),
Size(Size),
Alignment(Alignment),
Flags(Flags),
OutputSection(&Section),
OutputOffset(getOffset())
{ }
{
Symbols.push_back(&Symbol);
}
30 changes: 9 additions & 21 deletions bolt/src/BinaryData.h
Expand Up @@ -41,10 +41,7 @@ class BinaryData {
BinaryData &operator=(const BinaryData &) = delete;

protected:
/// All names associated with this data. The first name is the primary one.
std::vector<std::string> Names;
/// All symbols associated with this data. This vector should have one entry
/// corresponding to every entry in \p Names.
/// All symbols associated with this data.
std::vector<MCSymbol *> Symbols;

/// Section this data belongs to.
Expand Down Expand Up @@ -85,7 +82,7 @@ class BinaryData {

public:
BinaryData(BinaryData &&) = default;
BinaryData(StringRef Name,
BinaryData(MCSymbol &Symbol,
uint64_t Address,
uint64_t Size,
uint16_t Alignment,
Expand All @@ -108,10 +105,6 @@ class BinaryData {
return isTopLevelJumpTable() || !Parent;
}

iterator_range<std::vector<std::string>::const_iterator> names() const {
return make_range(Names.begin(), Names.end());
}

iterator_range<std::vector<MCSymbol *>::const_iterator> symbols() const {
return make_range(Symbols.begin(), Symbols.end());
}
Expand All @@ -120,22 +113,17 @@ class BinaryData {
return make_range(MemData.begin(), MemData.end());
}

StringRef getName() const { return Names.front(); }
const std::vector<std::string> &getNames() const { return Names; }
StringRef getName() const { return getSymbol()->getName(); }

MCSymbol *getSymbol() { return Symbols.front(); }
const MCSymbol *getSymbol() const { return Symbols.front(); }

bool hasName(StringRef Name) const {
return std::find(Names.begin(), Names.end(), Name) != Names.end();
}
const std::vector<MCSymbol *> &getSymbols() const { return Symbols; }
std::vector<MCSymbol *> &getSymbols() { return Symbols; }

bool hasName(StringRef Name) const;
bool hasNameRegex(StringRef Name) const;
bool nameStartsWith(StringRef Prefix) const {
for (const auto &Name : Names) {
if (StringRef(Name).startswith(Prefix))
return true;
}
return false;
}
bool nameStartsWith(StringRef Prefix) const;

bool hasSymbol(const MCSymbol *Symbol) const {
return std::find(Symbols.begin(), Symbols.end(), Symbol) != Symbols.end();
Expand Down
2 changes: 1 addition & 1 deletion bolt/src/BinaryFunctionProfile.cpp
Expand Up @@ -774,7 +774,7 @@ bool BinaryFunction::fetchProfileForOtherEntryPoints() {
uint64_t EntryAddress = BB->getOffset() + getAddress();
// Look for branch data associated with this entry point
if (auto *BD = BC.getBinaryDataAtAddress(EntryAddress)) {
if (FuncBranchData *Data = BC.DR.getFuncBranchData(BD->getNames())) {
if (FuncBranchData *Data = BC.DR.getFuncBranchData(BD->getSymbols())) {
BranchData->appendFrom(*Data, BB->getOffset());
Data->Used = true;
Updated = true;
Expand Down
18 changes: 18 additions & 0 deletions bolt/src/DataReader.cpp
Expand Up @@ -736,6 +736,19 @@ void DataReader::buildLTONameMaps() {
}

namespace {
template <typename MapTy>
decltype(MapTy::MapEntryTy::second) *
fetchMapEntry(MapTy &Map, const std::vector<MCSymbol *> &Symbols) {
// Do a reverse order iteration since the name in profile has a higher chance
// of matching a name at the end of the list.
for (auto SI = Symbols.rbegin(), SE = Symbols.rend(); SI != SE; ++SI) {
auto I = Map.find(normalizeName((*SI)->getName()));
if (I != Map.end())
return &I->getValue();
}
return nullptr;
}

template <typename MapTy>
decltype(MapTy::MapEntryTy::second) *
fetchMapEntry(MapTy &Map, const std::vector<std::string> &FuncNames) {
Expand Down Expand Up @@ -785,6 +798,11 @@ DataReader::getFuncBranchData(const std::vector<std::string> &FuncNames) {
return fetchMapEntry<FuncsToBranchesMapTy>(FuncsToBranches, FuncNames);
}

FuncBranchData *
DataReader::getFuncBranchData(const std::vector<MCSymbol *> &Symbols) {
return fetchMapEntry<FuncsToBranchesMapTy>(FuncsToBranches, Symbols);
}

FuncMemData *
DataReader::getFuncMemData(const std::vector<std::string> &FuncNames) {
return fetchMapEntry<FuncsToMemEventsMapTy>(FuncsToMemEvents, FuncNames);
Expand Down
4 changes: 4 additions & 0 deletions bolt/src/DataReader.h
Expand Up @@ -20,6 +20,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/ErrorOr.h"
Expand Down Expand Up @@ -361,6 +362,9 @@ class DataReader {
FuncBranchData *
getFuncBranchData(const std::vector<std::string> &FuncNames);

/// Return branch data matching one of the \p Symbols.
FuncBranchData *getFuncBranchData(const std::vector<MCSymbol *> &Symbols);

/// Return mem data matching one of the names in \p FuncNames.
FuncMemData *getFuncMemData(const std::vector<std::string> &FuncNames);

Expand Down
4 changes: 2 additions & 2 deletions bolt/src/JumpTable.cpp
Expand Up @@ -28,14 +28,14 @@ extern cl::opt<JumpTableSupportLevel> JumpTables;
extern cl::opt<unsigned> Verbosity;
}

JumpTable::JumpTable(StringRef Name,
JumpTable::JumpTable(MCSymbol &Symbol,
uint64_t Address,
std::size_t EntrySize,
JumpTableType Type,
LabelMapType &&Labels,
BinaryFunction &BF,
BinarySection &Section)
: BinaryData(Name, Address, 0, EntrySize, Section),
: BinaryData(Symbol, Address, 0, EntrySize, Section),
EntrySize(EntrySize),
OutputEntrySize(EntrySize),
Type(Type),
Expand Down
2 changes: 1 addition & 1 deletion bolt/src/JumpTable.h
Expand Up @@ -89,7 +89,7 @@ class JumpTable : public BinaryData {

private:
/// Constructor should only be called by a BinaryContext.
JumpTable(StringRef Name,
JumpTable(MCSymbol &Symbol,
uint64_t Address,
std::size_t EntrySize,
JumpTableType Type,
Expand Down

0 comments on commit 45b27d7

Please sign in to comment.