Skip to content

Commit

Permalink
[BOLT] Calculate input to output address map using BOLTLinker
Browse files Browse the repository at this point in the history
BOLT uses MCAsmLayout to calculate the output values of basic blocks.
This means output values are calculated based on a pre-linking state and
any changes to symbol values during linking will cause incorrect values
to be used.

This issue was first addressed in D154604 by adding all basic block
symbols to the symbol table for the linker to resolve them. However, the
runtime overhead of handling this huge symbol table turned out to be
prohibitively large.

This patch solves the issue in a different way. First, a temporary
section containing [input address, output symbol] pairs is emitted to the
intermediary object file. The linker will resolve all these references
so we end up with a section of [input address, output address] pairs.
This section is then parsed and used to:
- Replace BinaryBasicBlock::OffsetTranslationTable
- Replace BinaryFunction::InputOffsetToAddressMap
- Update BinaryBasicBlock::OutputAddressRange

Note that the reason this is more performant than the previous attempt
is that these symbol references do not cause entries to be added to the
symbol table. Instead, section-relative references are used for the
relocations.

Reviewed By: maksfb

Differential Revision: https://reviews.llvm.org/D155604
  • Loading branch information
mtvec committed Aug 21, 2023
1 parent b09c575 commit 23c8d38
Show file tree
Hide file tree
Showing 13 changed files with 183 additions and 70 deletions.
59 changes: 59 additions & 0 deletions bolt/include/bolt/Core/AddressMap.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
//===- bolt/Core/AddressMap.h - Input-output address map --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Helper class to create a mapping from input to output addresses needed for
// updating debugging symbols and BAT. We emit an MCSection containing
// <Input address, Output MCSymbol> pairs to the object file and JITLink will
// transform this into <Input address, Output address> pairs. The linker output
// can then be parsed and used to establish the mapping.
//
//===----------------------------------------------------------------------===//
//
#ifndef BOLT_CORE_ADDRESS_MAP_H
#define BOLT_CORE_ADDRESS_MAP_H

#include "llvm/ADT/StringRef.h"

#include <optional>
#include <unordered_map>

namespace llvm {

class MCStreamer;

namespace bolt {

class BinaryContext;

/// Lookup table from input addresses to output addresses. Entries are kept in
/// a multimap, so one input address may be associated with several output
/// addresses.
class AddressMap {
  using MapTy = std::unordered_multimap<uint64_t, uint64_t>;

  /// Parsed <input address, output address> entries.
  MapTy Map;

public:
  /// Name of the section that carries the address map.
  static const char *const SectionName;

  /// Emit the address map entries for \p BC to \p Streamer.
  static void emit(MCStreamer &Streamer, BinaryContext &BC);

  /// Build an AddressMap from \p Buffer, the contents of the address map
  /// section.
  static AddressMap parse(StringRef Buffer, const BinaryContext &BC);

  /// Return an output address recorded for \p InputAddress, or std::nullopt
  /// if the map has no entry for it. When several entries share the same
  /// input address, whichever one the underlying find() yields is returned.
  std::optional<uint64_t> lookup(uint64_t InputAddress) const {
    const auto Entry = Map.find(InputAddress);
    if (Entry == Map.end())
      return std::nullopt;
    return Entry->second;
  }

  /// Return the [first, second) iterator range covering every entry whose
  /// input address equals \p InputAddress. The range is empty if there is
  /// none.
  std::pair<MapTy::const_iterator, MapTy::const_iterator>
  lookupAll(uint64_t InputAddress) const {
    auto Matches = Map.equal_range(InputAddress);
    return Matches;
  }
};

} // namespace bolt
} // namespace llvm

#endif
26 changes: 1 addition & 25 deletions bolt/include/bolt/Core/BinaryBasicBlock.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,16 +100,6 @@ class BinaryBasicBlock {
using LocSymsTy = std::vector<std::pair<uint32_t, const MCSymbol *>>;
std::unique_ptr<LocSymsTy> LocSyms;

/// After output/codegen, map output offsets of instructions in this basic
/// block to instruction offsets in the original function. Note that the
/// output basic block could be different from the input basic block.
/// We only map instruction of interest, such as calls and markers.
///
/// We store the offset array in a basic block to facilitate BAT tables
/// generation. Otherwise, the mapping could be done at function level.
using OffsetTranslationTableTy = std::vector<std::pair<uint32_t, uint32_t>>;
std::unique_ptr<OffsetTranslationTableTy> OffsetTranslationTable;

/// Alignment requirements for the block.
uint32_t Alignment{1};

Expand Down Expand Up @@ -828,8 +818,7 @@ class BinaryBasicBlock {
return OutputAddressRange;
}

/// Update addresses of special instructions inside this basic block.
void updateOutputValues(const MCAsmLayout &Layout);
/// Return true if the LocSyms table has been allocated for this basic block,
/// i.e. the owning pointer is non-null.
bool hasLocSyms() const { return LocSyms != nullptr; }

/// Return mapping of input offsets to symbols in the output.
LocSymsTy &getLocSyms() {
Expand All @@ -841,19 +830,6 @@ class BinaryBasicBlock {
return const_cast<BinaryBasicBlock *>(this)->getLocSyms();
}

/// Return the offset translation table for the basic block, allocating an
/// empty table on first use.
OffsetTranslationTableTy &getOffsetTranslationTable() {
  if (!OffsetTranslationTable)
    OffsetTranslationTable = std::make_unique<OffsetTranslationTableTy>();
  return *OffsetTranslationTable;
}

/// Const overload: return the offset translation table for the basic block.
/// Forwards to the non-const accessor.
const OffsetTranslationTableTy &getOffsetTranslationTable() const {
  auto *MutableThis = const_cast<BinaryBasicBlock *>(this);
  return MutableThis->getOffsetTranslationTable();
}

/// Return size of the basic block in the output binary.
uint64_t getOutputSize() const {
return OutputAddressRange.second - OutputAddressRange.first;
Expand Down
10 changes: 10 additions & 0 deletions bolt/include/bolt/Core/BinaryContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#ifndef BOLT_CORE_BINARY_CONTEXT_H
#define BOLT_CORE_BINARY_CONTEXT_H

#include "bolt/Core/AddressMap.h"
#include "bolt/Core/BinaryData.h"
#include "bolt/Core/BinarySection.h"
#include "bolt/Core/DebugData.h"
Expand Down Expand Up @@ -221,6 +222,9 @@ class BinaryContext {
bool ContainsDwarf5{false};
bool ContainsDwarfLegacy{false};

/// Mapping from input to output addresses.
std::optional<AddressMap> IOAddressMap;

/// Preprocess DWO debug information.
void preprocessDWODebugInfo();

Expand Down Expand Up @@ -1343,6 +1347,12 @@ class BinaryContext {
/* DWARFMustBeAtTheEnd */ false));
return Streamer;
}

/// Store \p Map as the input-to-output address map of this context,
/// replacing any map that was set before.
void setIOAddressMap(AddressMap Map) {
  IOAddressMap.emplace(std::move(Map));
}
/// Return the input-to-output address map. Asserts that the map has already
/// been set via setIOAddressMap().
const AddressMap &getIOAddressMap() const {
  assert(IOAddressMap.has_value() && "Address map not set yet");
  const AddressMap &Map = *IOAddressMap;
  return Map;
}
};

template <typename T, typename = std::enable_if_t<sizeof(T) == 1>>
Expand Down
15 changes: 5 additions & 10 deletions bolt/include/bolt/Core/BinaryFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -577,9 +577,6 @@ class BinaryFunction {
/// Count the number of functions created.
static uint64_t Count;

/// Map offsets of special instructions to addresses in the output.
InputOffsetToAddressMapTy InputOffsetToAddressMap;

/// Register alternative function name.
void addAlternativeName(std::string NewName) {
Aliases.push_back(std::move(NewName));
Expand Down Expand Up @@ -1226,13 +1223,6 @@ class BinaryFunction {
/// Update output values of the function based on the final \p Layout.
void updateOutputValues(const MCAsmLayout &Layout);

/// Return mapping of input to output addresses. Most users should call
/// translateInputToOutputAddress() for address translation.
///
/// Asserts that isEmitted() holds. The map is returned by mutable reference,
/// so callers may also add entries to it.
InputOffsetToAddressMapTy &getInputOffsetToAddressMap() {
assert(isEmitted() && "cannot use address mapping before code emission");
return InputOffsetToAddressMap;
}

/// Register relocation type \p RelType at a given \p Address in the function
/// against \p Symbol.
/// Assert if the \p Address is not inside this function.
Expand Down Expand Up @@ -2180,6 +2170,11 @@ class BinaryFunction {
/// its code emission.
bool requiresAddressTranslation() const;

/// Return true if the linker needs to generate an address map for this
/// function. Used for keeping track of the mapping from input to output
/// addresses of basic blocks.
bool requiresAddressMap() const;

/// Adjust branch instructions to match the CFG.
///
/// As it comes to internal branches, the CFG represents "the ultimate source
Expand Down
4 changes: 4 additions & 0 deletions bolt/include/bolt/Core/BinarySection.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ class BinarySection {
mutable bool IsReordered{false}; // Have the contents been reordered?
bool IsAnonymous{false}; // True if the name should not be included
// in the output file.
bool IsLinkOnly{false}; // True if the section should not be included
// in the output file.

uint64_t hash(const BinaryData &BD,
std::map<const BinaryData *, uint64_t> &Cache) const;
Expand Down Expand Up @@ -452,6 +454,8 @@ class BinarySection {
/// Set the index of this section to \p I.
void setIndex(uint32_t I) { Index = I; }
/// Set the output name of this section; \p Name is materialized into a
/// string and stored.
void setOutputName(const Twine &Name) { OutputName = Name.str(); }
/// Set the anonymous flag. When \p Flag is true, the section name should not
/// be included in the output file.
void setAnonymous(bool Flag) { IsAnonymous = Flag; }
/// Return true if this section is link-only, i.e. it should not be included
/// in the output file.
bool isLinkOnly() const { return IsLinkOnly; }
/// Mark this section as link-only. This accessor only sets the flag; it
/// offers no way to clear it again.
void setLinkOnly() { IsLinkOnly = true; }

/// Emit the section as data, possibly with relocations.
/// Use name \p SectionName for the section during the emission.
Expand Down
63 changes: 63 additions & 0 deletions bolt/lib/Core/AddressMap.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#include "bolt/Core/AddressMap.h"
#include "bolt/Core/BinaryContext.h"
#include "bolt/Core/BinaryFunction.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/DataExtractor.h"

namespace llvm {
namespace bolt {

/// Name of the data section that AddressMap::emit() switches to before
/// writing the address map entries.
const char *const AddressMap::SectionName = ".bolt.address_map";

/// Emit one address map entry to \p Streamer: the constant \p InputAddress
/// followed by a reference to \p OutputLabel, each 8 bytes wide.
static void emitLabel(MCStreamer &Streamer, uint64_t InputAddress,
                      const MCSymbol *OutputLabel) {
  // Both halves of an entry use the same fixed width.
  constexpr unsigned FieldSize = 8;
  Streamer.emitIntValue(InputAddress, FieldSize);
  Streamer.emitSymbolValue(OutputLabel, FieldSize);
}

/// Emit the address map into the data section named SectionName.
///
/// For every function that requires an address map, one entry is written per
/// basic block whose label is defined, followed by one entry for each of that
/// block's location symbols (if it has any). Input addresses are formed by
/// adding the block's input start offset, or the location symbol's offset, to
/// the function address.
void AddressMap::emit(MCStreamer &Streamer, BinaryContext &BC) {
  Streamer.switchSection(BC.getDataSection(SectionName));

  for (const auto &[FuncAddress, Function] : BC.getBinaryFunctions()) {
    if (!Function.requiresAddressMap())
      continue;

    for (const auto &Block : Function) {
      // Blocks without a defined label contribute no entries at all.
      if (!Block.getLabel()->isDefined())
        continue;

      const uint64_t BlockInputAddress =
          FuncAddress + Block.getInputAddressRange().first;
      emitLabel(Streamer, BlockInputAddress, Block.getLabel());

      // Only touch the location symbols when the table actually exists.
      if (Block.hasLocSyms()) {
        for (const auto &LocSym : Block.getLocSyms())
          emitLabel(Streamer, FuncAddress + LocSym.first, LocSym.second);
      }
    }
  }
}

/// Decode \p Buffer, the contents of the address map section, into an
/// AddressMap.
///
/// The buffer is read as a sequence of <input address, output address> pairs,
/// using the endianness and code pointer size reported by BC.AsmInfo. The
/// buffer size is asserted to be a multiple of the entry size, and the cursor
/// is asserted to be error-free after reading.
///
/// NOTE(review): entries are decoded with the code pointer size, whereas
/// emit() writes fixed 8-byte fields; this presumably relies on 8-byte code
/// pointers -- confirm.
AddressMap AddressMap::parse(StringRef Buffer, const BinaryContext &BC) {
  const auto PointerSize = BC.AsmInfo->getCodePointerSize();
  const auto EntrySize = 2 * PointerSize;
  assert(Buffer.size() % EntrySize == 0 && "Unexpected address map size");

  AddressMap Result;
  Result.Map.reserve(Buffer.size() / EntrySize);

  DataExtractor Extractor(Buffer, BC.AsmInfo->isLittleEndian(), PointerSize);
  DataExtractor::Cursor Pos(0);

  // Read pairs until the buffer is exhausted or the cursor reports an error.
  while (Pos && !Extractor.eof(Pos)) {
    const uint64_t InputAddress = Extractor.getAddress(Pos);
    const uint64_t OutputAddress = Extractor.getAddress(Pos);
    Result.Map.emplace(InputAddress, OutputAddress);
  }

  assert(Pos && "Error reading address map section");
  return Result;
}

} // namespace bolt
} // namespace llvm
22 changes: 0 additions & 22 deletions bolt/lib/Core/BinaryBasicBlock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -613,27 +613,5 @@ BinaryBasicBlock *BinaryBasicBlock::splitAt(iterator II) {
return NewBlock;
}

/// Translate the location symbols of this basic block into output offsets
/// using \p Layout, then release the location symbol table.
///
/// For each location symbol, an <output offset within the block, input
/// function offset> pair is appended to the offset translation table. If the
/// function requires address translation, the input offset is also recorded
/// in the function's input-offset-to-address map together with the output
/// address (block output start address plus the offset within the block).
/// Does nothing if the block has no location symbol table.
void BinaryBasicBlock::updateOutputValues(const MCAsmLayout &Layout) {
  if (LocSyms == nullptr)
    return;

  const uint64_t BlockOutputAddress = getOutputAddressRange().first;
  const uint64_t BlockLayoutOffset = Layout.getSymbolOffset(*getLabel());

  for (const auto &[InputFunctionOffset, Symbol] : *LocSyms) {
    // Offset of the symbol relative to the start of this block in the output.
    const auto OutputOffset = static_cast<uint32_t>(
        Layout.getSymbolOffset(*Symbol) - BlockLayoutOffset);
    getOffsetTranslationTable().emplace_back(OutputOffset,
                                             InputFunctionOffset);

    // Update reverse (relative to BAT) address lookup table for function.
    if (!getFunction()->requiresAddressTranslation())
      continue;
    getFunction()->getInputOffsetToAddressMap().emplace(std::make_pair(
        InputFunctionOffset, OutputOffset + BlockOutputAddress));
  }

  LocSyms.reset();
}

} // namespace bolt
} // namespace llvm
4 changes: 4 additions & 0 deletions bolt/lib/Core/BinaryEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,10 @@ void BinaryEmitter::emitAll(StringRef OrgSecPrefix) {
}

emitDataSections(OrgSecPrefix);

// TODO Enable for Mach-O once BinaryContext::getDataSection supports it.
if (BC.isELF())
AddressMap::emit(Streamer, BC);
}

void BinaryEmitter::emitFunctions() {
Expand Down
19 changes: 12 additions & 7 deletions bolt/lib/Core/BinaryFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2855,6 +2855,14 @@ bool BinaryFunction::requiresAddressTranslation() const {
return opts::EnableBAT || hasSDTMarker() || hasPseudoProbe();
}

/// Return true if an input-to-output address map has to be generated for this
/// function. Injected functions never need one. Any other function needs one
/// when debug sections are being updated, when the function has multiple
/// entry points, or when it requires address translation.
bool BinaryFunction::requiresAddressMap() const {
  return !isInjected() && (opts::UpdateDebugSections || isMultiEntry() ||
                           requiresAddressTranslation());
}

uint64_t BinaryFunction::getInstructionCount() const {
uint64_t Count = 0;
for (const BinaryBasicBlock &BB : blocks())
Expand Down Expand Up @@ -4120,15 +4128,13 @@ void BinaryFunction::updateOutputValues(const MCAsmLayout &Layout) {
assert(FragmentBaseAddress == getOutputAddress());
}

const uint64_t BBOffset = Layout.getSymbolOffset(*BB->getLabel());
const uint64_t BBAddress = FragmentBaseAddress + BBOffset;
const uint64_t BBAddress =
*BC.getIOAddressMap().lookup(BB->getInputOffset() + getAddress());
BB->setOutputStartAddress(BBAddress);

if (PrevBB)
PrevBB->setOutputEndAddress(BBAddress);
PrevBB = BB;

BB->updateOutputValues(Layout);
}

PrevBB->setOutputEndAddress(PrevBB->isSplit()
Expand Down Expand Up @@ -4181,9 +4187,8 @@ uint64_t BinaryFunction::translateInputToOutputAddress(uint64_t Address) const {

// Check if the address is associated with an instruction that is tracked
// by address translation.
auto KV = InputOffsetToAddressMap.find(Address - getAddress());
if (KV != InputOffsetToAddressMap.end())
return KV->second;
if (auto OutputAddress = BC.getIOAddressMap().lookup(Address))
return *OutputAddress;

// FIXME: #18950828 - we rely on relative offsets inside basic blocks to stay
// intact. Instead we can use pseudo instructions and/or annotations.
Expand Down
1 change: 1 addition & 0 deletions bolt/lib/Core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ set(LLVM_LINK_COMPONENTS
)

add_llvm_library(LLVMBOLTCore
AddressMap.cpp
BinaryBasicBlock.cpp
BinaryContext.cpp
BinaryData.cpp
Expand Down
11 changes: 8 additions & 3 deletions bolt/lib/Profile/BoltAddressTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,14 @@ void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
// allowing it to overwrite the previously inserted key in the map.
Map[BBOutputOffset] = BBInputOffset;

for (const auto &IOPair : BB.getOffsetTranslationTable()) {
const uint64_t OutputOffset = IOPair.first + BBOutputOffset;
const uint32_t InputOffset = IOPair.second;
const auto &IOAddressMap =
BB.getFunction()->getBinaryContext().getIOAddressMap();

for (const auto &[InputOffset, Sym] : BB.getLocSyms()) {
const auto InputAddress = BB.getFunction()->getAddress() + InputOffset;
const auto OutputAddress = IOAddressMap.lookup(InputAddress);
assert(OutputAddress && "Unknown instruction address");
const auto OutputOffset = *OutputAddress - FuncAddress;

// Is this the first instruction in the BB? No need to duplicate the entry.
if (OutputOffset == BBOutputOffset)
Expand Down
4 changes: 1 addition & 3 deletions bolt/lib/Rewrite/PseudoProbeRewriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,9 +183,7 @@ void PseudoProbeRewriter::updatePseudoProbes() {
// A call probe may be duplicated due to ICP.
// Go through the input-to-output address map to collect all related
// probes.
const InputOffsetToAddressMapTy &Offset2Addr =
F->getInputOffsetToAddressMap();
auto CallOutputAddresses = Offset2Addr.equal_range(Offset);
auto CallOutputAddresses = BC.getIOAddressMap().lookupAll(AP.first);
auto CallOutputAddress = CallOutputAddresses.first;
if (CallOutputAddress == CallOutputAddresses.second) {
Probe->setAddress(INT64_MAX);
Expand Down

0 comments on commit 23c8d38

Please sign in to comment.