Skip to content

Commit

Permalink
[BOLT] Update SDTs based on translation tables
Browse files Browse the repository at this point in the history
Summary:
We used to emit special annotations to update SDT markers. However,
we can just use "Offset" annotations for the same purpose. Unlike BAT,
we have to generate "reverse" address translation tables.
This approach eliminates reliance on instructions after code emission.

(cherry picked from FBD18318660)
  • Loading branch information
maksfb committed Nov 4, 2019
1 parent 98e6361 commit f2b257b
Show file tree
Hide file tree
Showing 9 changed files with 207 additions and 197 deletions.
23 changes: 23 additions & 0 deletions bolt/src/BinaryBasicBlock.cpp
Expand Up @@ -15,6 +15,7 @@
#include "ParallelUtilities.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
#include <limits>
Expand Down Expand Up @@ -589,5 +590,27 @@ BinaryBasicBlock *BinaryBasicBlock::splitAt(iterator II) {
return NewBlock;
}

/// Resolve the location symbols recorded for this block against the final
/// \p Layout. Every recorded (input function offset, symbol) pair yields an
/// (output offset within the block, input function offset) entry in the
/// block's offset translation table. For functions with SDT markers, the
/// input offset is also mapped to its absolute output address in the
/// function-level lookup table. The recorded symbols are released afterwards.
void BinaryBasicBlock::updateOutputValues(const MCAsmLayout &Layout) {
  // No symbols were recorded for this block: nothing to translate.
  if (!LocSyms)
    return;

  const auto BlockOutputAddress = getOutputAddressRange().first;
  const auto BlockLabelOffset = Layout.getSymbolOffset(*getLabel());

  for (const auto &Entry : *LocSyms) {
    const uint32_t InputFunctionOffset = Entry.first;
    const auto *LocSym = Entry.second;

    // Offset of the tracked instruction relative to the start of this block
    // in the output.
    const auto SymbolOffset = Layout.getSymbolOffset(*LocSym);
    const auto OutputOffset =
        static_cast<uint32_t>(SymbolOffset - BlockLabelOffset);
    getOffsetTranslationTable().emplace_back(OutputOffset,
                                             InputFunctionOffset);

    // The reverse (relative to BAT) address lookup table is maintained only
    // for functions that contain SDT markers.
    if (!getFunction()->hasSDTMarker())
      continue;

    getFunction()->getInputOffsetToAddressMap().emplace(
        InputFunctionOffset, OutputOffset + BlockOutputAddress);
  }

  // The symbols are no longer needed once the tables are populated.
  LocSyms.reset();
}

} // namespace bolt
} // namespace llvm
19 changes: 13 additions & 6 deletions bolt/src/BinaryBasicBlock.h
Expand Up @@ -86,16 +86,20 @@ class BinaryBasicBlock {
/// Original offset range of the basic block in the function.
std::pair<uint32_t, uint32_t> InputRange{INVALID_OFFSET, INVALID_OFFSET};

/// Map input offset of an instruction to an output symbol. Enables writing
/// bolt address translation tables, used for mapping control transfer in the
/// output binary back to the original binary.
/// Map input offset (from function start) of an instruction to an output
/// symbol. Enables writing BOLT address translation tables used for mapping
/// control transfer in the output binary back to the original binary.
using LocSymsTy = std::vector<std::pair<uint32_t, const MCSymbol *>>;
std::unique_ptr<LocSymsTy> LocSyms;

/// Map input offsets in the basic block to output offsets.
/// After output/codegen, map output offsets of instructions in this basic
/// block to instruction offsets in the original function. Note that the
/// output basic block could be different from the input basic block.
/// We only map instructions of interest, such as calls and SDT markers.
///
/// NOTE: map only instruction of interest, such as calls.
using OffsetTranslationTableTy = std::vector<std::pair<uint16_t, uint16_t>>;
/// We store the offset array in a basic block to facilitate BAT tables
/// generation. Otherwise, the mapping could be done at function level.
using OffsetTranslationTableTy = std::vector<std::pair<uint32_t, uint32_t>>;
std::unique_ptr<OffsetTranslationTableTy> OffsetTranslationTable;

/// Alignment requirements for the block.
Expand Down Expand Up @@ -828,6 +832,9 @@ class BinaryBasicBlock {
return OutputAddressRange;
}

/// Update addresses of special instructions inside this basic block.
void updateOutputValues(const MCAsmLayout &Layout);

/// Return mapping of input offsets to symbols in the output.
LocSymsTy &getLocSyms() {
return LocSyms ? *LocSyms : *(LocSyms = std::make_unique<LocSymsTy>());
Expand Down
143 changes: 113 additions & 30 deletions bolt/src/BinaryFunction.cpp
Expand Up @@ -20,6 +20,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
Expand Down Expand Up @@ -1730,22 +1731,13 @@ bool BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
const auto InstrInputAddr = I->first + Address;
bool IsSDTMarker =
MIB->isNoop(Instr) && BC.SDTMarkers.count(InstrInputAddr);

if (IsSDTMarker) {
HasSDTMarker = true;
DEBUG(dbgs() << "SDTMarker detected in the input at : "
<< utohexstr(InstrInputAddr) << "\n");

MIB->addAnnotation<uint64_t>(Instr, "SDTMarker", InstrInputAddr,
AllocatorId);

// This mutex is used to lock concurrent writes to GlobalSymbols and
// BinaryDataMap that happens in registerNameAtAddress
{
static std::shared_timed_mutex GlobalSymbolCreationMtx;
std::unique_lock<std::shared_timed_mutex> Lock(GlobalSymbolCreationMtx);
BC.SDTMarkers[InstrInputAddr].Label =
getOrCreateLocalLabel(InstrInputAddr);
if (!MIB->hasAnnotation(Instr, "Offset")) {
MIB->addAnnotation(Instr, "Offset", static_cast<uint32_t>(Offset),
AllocatorId);
}
}

Expand Down Expand Up @@ -1947,11 +1939,10 @@ void BinaryFunction::postProcessCFG() {
clearList(IgnoredBranches);
clearList(EntryOffsets);

// Remove "Offset" annotations, unless we need to write a BOLT address
// translation table later. This has no cost, since annotations are allocated
// by a bumpptr allocator and won't be released anyway until late in the
// pipeline.
if (!opts::EnableBAT && !opts::Instrument)
// Remove "Offset" annotations, unless we need an address-translation table
// later. This has no cost, since annotations are allocated by a bumpptr
// allocator and won't be released anyway until late in the pipeline.
if (!requiresAddressTranslation() && !opts::Instrument)
for (auto *BB : layout())
for (auto &Inst : *BB)
BC.MIB->removeAnnotation(Inst, "Offset");
Expand Down Expand Up @@ -2655,6 +2646,10 @@ bool BinaryFunction::finalizeCFIState() {
return true;
}

/// Return true if an address-translation table has to be produced for this
/// function: either BAT output is enabled, or the function has an SDT marker.
bool BinaryFunction::requiresAddressTranslation() const {
  if (opts::EnableBAT)
    return true;
  return hasSDTMarker();
}

uint64_t BinaryFunction::getInstructionCount() const {
uint64_t Count = 0;
for (auto &Block : BasicBlocksLayout) {
Expand All @@ -2673,7 +2668,7 @@ uint64_t BinaryFunction::getEditDistance() const {
}

void BinaryFunction::emitBody(MCStreamer &Streamer, bool EmitColdPart,
bool EmitCodeOnly, bool LabelsForOffsets) {
bool EmitCodeOnly) {
if (!EmitCodeOnly && EmitColdPart && hasConstantIsland())
duplicateConstantIslands();

Expand Down Expand Up @@ -2741,25 +2736,14 @@ void BinaryFunction::emitBody(MCStreamer &Streamer, bool EmitColdPart,

// Prepare to tag this location with a label if we need to keep track of
// the location of calls/returns for BOLT address translation maps
if (!EmitCodeOnly && LabelsForOffsets &&
if (!EmitCodeOnly && requiresAddressTranslation() &&
BC.MIB->hasAnnotation(Instr, "Offset")) {
const auto Offset = BC.MIB->getAnnotationAs<uint32_t>(Instr, "Offset");
MCSymbol *LocSym = BC.Ctx->createTempSymbol(/*CanBeUnnamed=*/true);
Streamer.EmitLabel(LocSym);
BB->getLocSyms().emplace_back(std::make_pair(Offset, LocSym));
}

// Emit SDT labels
if (!EmitCodeOnly && BC.MIB->hasAnnotation(Instr, "SDTMarker")) {
auto OriginalAddress =
BC.MIB->tryGetAnnotationAs<uint64_t>(Instr, "SDTMarker").get();
auto *SDTLabel = BC.SDTMarkers[OriginalAddress].Label;

// A given symbol should only be emitted as a label once
if (SDTLabel->isUndefined())
Streamer.EmitLabel(SDTLabel);
}

Streamer.EmitInstruction(Instr, *BC.STI);
LastIsPrefix = BC.MIB->isPrefix(Instr);
}
Expand Down Expand Up @@ -4058,6 +4042,98 @@ void BinaryFunction::calculateLoopInfo() {
}
}

/// Update the output address and size of this function and, when needed, the
/// output address ranges of its basic blocks, using symbol offsets taken from
/// the final \p Layout.
void BinaryFunction::updateOutputValues(const MCAsmLayout &Layout) {
// A function that was not emitted keeps its input address and size.
if (!isEmitted()) {
assert(!isInjected() && "injected function should be emitted");
setOutputAddress(getAddress());
setOutputSize(getSize());
return;
}

// Output addresses of the sections holding the main and the cold code.
// The cold base is only meaningful (non-zero) for split functions.
const auto BaseAddress = getCodeSection()->getOutputAddress();
auto ColdSection = getColdCodeSection();
const auto ColdBaseAddress =
isSplit() ? ColdSection->getOutputAddress() : 0;
if (BC.HasRelocations || isInjected()) {
// The function start and end are derived from label offsets added to the
// section base address.
const auto StartOffset = Layout.getSymbolOffset(*getSymbol());
const auto EndOffset = Layout.getSymbolOffset(*getFunctionEndLabel());
setOutputAddress(BaseAddress + StartOffset);
setOutputSize(EndOffset - StartOffset);
if (hasConstantIsland()) {
const auto DataOffset =
Layout.getSymbolOffset(*getFunctionConstantIslandLabel());
setOutputDataAddress(BaseAddress + DataOffset);
}
if (isSplit()) {
// Same computation for the cold fragment, relative to the cold section.
const auto *ColdStartSymbol = getColdSymbol();
assert(ColdStartSymbol && ColdStartSymbol->isDefined() &&
"split function should have defined cold symbol");
const auto *ColdEndSymbol = getFunctionColdEndLabel();
assert(ColdEndSymbol && ColdEndSymbol->isDefined() &&
"split function should have defined cold end symbol");
const auto ColdStartOffset = Layout.getSymbolOffset(*ColdStartSymbol);
const auto ColdEndOffset = Layout.getSymbolOffset(*ColdEndSymbol);
cold().setAddress(ColdBaseAddress + ColdStartOffset);
cold().setImageSize(ColdEndOffset - ColdStartOffset);
if (hasConstantIsland()) {
const auto DataOffset = Layout.getSymbolOffset(
*getFunctionColdConstantIslandLabel());
setOutputColdDataAddress(ColdBaseAddress + DataOffset);
}
}
} else {
// Otherwise the function stays at its input address and the offset of the
// end label is used directly as the output size.
// NOTE(review): presumably the function starts at offset 0 of its section
// in this mode, so the end-label offset equals the size -- confirm.
setOutputAddress(getAddress());
setOutputSize(
Layout.getSymbolOffset(*getFunctionEndLabel()));
}

// Update basic block output ranges for the debug info, if we have
// secondary entry points in the symbol table to update or if writing BAT.
if (!opts::UpdateDebugSections && !isMultiEntry() &&
!requiresAddressTranslation())
return;

// Output ranges should match the input if the body hasn't changed.
if (!isSimple() && !BC.HasRelocations)
return;

// AArch64 may have functions that only contain a constant island (no code).
// This check also guarantees PrevBB is non-null after the loop below.
if (layout_begin() == layout_end())
return;

// Walk the blocks in layout order. Each block's start address is set when it
// is visited; its end address is set when the next block is visited (or
// after the loop for the last block).
BinaryBasicBlock *PrevBB = nullptr;
for (auto BBI = layout_begin(), BBE = layout_end(); BBI != BBE; ++BBI) {
auto *BB = *BBI;
assert(BB->getLabel()->isDefined() && "symbol should be defined");
const auto BBBaseAddress = BB->isCold() ? ColdBaseAddress : BaseAddress;
if (!BC.HasRelocations) {
// Sanity check: outside relocation mode the section base is expected to
// coincide with the start of the corresponding function fragment.
if (BB->isCold()) {
assert(BBBaseAddress == cold().getAddress());
} else {
assert(BBBaseAddress == getOutputAddress());
}
}
const auto BBOffset = Layout.getSymbolOffset(*BB->getLabel());
const auto BBAddress = BBBaseAddress + BBOffset;
BB->setOutputStartAddress(BBAddress);

if (PrevBB) {
// The previous block normally ends where this one starts. When the two
// blocks differ in coldness, the previous block's end is set to the end
// of the main function body instead.
auto PrevBBEndAddress = BBAddress;
if (BB->isCold() != PrevBB->isCold()) {
PrevBBEndAddress =
getOutputAddress() + getOutputSize();
}
PrevBB->setOutputEndAddress(PrevBBEndAddress);
}
PrevBB = BB;

// Called after the block's output start address has been set above.
BB->updateOutputValues(Layout);
}
// The last block in the layout ends at the end of the fragment it belongs to.
PrevBB->setOutputEndAddress(PrevBB->isCold() ?
cold().getAddress() + cold().getImageSize() :
getOutputAddress() + getOutputSize());
}

DebugAddressRangesVector BinaryFunction::getOutputAddressRanges() const {
DebugAddressRangesVector OutputRanges;

Expand Down Expand Up @@ -4093,6 +4169,13 @@ uint64_t BinaryFunction::translateInputToOutputAddress(uint64_t Address) const {
if (Address < getAddress())
return 0;

// Check if the address is associated with an instruction that is tracked
// by address translation.
auto KV = InputOffsetToAddressMap.find(Address - getAddress());
if (KV != InputOffsetToAddressMap.end()) {
return KV->second;
}

// FIXME: #18950828 - we rely on relative offsets inside basic blocks to stay
// intact. Instead we can use pseudo instructions and/or annotations.
const auto Offset = Address - getAddress();
Expand Down
27 changes: 23 additions & 4 deletions bolt/src/BinaryFunction.h
Expand Up @@ -249,6 +249,9 @@ class BinaryFunction {
/// the control to any basic block of its parent or its sibling.
bool IsFragment{false};

/// Indicate that the function body has SDT marker
bool HasSDTMarker{false};

/// The address for the code for this function in codegen memory.
uint64_t ImageAddress{0};

Expand Down Expand Up @@ -314,9 +317,6 @@ class BinaryFunction {
/// Function order for streaming into the destination binary.
uint32_t Index{-1U};

/// Indicate that the function body has SDT marker
bool HasSDTMarker{false};

/// Get basic block index assuming it belongs to this function.
unsigned getIndex(const BinaryBasicBlock *BB) const {
assert(BB->getIndex() < BasicBlocks.size());
Expand Down Expand Up @@ -542,6 +542,11 @@ class BinaryFunction {
/// Count the number of functions created.
static uint64_t Count;

/// Map offsets of special instructions to addresses in the output.
using InputOffsetToAddressMapTy = std::unordered_map<uint64_t, uint64_t>;
InputOffsetToAddressMapTy InputOffsetToAddressMap;

private:
/// Register alternative function name.
void addAlternativeName(std::string NewName) {
Names.emplace_back(NewName);
Expand Down Expand Up @@ -1117,6 +1122,16 @@ class BinaryFunction {
PLTSymbol = Symbol;
}

/// Update output values of the function based on the final \p Layout.
void updateOutputValues(const MCAsmLayout &Layout);

/// Return the map from input offsets of special instructions to their
/// addresses in the output. Asserts that the function has been emitted, since
/// the mapping cannot be used before code emission. Most users should call
/// translateInputToOutputAddress() for address translation.
InputOffsetToAddressMapTy &getInputOffsetToAddressMap() {
assert(isEmitted() && "cannot use address mapping before code emission");
return InputOffsetToAddressMap;
}

/// Register relocation type \p RelType at a given \p Address in the function
/// against \p Symbol.
/// Assert if the \p Address is not inside this function.
Expand Down Expand Up @@ -2083,6 +2098,10 @@ class BinaryFunction {
/// is corrupted. If it is unable to fix it, it returns false.
bool finalizeCFIState();

/// Return true if this function needs an address-translation table after
/// its code emission.
bool requiresAddressTranslation() const;

/// Adjust branch instructions to match the CFG.
///
/// As it comes to internal branches, the CFG represents "the ultimate source
Expand Down Expand Up @@ -2124,7 +2143,7 @@ class BinaryFunction {
/// Emit function code. The caller is responsible for emitting function
/// symbol(s) and setting the section to emit the code to.
void emitBody(MCStreamer &Streamer, bool EmitColdPart,
bool EmitCodeOnly = false, bool LabelsForOffsets = false);
bool EmitCodeOnly = false);

/// Emit function as a blob with relocations and labels for relocations.
void emitBodyRaw(MCStreamer *Streamer);
Expand Down
3 changes: 0 additions & 3 deletions bolt/src/BinarySection.h
Expand Up @@ -461,9 +461,6 @@ struct SDTMarkerInfo {

/// The offset of PC within the note section
unsigned PCOffset;

/// A label that marks the location of the SDT nop instruction
MCSymbol *Label;
};

} // namespace bolt
Expand Down
4 changes: 2 additions & 2 deletions bolt/src/BoltAddressTranslation.cpp
Expand Up @@ -43,8 +43,8 @@ void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
Map[BBOutputOffset] = BBInputOffset;

for (const auto &IOPair : BB.getOffsetTranslationTable()) {
const auto InputOffset = IOPair.first + BBInputOffset;
const auto OutputOffset = IOPair.second + BBOutputOffset;
const auto OutputOffset = IOPair.first + BBOutputOffset;
const auto InputOffset = IOPair.second;

// Is this the first instruction in the BB? No need to duplicate the entry.
if (OutputOffset == BBOutputOffset)
Expand Down

0 comments on commit f2b257b

Please sign in to comment.