Skip to content

Commit

Permalink
[BOLT] Calculate output values using BOLTLinker
Browse files Browse the repository at this point in the history
BOLT uses `MCAsmLayout` to calculate the output values of functions and
basic blocks. This means output values are calculated based on a
pre-linking state and any changes to symbol values during linking will
cause incorrect values to be used.

This issue can be triggered by enabling linker relaxation on RISC-V.
Since linker relaxation can remove instructions, symbol values may
change. This causes, among other things, the symbol table created by
BOLT in the output executable to be incorrect.

This patch solves this issue by using `BOLTLinker` to get symbol values
instead of `MCAsmLayout`. This way, output values are calculated based
on a post-linking state. To make sure the linker can update all
necessary symbols, this patch also makes sure all these symbols are not
marked as temporary so that they end-up in the object file's symbol
table.

Note that this patch only deals with symbols of binary functions
(`BinaryFunction::updateOutputValues`). The technique described above
turned out to be too expensive for basic block symbols so those are
handled differently in D155604.

Reviewed By: maksfb

Differential Revision: https://reviews.llvm.org/D154604
  • Loading branch information
mtvec committed Aug 28, 2023
1 parent a470df3 commit 475a93a
Show file tree
Hide file tree
Showing 9 changed files with 56 additions and 53 deletions.
6 changes: 3 additions & 3 deletions bolt/include/bolt/Core/BinaryFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -1191,7 +1191,7 @@ class BinaryFunction {

if (!Islands->FunctionConstantIslandLabel) {
Islands->FunctionConstantIslandLabel =
BC.Ctx->createNamedTempSymbol("func_const_island");
BC.Ctx->getOrCreateSymbol("func_const_island@" + getOneName());
}
return Islands->FunctionConstantIslandLabel;
}
Expand All @@ -1201,7 +1201,7 @@ class BinaryFunction {

if (!Islands->FunctionColdConstantIslandLabel) {
Islands->FunctionColdConstantIslandLabel =
BC.Ctx->createNamedTempSymbol("func_cold_const_island");
BC.Ctx->getOrCreateSymbol("func_cold_const_island@" + getOneName());
}
return Islands->FunctionColdConstantIslandLabel;
}
Expand All @@ -1221,7 +1221,7 @@ class BinaryFunction {
}

/// Update output values of the function based on the final \p Layout.
void updateOutputValues(const MCAsmLayout &Layout);
void updateOutputValues(const BOLTLinker &Linker);

/// Register relocation type \p RelType at a given \p Address in the function
/// against \p Symbol.
Expand Down
15 changes: 14 additions & 1 deletion bolt/include/bolt/Core/Linker.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,28 @@ class BOLTLinker {
std::function<void(const BinarySection &Section, uint64_t Address)>;
using SectionsMapper = std::function<void(SectionMapper)>;

struct SymbolInfo {
uint64_t Address;
uint64_t Size;
};

virtual ~BOLTLinker() = default;

/// Load and link \p Obj. \p MapSections will be called before the object is
/// linked to allow section addresses to be remapped. When called, the address
/// of a section can be changed by calling the passed SectionMapper.
virtual void loadObject(MemoryBufferRef Obj, SectionsMapper MapSections) = 0;

/// Return the address and size of a symbol or std::nullopt if it cannot be
/// found.
virtual std::optional<SymbolInfo> lookupSymbolInfo(StringRef Name) const = 0;

/// Return the address of a symbol or std::nullopt if it cannot be found.
virtual std::optional<uint64_t> lookupSymbol(StringRef Name) const = 0;
std::optional<uint64_t> lookupSymbol(StringRef Name) const {
if (const auto Info = lookupSymbolInfo(Name))
return Info->Address;
return std::nullopt;
}
};

} // namespace bolt
Expand Down
5 changes: 2 additions & 3 deletions bolt/include/bolt/Rewrite/JITLinkLinker.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
#include "bolt/Rewrite/ExecutableFileMemoryManager.h"
#include "llvm/ExecutionEngine/JITLink/JITLinkDylib.h"

#include <map>
#include <memory>
#include <vector>

Expand All @@ -35,15 +34,15 @@ class JITLinkLinker : public BOLTLinker {
std::unique_ptr<ExecutableFileMemoryManager> MM;
jitlink::JITLinkDylib Dylib{"main"};
std::vector<ExecutableFileMemoryManager::FinalizedAlloc> Allocs;
std::map<std::string, uint64_t> Symtab;
StringMap<SymbolInfo> Symtab;

public:
JITLinkLinker(BinaryContext &BC,
std::unique_ptr<ExecutableFileMemoryManager> MM);
~JITLinkLinker();

void loadObject(MemoryBufferRef Obj, SectionsMapper MapSections) override;
std::optional<uint64_t> lookupSymbol(StringRef Name) const override;
std::optional<SymbolInfo> lookupSymbolInfo(StringRef Name) const override;

static SmallVector<jitlink::Block *, 2>
orderedBlocks(const jitlink::Section &Section);
Expand Down
2 changes: 1 addition & 1 deletion bolt/include/bolt/Rewrite/RewriteInstance.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ class RewriteInstance {
void mapAllocatableSections(BOLTLinker::SectionMapper MapSection);

/// Update output object's values based on the final \p Layout.
void updateOutputValues(const MCAsmLayout &Layout);
void updateOutputValues(const BOLTLinker &Linker);

/// Rewrite back all functions (hopefully optimized) that fit in the original
/// memory footprint for that function. If the function is now larger and does
Expand Down
1 change: 0 additions & 1 deletion bolt/lib/Core/BinaryBasicBlock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
#include "bolt/Core/BinaryContext.h"
#include "bolt/Core/BinaryFunction.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Errc.h"

Expand Down
55 changes: 25 additions & 30 deletions bolt/lib/Core/BinaryFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCExpr.h"
Expand Down Expand Up @@ -4030,33 +4029,37 @@ void BinaryFunction::calculateLoopInfo() {
}
}

void BinaryFunction::updateOutputValues(const MCAsmLayout &Layout) {
void BinaryFunction::updateOutputValues(const BOLTLinker &Linker) {
if (!isEmitted()) {
assert(!isInjected() && "injected function should be emitted");
setOutputAddress(getAddress());
setOutputSize(getSize());
return;
}

const uint64_t BaseAddress = getCodeSection()->getOutputAddress();
const auto SymbolInfo = Linker.lookupSymbolInfo(getSymbol()->getName());
assert(SymbolInfo && "Cannot find function entry symbol");
setOutputAddress(SymbolInfo->Address);
setOutputSize(SymbolInfo->Size);

if (BC.HasRelocations || isInjected()) {
const uint64_t StartOffset = Layout.getSymbolOffset(*getSymbol());
const uint64_t EndOffset = Layout.getSymbolOffset(*getFunctionEndLabel());
setOutputAddress(BaseAddress + StartOffset);
setOutputSize(EndOffset - StartOffset);
if (hasConstantIsland()) {
const uint64_t DataOffset =
Layout.getSymbolOffset(*getFunctionConstantIslandLabel());
setOutputDataAddress(BaseAddress + DataOffset);
const auto DataAddress =
Linker.lookupSymbol(getFunctionConstantIslandLabel()->getName());
assert(DataAddress && "Cannot find function CI symbol");
setOutputDataAddress(*DataAddress);
for (auto It : Islands->Offsets) {
const uint64_t OldOffset = It.first;
BinaryData *BD = BC.getBinaryDataAtAddress(getAddress() + OldOffset);
if (!BD)
continue;

MCSymbol *Symbol = It.second;
const uint64_t NewOffset = Layout.getSymbolOffset(*Symbol);
BD->setOutputLocation(*getCodeSection(), NewOffset);
const auto NewAddress = Linker.lookupSymbol(Symbol->getName());
assert(NewAddress && "Cannot find CI symbol");
auto &Section = *getCodeSection();
const auto NewOffset = *NewAddress - Section.getOutputAddress();
BD->setOutputLocation(Section, NewOffset);
}
}
if (isSplit()) {
Expand All @@ -4066,7 +4069,6 @@ void BinaryFunction::updateOutputValues(const MCAsmLayout &Layout) {
// If fragment is empty, cold section might not exist
if (FF.empty() && ColdSection.getError())
continue;
const uint64_t ColdBaseAddress = ColdSection->getOutputAddress();

const MCSymbol *ColdStartSymbol = getSymbol(FF.getFragmentNum());
// If fragment is empty, symbol might have not been emitted
Expand All @@ -4075,31 +4077,24 @@ void BinaryFunction::updateOutputValues(const MCAsmLayout &Layout) {
continue;
assert(ColdStartSymbol && ColdStartSymbol->isDefined() &&
"split function should have defined cold symbol");
const MCSymbol *ColdEndSymbol =
getFunctionEndLabel(FF.getFragmentNum());
assert(ColdEndSymbol && ColdEndSymbol->isDefined() &&
"split function should have defined cold end symbol");
const uint64_t ColdStartOffset =
Layout.getSymbolOffset(*ColdStartSymbol);
const uint64_t ColdEndOffset = Layout.getSymbolOffset(*ColdEndSymbol);
FF.setAddress(ColdBaseAddress + ColdStartOffset);
FF.setImageSize(ColdEndOffset - ColdStartOffset);
const auto ColdStartSymbolInfo =
Linker.lookupSymbolInfo(ColdStartSymbol->getName());
assert(ColdStartSymbolInfo && "Cannot find cold start symbol");
FF.setAddress(ColdStartSymbolInfo->Address);
FF.setImageSize(ColdStartSymbolInfo->Size);
if (hasConstantIsland()) {
const uint64_t DataOffset =
Layout.getSymbolOffset(*getFunctionColdConstantIslandLabel());
setOutputColdDataAddress(ColdBaseAddress + DataOffset);
const auto DataAddress = Linker.lookupSymbol(
getFunctionColdConstantIslandLabel()->getName());
assert(DataAddress && "Cannot find cold CI symbol");
setOutputColdDataAddress(*DataAddress);
}
}
}
} else {
setOutputAddress(getAddress());
setOutputSize(Layout.getSymbolOffset(*getFunctionEndLabel()));
}

// Update basic block output ranges for the debug info, if we have
// secondary entry points in the symbol table to update or if writing BAT.
if (!opts::UpdateDebugSections && !isMultiEntry() &&
!requiresAddressTranslation())
if (!requiresAddressMap())
return;

// Output ranges should match the input if the body hasn't changed.
Expand Down
7 changes: 4 additions & 3 deletions bolt/lib/Rewrite/JITLinkLinker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,8 @@ struct JITLinkLinker::Context : jitlink::JITLinkContext {
});

for (auto *Symbol : G.defined_symbols()) {
Linker.Symtab.insert(
{Symbol->getName().str(), Symbol->getAddress().getValue()});
SymbolInfo Info{Symbol->getAddress().getValue(), Symbol->getSize()};
Linker.Symtab.insert({Symbol->getName().str(), Info});
}

return Error::success();
Expand Down Expand Up @@ -174,7 +174,8 @@ void JITLinkLinker::loadObject(MemoryBufferRef Obj,
jitlink::link(std::move(*LG), std::move(Ctx));
}

std::optional<uint64_t> JITLinkLinker::lookupSymbol(StringRef Name) const {
std::optional<JITLinkLinker::SymbolInfo>
JITLinkLinker::lookupSymbolInfo(StringRef Name) const {
auto It = Symtab.find(Name.data());
if (It == Symtab.end())
return std::nullopt;
Expand Down
4 changes: 0 additions & 4 deletions bolt/lib/Rewrite/MachORewriteInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include "bolt/Rewrite/JITLinkLinker.h"
#include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h"
#include "bolt/Utils/Utils.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
Expand Down Expand Up @@ -476,9 +475,6 @@ void MachORewriteInstance::emitAndLink() {
"error creating in-memory object");
assert(Obj && "createObjectFile cannot return nullptr");

MCAsmLayout FinalLayout(
static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler());

auto EFMM = std::make_unique<ExecutableFileMemoryManager>(*BC);
EFMM->setNewSecPrefix(getNewSecPrefix());
EFMM->setOrgSecPrefix(getOrgSecPrefix());
Expand Down
14 changes: 7 additions & 7 deletions bolt/lib/Rewrite/RewriteInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3241,15 +3241,15 @@ void RewriteInstance::emitAndLink() {
Linker->loadObject(ObjectMemBuffer->getMemBufferRef(),
[this](auto MapSection) { mapFileSections(MapSection); });

MCAsmLayout FinalLayout(
static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler());

// Update output addresses based on the new section map and
// layout. Only do this for the object created by ourselves.
updateOutputValues(FinalLayout);
updateOutputValues(*Linker);

if (opts::UpdateDebugSections)
if (opts::UpdateDebugSections) {
MCAsmLayout FinalLayout(
static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler());
DebugInfoRewriter->updateLineTableOffsets(FinalLayout);
}

if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
RtLibrary->link(*BC, ToolPath, *Linker, [this](auto MapSection) {
Expand Down Expand Up @@ -3644,15 +3644,15 @@ void RewriteInstance::mapAllocatableSections(
}
}

void RewriteInstance::updateOutputValues(const MCAsmLayout &Layout) {
void RewriteInstance::updateOutputValues(const BOLTLinker &Linker) {
if (auto MapSection = BC->getUniqueSectionByName(AddressMap::SectionName)) {
auto Map = AddressMap::parse(MapSection->getOutputContents(), *BC);
BC->setIOAddressMap(std::move(Map));
BC->deregisterSection(*MapSection);
}

for (BinaryFunction *Function : BC->getAllBinaryFunctions())
Function->updateOutputValues(Layout);
Function->updateOutputValues(Linker);
}

void RewriteInstance::patchELFPHDRTable() {
Expand Down

0 comments on commit 475a93a

Please sign in to comment.