Skip to content

Commit

Permalink
[lld-macho] Have relocation address included in range-check error mes…
Browse files Browse the repository at this point in the history
…sage

This makes it easier to debug those errors. See e.g. #52767 (comment)

We take the approach of 'reverse-engineering' the InputSection from the
output buffer offset. This provides for a cleaner Target API, and is
similar to LLD-ELF's implementation of getErrorPlace().

Reviewed By: #lld-macho, Roger

Differential Revision: https://reviews.llvm.org/D118903
  • Loading branch information
int3 committed Mar 1, 2022
1 parent e03d216 commit a552fb2
Show file tree
Hide file tree
Showing 8 changed files with 123 additions and 67 deletions.
41 changes: 21 additions & 20 deletions lld/MachO/Arch/ARM64Common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,56 +38,57 @@ int64_t ARM64Common::getEmbeddedAddend(MemoryBufferRef mb, uint64_t offset,
}
}

static void writeValue(uint8_t *loc, const Reloc &r, uint64_t value) {
switch (r.length) {
case 2:
checkInt(loc, r, value, 32);
write32le(loc, value);
break;
case 3:
write64le(loc, value);
break;
default:
llvm_unreachable("invalid r_length");
}
}

// For instruction relocations (load, store, add), the base
// instruction is pre-populated in the text section. A pre-populated
// instruction has opcode & register-operand bits set, with immediate
// operands zeroed. We read it from text, OR-in the immediate
// operands, then write-back the completed instruction.

void ARM64Common::relocateOne(uint8_t *loc, const Reloc &r, uint64_t value,
uint64_t pc) const {
auto loc32 = reinterpret_cast<uint32_t *>(loc);
uint32_t base = ((r.length == 2) ? read32le(loc) : 0);
switch (r.type) {
case ARM64_RELOC_BRANCH26:
value = encodeBranch26(r, base, value - pc);
encodeBranch26(loc32, r, base, value - pc);
break;
case ARM64_RELOC_SUBTRACTOR:
case ARM64_RELOC_UNSIGNED:
if (r.length == 2)
checkInt(r, value, 32);
writeValue(loc, r, value);
break;
case ARM64_RELOC_POINTER_TO_GOT:
if (r.pcrel)
value -= pc;
checkInt(r, value, 32);
writeValue(loc, r, value);
break;
case ARM64_RELOC_PAGE21:
case ARM64_RELOC_GOT_LOAD_PAGE21:
case ARM64_RELOC_TLVP_LOAD_PAGE21: {
case ARM64_RELOC_TLVP_LOAD_PAGE21:
assert(r.pcrel);
value = encodePage21(r, base, pageBits(value) - pageBits(pc));
encodePage21(loc32, r, base, pageBits(value) - pageBits(pc));
break;
}
case ARM64_RELOC_PAGEOFF12:
case ARM64_RELOC_GOT_LOAD_PAGEOFF12:
case ARM64_RELOC_TLVP_LOAD_PAGEOFF12:
assert(!r.pcrel);
value = encodePageOff12(base, value);
encodePageOff12(loc32, base, value);
break;
default:
llvm_unreachable("unexpected relocation type");
}

switch (r.length) {
case 2:
write32le(loc, value);
break;
case 3:
write64le(loc, value);
break;
default:
llvm_unreachable("invalid r_length");
}
}

void ARM64Common::relaxGotLoad(uint8_t *loc, uint8_t type) const {
Expand Down
57 changes: 32 additions & 25 deletions lld/MachO/Arch/ARM64Common.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,39 +40,45 @@ inline uint64_t bitField(uint64_t value, int right, int width, int left) {
// | | imm26 |
// +-----------+---------------------------------------------------+

inline uint64_t encodeBranch26(const Reloc &r, uint64_t base, uint64_t va) {
checkInt(r, va, 28);
inline void encodeBranch26(uint32_t *loc, const Reloc &r, uint32_t base,
uint64_t va) {
checkInt(loc, r, va, 28);
// Since branch destinations are 4-byte aligned, the 2 least-
// significant bits are 0. They are right shifted off the end.
return (base | bitField(va, 2, 26, 0));
llvm::support::endian::write32le(loc, base | bitField(va, 2, 26, 0));
}

inline uint64_t encodeBranch26(SymbolDiagnostic d, uint64_t base, uint64_t va) {
checkInt(d, va, 28);
return (base | bitField(va, 2, 26, 0));
inline void encodeBranch26(uint32_t *loc, SymbolDiagnostic d, uint32_t base,
uint64_t va) {
checkInt(loc, d, va, 28);
llvm::support::endian::write32le(loc, base | bitField(va, 2, 26, 0));
}

// 30 29 23 5
// +-+---+---------+-------------------------------------+---------+
// | |ilo| | immhi | |
// +-+---+---------+-------------------------------------+---------+

inline uint64_t encodePage21(const Reloc &r, uint64_t base, uint64_t va) {
checkInt(r, va, 35);
return (base | bitField(va, 12, 2, 29) | bitField(va, 14, 19, 5));
inline void encodePage21(uint32_t *loc, const Reloc &r, uint32_t base,
uint64_t va) {
checkInt(loc, r, va, 35);
llvm::support::endian::write32le(loc, base | bitField(va, 12, 2, 29) |
bitField(va, 14, 19, 5));
}

inline uint64_t encodePage21(SymbolDiagnostic d, uint64_t base, uint64_t va) {
checkInt(d, va, 35);
return (base | bitField(va, 12, 2, 29) | bitField(va, 14, 19, 5));
inline void encodePage21(uint32_t *loc, SymbolDiagnostic d, uint32_t base,
uint64_t va) {
checkInt(loc, d, va, 35);
llvm::support::endian::write32le(loc, base | bitField(va, 12, 2, 29) |
bitField(va, 14, 19, 5));
}

// 21 10
// +-------------------+-----------------------+-------------------+
// | | imm12 | |
// +-------------------+-----------------------+-------------------+

inline uint64_t encodePageOff12(uint32_t base, uint64_t va) {
inline void encodePageOff12(uint32_t *loc, uint32_t base, uint64_t va) {
int scale = 0;
if ((base & 0x3b00'0000) == 0x3900'0000) { // load/store
scale = base >> 30;
Expand All @@ -82,7 +88,8 @@ inline uint64_t encodePageOff12(uint32_t base, uint64_t va) {

// TODO(gkm): extract embedded addend and warn if != 0
// uint64_t addend = ((base & 0x003FFC00) >> 10);
return (base | bitField(va, scale, 12 - scale, 10));
llvm::support::endian::write32le(loc,
base | bitField(va, scale, 12 - scale, 10));
}

inline uint64_t pageBits(uint64_t address) {
Expand All @@ -99,9 +106,9 @@ inline void writeStub(uint8_t *buf8, const uint32_t stubCode[3],
pageBits(in.stubs->addr + sym.stubsIndex * stubCodeSize);
uint64_t lazyPointerVA =
in.lazyPointers->addr + sym.stubsIndex * LP::wordSize;
buf32[0] = encodePage21({&sym, "stub"}, stubCode[0],
pageBits(lazyPointerVA) - pcPageBits);
buf32[1] = encodePageOff12(stubCode[1], lazyPointerVA);
encodePage21(&buf32[0], {&sym, "stub"}, stubCode[0],
pageBits(lazyPointerVA) - pcPageBits);
encodePageOff12(&buf32[1], stubCode[1], lazyPointerVA);
buf32[2] = stubCode[2];
}

Expand All @@ -114,15 +121,15 @@ inline void writeStubHelperHeader(uint8_t *buf8,
};
uint64_t loaderVA = in.imageLoaderCache->getVA();
SymbolDiagnostic d = {nullptr, "stub header helper"};
buf32[0] = encodePage21(d, stubHelperHeaderCode[0],
pageBits(loaderVA) - pcPageBits(0));
buf32[1] = encodePageOff12(stubHelperHeaderCode[1], loaderVA);
encodePage21(&buf32[0], d, stubHelperHeaderCode[0],
pageBits(loaderVA) - pcPageBits(0));
encodePageOff12(&buf32[1], stubHelperHeaderCode[1], loaderVA);
buf32[2] = stubHelperHeaderCode[2];
uint64_t binderVA =
in.got->addr + in.stubHelper->stubBinder->gotIndex * LP::wordSize;
buf32[3] = encodePage21(d, stubHelperHeaderCode[3],
pageBits(binderVA) - pcPageBits(3));
buf32[4] = encodePageOff12(stubHelperHeaderCode[4], binderVA);
encodePage21(&buf32[3], d, stubHelperHeaderCode[3],
pageBits(binderVA) - pcPageBits(3));
encodePageOff12(&buf32[4], stubHelperHeaderCode[4], binderVA);
buf32[5] = stubHelperHeaderCode[5];
}

Expand All @@ -133,8 +140,8 @@ inline void writeStubHelperEntry(uint8_t *buf8,
auto pcVA = [entryVA](int i) { return entryVA + i * sizeof(uint32_t); };
uint64_t stubHelperHeaderVA = in.stubHelper->addr;
buf32[0] = stubHelperEntryCode[0];
buf32[1] = encodeBranch26({&sym, "stub helper"}, stubHelperEntryCode[1],
stubHelperHeaderVA - pcVA(1));
encodeBranch26(&buf32[1], {&sym, "stub helper"}, stubHelperEntryCode[1],
stubHelperHeaderVA - pcVA(1));
buf32[2] = sym.lazyBindOffset;
}

Expand Down
6 changes: 3 additions & 3 deletions lld/MachO/Arch/X86_64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,9 @@ void X86_64::relocateOne(uint8_t *loc, const Reloc &r, uint64_t value,
switch (r.length) {
case 2:
if (r.type == X86_64_RELOC_UNSIGNED)
checkUInt(r, value, 32);
checkUInt(loc, r, value, 32);
else
checkInt(r, value, 32);
checkInt(loc, r, value, 32);
write32le(loc, value);
break;
case 3:
Expand All @@ -127,7 +127,7 @@ void X86_64::relocateOne(uint8_t *loc, const Reloc &r, uint64_t value,
static void writeRipRelative(SymbolDiagnostic d, uint8_t *buf, uint64_t bufAddr,
uint64_t bufOff, uint64_t destAddr) {
uint64_t rip = bufAddr + bufOff;
checkInt(d, destAddr - rip, 32);
checkInt(buf, d, destAddr - rip, 32);
// For the instructions we care about, the RIP-relative address is always
// stored in the last 4 bytes of the instruction.
write32le(buf + bufOff - 4, destAddr - rip);
Expand Down
59 changes: 53 additions & 6 deletions lld/MachO/Relocations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//

#include "Relocations.h"
#include "ConcatOutputSection.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
Expand Down Expand Up @@ -38,19 +39,65 @@ bool macho::validateSymbolRelocation(const Symbol *sym,
return valid;
}
void macho::reportRangeError(const Reloc &r, const Twine &v, uint8_t bits,
int64_t min, uint64_t max) {
// Given an offset in the output buffer, figure out which ConcatInputSection (if
// any) maps to it. At the same time, update the offset such that it is relative
// to the InputSection rather than to the output buffer.
//
// Obtaining the InputSection allows us to have better error diagnostics.
// However, many of our relocation-handling methods do not take the InputSection
// as a parameter. Since we are already passing the buffer offsets to our Target
// methods, this function allows us to emit better errors without threading an
// additional InputSection argument through the call stack.
//
// This is implemented as a slow linear search through OutputSegments,
// OutputSections, and finally the InputSections themselves. However, this
// function should be called only on error paths, so some overhead is fine.
static InputSection *offsetToInputSection(uint64_t *off) {
for (OutputSegment *seg : outputSegments) {
if (*off < seg->fileOff || *off >= seg->fileOff + seg->fileSize)
continue;
const std::vector<OutputSection *> &sections = seg->getSections();
size_t osecIdx = 0;
for (; osecIdx < sections.size(); ++osecIdx)
if (*off < sections[osecIdx]->fileOff)
break;
assert(osecIdx > 0);
// We should be only calling this function on offsets that belong to
// ConcatOutputSections.
auto *osec = cast<ConcatOutputSection>(sections[osecIdx - 1]);
*off -= osec->fileOff;
size_t isecIdx = 0;
for (; isecIdx < osec->inputs.size(); ++isecIdx) {
const ConcatInputSection *isec = osec->inputs[isecIdx];
if (*off < isec->outSecOff)
break;
}
assert(isecIdx > 0);
ConcatInputSection *isec = osec->inputs[isecIdx - 1];
*off -= isec->outSecOff;
return isec;
}
return nullptr;
}
void macho::reportRangeError(void *loc, const Reloc &r, const Twine &v,
uint8_t bits, int64_t min, uint64_t max) {
std::string hint;
uint64_t off = reinterpret_cast<const uint8_t *>(loc) - in.bufferStart;
const InputSection *isec = offsetToInputSection(&off);
std::string locStr = isec ? isec->getLocation(off) : "(invalid location)";
if (auto *sym = r.referent.dyn_cast<Symbol *>())
hint = "; references " + toString(*sym);
// TODO: get location of reloc using something like LLD-ELF's getErrorPlace()
error("relocation " + target->getRelocAttrs(r.type).name +
error(locStr + ": relocation " + target->getRelocAttrs(r.type).name +
" is out of range: " + v + " is not in [" + Twine(min) + ", " +
Twine(max) + "]" + hint);
}
void macho::reportRangeError(SymbolDiagnostic d, const Twine &v, uint8_t bits,
int64_t min, uint64_t max) {
void macho::reportRangeError(void *loc, SymbolDiagnostic d, const Twine &v,
uint8_t bits, int64_t min, uint64_t max) {
// FIXME: should we use `loc` somehow to provide a better error message?
std::string hint;
if (d.symbol)
hint = "; references " + toString(*d.symbol);
Expand Down
16 changes: 8 additions & 8 deletions lld/MachO/Relocations.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,28 +70,28 @@ bool validateSymbolRelocation(const Symbol *, const InputSection *,
* v: The value the relocation is attempting to encode
* bits: The number of bits actually available to encode this relocation
*/
void reportRangeError(const Reloc &, const llvm::Twine &v, uint8_t bits,
int64_t min, uint64_t max);
void reportRangeError(void *loc, const Reloc &, const llvm::Twine &v,
uint8_t bits, int64_t min, uint64_t max);

struct SymbolDiagnostic {
const Symbol *symbol;
llvm::StringRef reason;
};

void reportRangeError(SymbolDiagnostic, const llvm::Twine &v, uint8_t bits,
int64_t min, uint64_t max);
void reportRangeError(void *loc, SymbolDiagnostic, const llvm::Twine &v,
uint8_t bits, int64_t min, uint64_t max);

template <typename Diagnostic>
inline void checkInt(Diagnostic d, int64_t v, int bits) {
inline void checkInt(void *loc, Diagnostic d, int64_t v, int bits) {
if (v != llvm::SignExtend64(v, bits))
reportRangeError(d, llvm::Twine(v), bits, llvm::minIntN(bits),
reportRangeError(loc, d, llvm::Twine(v), bits, llvm::minIntN(bits),
llvm::maxIntN(bits));
}

template <typename Diagnostic>
inline void checkUInt(Diagnostic d, uint64_t v, int bits) {
inline void checkUInt(void *loc, Diagnostic d, uint64_t v, int bits) {
if ((v >> bits) != 0)
reportRangeError(d, llvm::Twine(v), bits, 0, llvm::maxUIntN(bits));
reportRangeError(loc, d, llvm::Twine(v), bits, 0, llvm::maxUIntN(bits));
}

inline void writeAddress(uint8_t *loc, uint64_t addr, uint8_t length) {
Expand Down
1 change: 1 addition & 0 deletions lld/MachO/SyntheticSections.h
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,7 @@ class WordLiteralSection final : public SyntheticSection {
};

struct InStruct {
const uint8_t *bufferStart = nullptr;
MachHeaderSection *header = nullptr;
CStringSection *cStringSection = nullptr;
WordLiteralSection *wordLiteralSection = nullptr;
Expand Down
6 changes: 3 additions & 3 deletions lld/MachO/Writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1048,10 +1048,10 @@ void Writer::openFile() {
FileOutputBuffer::F_executable);

if (!bufferOrErr)
error("failed to open " + config->outputFile + ": " +
fatal("failed to open " + config->outputFile + ": " +
llvm::toString(bufferOrErr.takeError()));
else
buffer = std::move(*bufferOrErr);
buffer = std::move(*bufferOrErr);
in.bufferStart = buffer->getBufferStart();
}

void Writer::writeSections() {
Expand Down
4 changes: 2 additions & 2 deletions lld/test/MachO/invalid/range-check.s
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
# RUN: %lld -dylib %t/bar.o -o %t/libbar.dylib
# RUN: not %lld -lSystem -o /dev/null %t/libbar.dylib %t/test.o 2>&1 | FileCheck %s

# CHECK: error: relocation UNSIGNED is out of range: [[#]] is not in [0, 4294967295]; references _foo
# CHECK: error: relocation GOT_LOAD is out of range: [[#]] is not in [-2147483648, 2147483647]; references _foo
# CHECK: error: {{.*}}test.o:(symbol _main+0xd): relocation UNSIGNED is out of range: [[#]] is not in [0, 4294967295]; references _foo
# CHECK: error: {{.*}}test.o:(symbol _main+0x3): relocation GOT_LOAD is out of range: [[#]] is not in [-2147483648, 2147483647]; references _foo
# CHECK: error: stub is out of range: [[#]] is not in [-2147483648, 2147483647]; references _bar
# CHECK: error: stub helper header is out of range: [[#]] is not in [-2147483648, 2147483647]
# CHECK: error: stub helper header is out of range: [[#]] is not in [-2147483648, 2147483647]
Expand Down

0 comments on commit a552fb2

Please sign in to comment.