Skip to content

Commit

Permalink
[lld-macho] Check address ranges when applying relocations
Browse files Browse the repository at this point in the history
This diff required fixing `getEmbeddedAddend` to apply sign
extension to 32-bit values. We were previously passing around wrong
64-bit addend values that became "right" after being truncated back to
32-bit.

I've also made `getEmbeddedAddend` return a signed int, which is similar
to what LLD-ELF does for its `getImplicitAddend`.

`reportRangeError`, `checkUInt`, and `checkInt` are counterparts of similar
functions in LLD-ELF.

Reviewed By: #lld-macho, thakis

Differential Revision: https://reviews.llvm.org/D98387
  • Loading branch information
int3 committed Mar 12, 2021
1 parent 7b5ab95 commit dc8bee9
Show file tree
Hide file tree
Showing 7 changed files with 139 additions and 34 deletions.
52 changes: 35 additions & 17 deletions lld/MachO/Arch/ARM64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ namespace {
struct ARM64 : TargetInfo {
ARM64();

uint64_t getEmbeddedAddend(MemoryBufferRef, const section_64 &,
const relocation_info) const override;
int64_t getEmbeddedAddend(MemoryBufferRef, const section_64 &,
const relocation_info) const override;
void relocateOne(uint8_t *loc, const Reloc &, uint64_t va,
uint64_t pc) const override;

Expand Down Expand Up @@ -77,8 +77,8 @@ const RelocAttrs &ARM64::getRelocAttrs(uint8_t type) const {
return relocAttrsArray[type];
}

uint64_t ARM64::getEmbeddedAddend(MemoryBufferRef mb, const section_64 &sec,
const relocation_info rel) const {
int64_t ARM64::getEmbeddedAddend(MemoryBufferRef mb, const section_64 &sec,
const relocation_info rel) const {
if (rel.r_type != ARM64_RELOC_UNSIGNED &&
rel.r_type != ARM64_RELOC_SUBTRACTOR) {
// All other reloc types should use the ADDEND relocation to store their
Expand All @@ -91,7 +91,7 @@ uint64_t ARM64::getEmbeddedAddend(MemoryBufferRef mb, const section_64 &sec,
const uint8_t *loc = buf + sec.offset + rel.r_address;
switch (rel.r_length) {
case 2:
return read32le(loc);
return static_cast<int32_t>(read32le(loc));
case 3:
return read64le(loc);
default:
Expand All @@ -108,18 +108,30 @@ inline uint64_t bitField(uint64_t value, int right, int width, int left) {
// | | imm26 |
// +-----------+---------------------------------------------------+

inline uint64_t encodeBranch26(uint64_t base, uint64_t va) {
inline uint64_t encodeBranch26(const Reloc &r, uint64_t base, uint64_t va) {
checkInt(r, va, 28);
// Since branch destinations are 4-byte aligned, the 2 least-
// significant bits are 0. They are right shifted off the end.
return (base | bitField(va, 2, 26, 0));
}

inline uint64_t encodeBranch26(SymbolDiagnostic d, uint64_t base, uint64_t va) {
checkInt(d, va, 28);
return (base | bitField(va, 2, 26, 0));
}

// 30 29 23 5
// +-+---+---------+-------------------------------------+---------+
// | |ilo| | immhi | |
// +-+---+---------+-------------------------------------+---------+

inline uint64_t encodePage21(uint64_t base, uint64_t va) {
inline uint64_t encodePage21(const Reloc &r, uint64_t base, uint64_t va) {
checkInt(r, va, 35);
return (base | bitField(va, 12, 2, 29) | bitField(va, 14, 19, 5));
}

inline uint64_t encodePage21(SymbolDiagnostic d, uint64_t base, uint64_t va) {
checkInt(d, va, 35);
return (base | bitField(va, 12, 2, 29) | bitField(va, 14, 19, 5));
}

Expand Down Expand Up @@ -158,21 +170,25 @@ void ARM64::relocateOne(uint8_t *loc, const Reloc &r, uint64_t value,
value += r.addend;
switch (r.type) {
case ARM64_RELOC_BRANCH26:
value = encodeBranch26(base, value - pc);
value = encodeBranch26(r, base, value - pc);
break;
case ARM64_RELOC_SUBTRACTOR:
case ARM64_RELOC_UNSIGNED:
if (r.length == 2)
checkInt(r, value, 32);
break;
case ARM64_RELOC_POINTER_TO_GOT:
if (r.pcrel)
value -= pc;
checkInt(r, value, 32);
break;
case ARM64_RELOC_PAGE21:
case ARM64_RELOC_GOT_LOAD_PAGE21:
case ARM64_RELOC_TLVP_LOAD_PAGE21:
case ARM64_RELOC_TLVP_LOAD_PAGE21: {
assert(r.pcrel);
value = encodePage21(base, pageBits(value) - pageBits(pc));
value = encodePage21(r, base, pageBits(value) - pageBits(pc));
break;
}
case ARM64_RELOC_PAGEOFF12:
case ARM64_RELOC_GOT_LOAD_PAGEOFF12:
case ARM64_RELOC_TLVP_LOAD_PAGEOFF12:
Expand Down Expand Up @@ -206,7 +222,8 @@ void ARM64::writeStub(uint8_t *buf8, const macho::Symbol &sym) const {
uint64_t pcPageBits =
pageBits(in.stubs->addr + sym.stubsIndex * sizeof(stubCode));
uint64_t lazyPointerVA = in.lazyPointers->addr + sym.stubsIndex * WordSize;
buf32[0] = encodePage21(stubCode[0], pageBits(lazyPointerVA) - pcPageBits);
buf32[0] = encodePage21({&sym, "stub"}, stubCode[0],
pageBits(lazyPointerVA) - pcPageBits);
buf32[1] = encodePageOff12(stubCode[1], lazyPointerVA);
buf32[2] = stubCode[2];
}
Expand All @@ -226,14 +243,15 @@ void ARM64::writeStubHelperHeader(uint8_t *buf8) const {
return pageBits(in.stubHelper->addr + i * sizeof(uint32_t));
};
uint64_t loaderVA = in.imageLoaderCache->getVA();
buf32[0] =
encodePage21(stubHelperHeaderCode[0], pageBits(loaderVA) - pcPageBits(0));
SymbolDiagnostic d = {nullptr, "stub header helper"};
buf32[0] = encodePage21(d, stubHelperHeaderCode[0],
pageBits(loaderVA) - pcPageBits(0));
buf32[1] = encodePageOff12(stubHelperHeaderCode[1], loaderVA);
buf32[2] = stubHelperHeaderCode[2];
uint64_t binderVA =
in.got->addr + in.stubHelper->stubBinder->gotIndex * WordSize;
buf32[3] =
encodePage21(stubHelperHeaderCode[3], pageBits(binderVA) - pcPageBits(3));
buf32[3] = encodePage21(d, stubHelperHeaderCode[3],
pageBits(binderVA) - pcPageBits(3));
buf32[4] = encodePageOff12(stubHelperHeaderCode[4], binderVA);
buf32[5] = stubHelperHeaderCode[5];
}
Expand All @@ -250,8 +268,8 @@ void ARM64::writeStubHelperEntry(uint8_t *buf8, const DylibSymbol &sym,
auto pcVA = [entryVA](int i) { return entryVA + i * sizeof(uint32_t); };
uint64_t stubHelperHeaderVA = in.stubHelper->addr;
buf32[0] = stubHelperEntryCode[0];
buf32[1] =
encodeBranch26(stubHelperEntryCode[1], stubHelperHeaderVA - pcVA(1));
buf32[1] = encodeBranch26({&sym, "stub helper"}, stubHelperEntryCode[1],
stubHelperHeaderVA - pcVA(1));
buf32[2] = sym.lazyBindOffset;
}

Expand Down
31 changes: 19 additions & 12 deletions lld/MachO/Arch/X86_64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ namespace {
struct X86_64 : TargetInfo {
X86_64();

uint64_t getEmbeddedAddend(MemoryBufferRef, const section_64 &,
const relocation_info) const override;
int64_t getEmbeddedAddend(MemoryBufferRef, const section_64 &,
const relocation_info) const override;
void relocateOne(uint8_t *loc, const Reloc &, uint64_t va,
uint64_t relocVA) const override;

Expand Down Expand Up @@ -77,14 +77,14 @@ static int pcrelOffset(uint8_t type) {
}
}

uint64_t X86_64::getEmbeddedAddend(MemoryBufferRef mb, const section_64 &sec,
relocation_info rel) const {
int64_t X86_64::getEmbeddedAddend(MemoryBufferRef mb, const section_64 &sec,
relocation_info rel) const {
auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
const uint8_t *loc = buf + sec.offset + rel.r_address;

switch (rel.r_length) {
case 2:
return read32le(loc) + pcrelOffset(rel.r_type);
return static_cast<int32_t>(read32le(loc)) + pcrelOffset(rel.r_type);
case 3:
return read64le(loc) + pcrelOffset(rel.r_type);
default:
Expand All @@ -102,6 +102,10 @@ void X86_64::relocateOne(uint8_t *loc, const Reloc &r, uint64_t value,

switch (r.length) {
case 2:
if (r.type == X86_64_RELOC_UNSIGNED)
checkUInt(r, value, 32);
else
checkInt(r, value, 32);
write32le(loc, value);
break;
case 3:
Expand All @@ -121,9 +125,10 @@ void X86_64::relocateOne(uint8_t *loc, const Reloc &r, uint64_t value,
// bufAddr: The virtual address corresponding to buf[0].
// bufOff: The offset within buf of the next instruction.
// destAddr: The destination address that the current instruction references.
static void writeRipRelative(uint8_t *buf, uint64_t bufAddr, uint64_t bufOff,
uint64_t destAddr) {
static void writeRipRelative(SymbolDiagnostic d, uint8_t *buf, uint64_t bufAddr,
uint64_t bufOff, uint64_t destAddr) {
uint64_t rip = bufAddr + bufOff;
checkInt(d, destAddr - rip, 32);
// For the instructions we care about, the RIP-relative address is always
// stored in the last 4 bytes of the instruction.
write32le(buf + bufOff - 4, destAddr - rip);
Expand All @@ -136,7 +141,7 @@ static constexpr uint8_t stub[] = {
void X86_64::writeStub(uint8_t *buf, const macho::Symbol &sym) const {
memcpy(buf, stub, 2); // just copy the two nonzero bytes
uint64_t stubAddr = in.stubs->addr + sym.stubsIndex * sizeof(stub);
writeRipRelative(buf, stubAddr, sizeof(stub),
writeRipRelative({&sym, "stub"}, buf, stubAddr, sizeof(stub),
in.lazyPointers->addr + sym.stubsIndex * WordSize);
}

Expand All @@ -149,8 +154,10 @@ static constexpr uint8_t stubHelperHeader[] = {

void X86_64::writeStubHelperHeader(uint8_t *buf) const {
memcpy(buf, stubHelperHeader, sizeof(stubHelperHeader));
writeRipRelative(buf, in.stubHelper->addr, 7, in.imageLoaderCache->getVA());
writeRipRelative(buf, in.stubHelper->addr, 0xf,
SymbolDiagnostic d = {nullptr, "stub helper header"};
writeRipRelative(d, buf, in.stubHelper->addr, 7,
in.imageLoaderCache->getVA());
writeRipRelative(d, buf, in.stubHelper->addr, 0xf,
in.got->addr +
in.stubHelper->stubBinder->gotIndex * WordSize);
}
Expand All @@ -164,8 +171,8 @@ void X86_64::writeStubHelperEntry(uint8_t *buf, const DylibSymbol &sym,
uint64_t entryAddr) const {
memcpy(buf, stubHelperEntry, sizeof(stubHelperEntry));
write32le(buf + 1, sym.lazyBindOffset);
writeRipRelative(buf, entryAddr, sizeof(stubHelperEntry),
in.stubHelper->addr);
writeRipRelative({&sym, "stub helper"}, buf, entryAddr,
sizeof(stubHelperEntry), in.stubHelper->addr);
}

void X86_64::relaxGotLoad(uint8_t *loc, uint8_t type) const {
Expand Down
6 changes: 3 additions & 3 deletions lld/MachO/InputFiles.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ void ObjFile::parseRelocations(const section_64 &sec,
// and insert them. Storing addends in the instruction stream is
// possible, but inconvenient and more costly at link time.

uint64_t pairedAddend = 0;
int64_t pairedAddend = 0;
relocation_info relInfo = relInfos[i];
if (target->hasAttr(relInfo.r_type, RelocAttrBits::ADDEND)) {
pairedAddend = SignExtend64<24>(relInfo.r_symbolnum);
Expand All @@ -276,9 +276,9 @@ void ObjFile::parseRelocations(const section_64 &sec,
if (relInfo.r_address & R_SCATTERED)
fatal("TODO: Scattered relocations not supported");

uint64_t embeddedAddend = target->getEmbeddedAddend(mb, sec, relInfo);
int64_t embeddedAddend = target->getEmbeddedAddend(mb, sec, relInfo);
assert(!(embeddedAddend && pairedAddend));
uint64_t totalAddend = pairedAddend + embeddedAddend;
int64_t totalAddend = pairedAddend + embeddedAddend;
Reloc r;
r.type = relInfo.r_type;
r.pcrel = relInfo.r_pcrel;
Expand Down
20 changes: 20 additions & 0 deletions lld/MachO/Relocations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,24 @@ bool macho::validateSymbolRelocation(const Symbol *sym,
return valid;
}

void macho::reportRangeError(const Reloc &r, const Twine &v, uint8_t bits,
int64_t min, uint64_t max) {
std::string hint;
if (auto *sym = r.referent.dyn_cast<Symbol *>())
hint = "; references " + toString(*sym);
// TODO: get location of reloc using something like LLD-ELF's getErrorPlace()
error("relocation " + target->getRelocAttrs(r.type).name +
" is out of range: " + v + " is not in [" + Twine(min) + ", " +
Twine(max) + "]" + hint);
}

void macho::reportRangeError(SymbolDiagnostic d, const Twine &v, uint8_t bits,
int64_t min, uint64_t max) {
std::string hint;
if (d.symbol)
hint = "; references " + toString(*d.symbol);
error(d.reason + " is out of range: " + v + " is not in [" + Twine(min) +
", " + Twine(max) + "]" + hint);
}

const RelocAttrs macho::invalidRelocAttrs{"INVALID", RelocAttrBits::_0};
30 changes: 29 additions & 1 deletion lld/MachO/Relocations.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,41 @@ struct Reloc {
uint32_t offset = 0;
// Adding this offset to the address of the referent symbol or subsection
// gives the destination that this relocation refers to.
uint64_t addend = 0;
int64_t addend = 0;
llvm::PointerUnion<Symbol *, InputSection *> referent = nullptr;
};

bool validateSymbolRelocation(const Symbol *, const InputSection *,
const Reloc &);

/*
* v: The value the relocation is attempting to encode
* bits: The number of bits actually available to encode this relocation
*/
void reportRangeError(const Reloc &, const llvm::Twine &v, uint8_t bits,
int64_t min, uint64_t max);

struct SymbolDiagnostic {
const Symbol *symbol;
llvm::StringRef reason;
};

void reportRangeError(SymbolDiagnostic, const llvm::Twine &v, uint8_t bits,
int64_t min, uint64_t max);

template <typename Diagnostic>
inline void checkInt(Diagnostic d, int64_t v, int bits) {
if (v != llvm::SignExtend64(v, bits))
reportRangeError(d, llvm::Twine(v), bits, llvm::minIntN(bits),
llvm::maxIntN(bits));
}

template <typename Diagnostic>
inline void checkUInt(Diagnostic d, uint64_t v, int bits) {
if ((v >> bits) != 0)
reportRangeError(d, llvm::Twine(v), bits, 0, llvm::maxUIntN(bits));
}

extern const RelocAttrs invalidRelocAttrs;

} // namespace macho
Expand Down
2 changes: 1 addition & 1 deletion lld/MachO/Target.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class TargetInfo {
virtual ~TargetInfo() = default;

// Validate the relocation structure and get its addend.
virtual uint64_t
virtual int64_t
getEmbeddedAddend(llvm::MemoryBufferRef, const llvm::MachO::section_64 &,
const llvm::MachO::relocation_info) const = 0;
virtual void relocateOne(uint8_t *loc, const Reloc &, uint64_t va,
Expand Down
32 changes: 32 additions & 0 deletions lld/test/MachO/invalid/range-check.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# REQUIRES: x86

# RUN: split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/bar.s -o %t/bar.o
# RUN: %lld -dylib %t/bar.o -o %t/libbar.dylib
# RUN: not %lld -lSystem -o /dev/null %t/libbar.dylib %t/test.o 2>&1 | FileCheck %s

# CHECK: error: relocation UNSIGNED is out of range: 8589942792 is not in [0, 4294967295]; references _foo
# CHECK: error: relocation GOT_LOAD is out of range: 4294974033 is not in [-2147483648, 2147483647]; references _foo
# CHECK: error: stub is out of range: 4294974006 is not in [-2147483648, 2147483647]; references _bar
# CHECK: error: stub helper header is out of range: 4294974005 is not in [-2147483648, 2147483647]
# CHECK: error: stub helper header is out of range: 4294969893 is not in [-2147483648, 2147483647]

#--- bar.s
.globl _bar
_bar:

#--- test.s
.globl _main, _foo

_main:
movq _foo@GOTPCREL(%rip), %rax
callq _bar
ret

.int _foo
.zerofill __TEXT,bss,_zero,0xffffffff

.data
_foo:
.space 0

0 comments on commit dc8bee9

Please sign in to comment.