diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index 16d019fb477bd5..89405d21878a2e 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -1172,16 +1172,37 @@ static CIE parseCIE(const InputSection *isec, const EhReader &reader, // // Concretely, we expect our relocations to write the value of `PC - // target_addr` to `PC`. `PC` itself is denoted by a minuend relocation that -// points to a symbol or section plus an addend. +// points to a symbol plus an addend. +// +// It is important that the minuend relocation point to a symbol within the +// same section as the fixup value, since sections may get moved around. +// +// For example, for arm64, llvm-mc emits relocations for the target function +// address like so: +// +// ltmp: +// +// ... +// +// ... multiple FDEs ... +// +// +// ... +// +// If any of the FDEs in `multiple FDEs` get dead-stripped, then `FDE start` +// will move to an earlier address, and `ltmp + pcrel offset` will no longer +// reflect an accurate pcrel value. To avoid this problem, we "canonicalize" +// our relocation by adding an `EH_Frame` symbol at `FDE start`, and updating +// the reloc to be `target function address - (EH_Frame + new pcrel offset)`. // // If `Invert` is set, then we instead expect `target_addr - PC` to be written // to `PC`. template Defined * -getTargetSymbolFromSubtraction(const InputSection *isec, - std::vector::iterator relocIt) { - const macho::Reloc &subtrahend = *relocIt; - const macho::Reloc &minuend = *std::next(relocIt); +targetSymFromCanonicalSubtractor(const InputSection *isec, + std::vector::iterator relocIt) { + macho::Reloc &subtrahend = *relocIt; + macho::Reloc &minuend = *std::next(relocIt); assert(target->hasAttr(subtrahend.type, RelocAttrBits::SUBTRAHEND)); assert(target->hasAttr(minuend.type, RelocAttrBits::UNSIGNED)); // Note: pcSym may *not* be exactly at the PC; there's usually a non-zero @@ -1196,9 +1217,21 @@ getTargetSymbolFromSubtraction(const InputSection *isec, } if (Invert) std::swap(pcSym, target); - if (pcSym->isec != isec || - pcSym->value - (Invert ? -1 : 1) * minuend.addend != subtrahend.offset) - fatal("invalid FDE relocation in __eh_frame"); + if (pcSym->isec == isec) { + if (pcSym->value - (Invert ? -1 : 1) * minuend.addend != subtrahend.offset) + fatal("invalid FDE relocation in __eh_frame"); + } else { + // Ensure the pcReloc points to a symbol within the current EH frame. + // HACK: we should really verify that the original relocation's semantics + // are preserved. In particular, we should have + // `oldSym->value + oldOffset == newSym + newOffset`. However, we don't + // have an easy way to access the offsets from this point in the code; some + // refactoring is needed for that. + macho::Reloc &pcReloc = Invert ? minuend : subtrahend; + pcReloc.referent = isec->symbols[0]; + assert(isec->symbols[0]->value == 0); + minuend.addend = pcReloc.offset * (Invert ? 1LL : -1LL); + } return target; } @@ -1255,7 +1288,7 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) { if (cieOffRelocIt != isec->relocs.end()) { // We already have an explicit relocation for the CIE offset. cieIsec = - getTargetSymbolFromSubtraction(isec, cieOffRelocIt) + targetSymFromCanonicalSubtractor(isec, cieOffRelocIt) ->isec; dataOff += sizeof(uint32_t); } else { @@ -1310,7 +1343,7 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) { Defined *funcSym; if (funcAddrRelocIt != isec->relocs.end()) { - funcSym = getTargetSymbolFromSubtraction(isec, funcAddrRelocIt); + funcSym = targetSymFromCanonicalSubtractor(isec, funcAddrRelocIt); } else { funcSym = findSymbolAtAddress(sections, funcAddr); ehRelocator.makePcRel(funcAddrOff, funcSym, target->p2WordSize); @@ -1325,7 +1358,7 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) { InputSection *lsdaIsec = nullptr; if (lsdaAddrRelocIt != isec->relocs.end()) { - lsdaIsec = getTargetSymbolFromSubtraction(isec, lsdaAddrRelocIt)->isec; + lsdaIsec = targetSymFromCanonicalSubtractor(isec, lsdaAddrRelocIt)->isec; } else if (lsdaAddrOpt) { uint64_t lsdaAddr = *lsdaAddrOpt; Section *sec = findContainingSection(sections, &lsdaAddr); diff --git a/lld/test/MachO/Inputs/eh-frame-arm64-r.o b/lld/test/MachO/Inputs/eh-frame-arm64-r.o new file mode 100644 index 00000000000000..15e73cacf71d04 Binary files /dev/null and b/lld/test/MachO/Inputs/eh-frame-arm64-r.o differ diff --git a/lld/test/MachO/eh-frame.s b/lld/test/MachO/eh-frame.s index 24fd7bfc93f971..7437fdddbc0a36 100644 --- a/lld/test/MachO/eh-frame.s +++ b/lld/test/MachO/eh-frame.s @@ -20,6 +20,18 @@ # RUN: llvm-nm -m %t/eh-frame-x86_64-r | FileCheck %s --check-prefix NO-EH-SYMS # RUN: llvm-readobj --section-headers %t/eh-frame-x86_64-r | FileCheck %s --check-prefix=ALIGN -D#ALIGN=3 +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos11.0 %s -o %t/eh-frame-arm64.o +# RUN: %lld -arch arm64 -lSystem -lc++ %t/eh-frame-arm64.o -o %t/eh-frame-arm64 +# RUN: llvm-objdump --macho --syms --indirect-symbols --unwind-info \ +# RUN: --dwarf=frames %t/eh-frame-arm64 | FileCheck %s -D#BASE=0x100000000 -D#DWARF_ENC=3 +# RUN: llvm-nm -m %t/eh-frame-arm64 | FileCheck %s --check-prefix NO-EH-SYMS + +# COM: ld -r %t/eh-frame-arm64.o -o %S/Inputs/eh-frame-arm64-r.o +# RUN: %lld -arch arm64 -lSystem -lc++ %S/Inputs/eh-frame-arm64-r.o -o %t/eh-frame-arm64-r +# RUN: llvm-objdump --macho --syms --indirect-symbols --unwind-info \ +# RUN: --dwarf=frames %t/eh-frame-arm64-r | FileCheck %s -D#BASE=0x100000000 -D#DWARF_ENC=3 +# RUN: llvm-nm -m %t/eh-frame-arm64-r | FileCheck %s --check-prefix NO-EH-SYMS + # ALIGN: Name: __eh_frame # ALIGN-NEXT: Segment: __TEXT # ALIGN-NEXT: Address: