Skip to content

Commit

Permalink
[lld-macho] Use fewer indirections in UnwindInfo implementation
Browse files Browse the repository at this point in the history
The previous implementation of UnwindInfoSection materialized
all the compact unwind entries & applied their relocations, then parsed
the resulting data to generate the final unwind info. This design had
some unfortunate conseqeuences: since relocations can only be applied
after their referents have had addresses assigned, operations that need
to happen before address assignment must contort themselves. (See
{D113582} and observe how this diff greatly simplifies it.)

Moreover, it made synthesizing new compact unwind entries awkward.
Handling PR50956 will require us to do this synthesis, and is the main
motivation behind this diff.

Previously, instead of generating a new CompactUnwindEntry directly, we
would have had to generate a ConcatInputSection with a number of
`Reloc`s that would then get "flattened" into a CompactUnwindEntry.

This diff introduces an internal representation of `CompactUnwindEntry`
(the former `CompactUnwindEntry` has been renamed to
`CompactUnwindLayout`). The new CompactUnwindEntry stores references to
its personality symbol and LSDA section directly, without the use of
`Reloc` structs.

In addition to being easier to work with, this diff also allows us to
handle unwind info whose personality symbols are located in sections
placed after the `__unwind_info`.

Reviewed By: #lld-macho, oontvoo

Differential Revision: https://reviews.llvm.org/D123276
  • Loading branch information
int3 committed Apr 9, 2022
1 parent a58d0af commit 82dcf30
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 113 deletions.
5 changes: 5 additions & 0 deletions lld/MachO/InputFiles.cpp
Expand Up @@ -1013,6 +1013,11 @@ void ObjFile::registerCompactUnwind() {
referentIsec =
cast<ConcatInputSection>(r.referent.dyn_cast<InputSection *>());
}
// Unwind info lives in __DATA, and finalization of __TEXT will occur
// before finalization of __DATA. Moreover, the finalization of unwind
// info depends on the exact addresses that it references. So it is safe
// for compact unwind to reference addresses in __TEXT, but not addresses
// in any other segment.
if (referentIsec->getSegName() != segment_names::text)
error(isec->getLocation(r.offset) + " references section " +
referentIsec->getName() + " which is not in segment __TEXT");
Expand Down
145 changes: 44 additions & 101 deletions lld/MachO/UnwindInfoSection.cpp
Expand Up @@ -95,14 +95,22 @@ using namespace lld::macho;
// TODO(gkm): prune __eh_frame entries superseded by __unwind_info, PR50410
// TODO(gkm): how do we align the 2nd-level pages?

template <class Ptr> struct CompactUnwindEntry {
template <class Ptr> struct CompactUnwindLayout {
Ptr functionAddress;
uint32_t functionLength;
compact_unwind_encoding_t encoding;
Ptr personality;
Ptr lsda;
};

struct CompactUnwindEntry {
uint64_t functionAddress;
uint32_t functionLength;
compact_unwind_encoding_t encoding;
Symbol *personality;
InputSection *lsda;
};

using EncodingMap = DenseMap<compact_unwind_encoding_t, size_t>;

struct SecondLevelPage {
Expand All @@ -118,8 +126,7 @@ template <class Ptr>
class UnwindInfoSectionImpl final : public UnwindInfoSection {
public:
void prepareRelocations(ConcatInputSection *) override;
void relocateCompactUnwind(std::vector<CompactUnwindEntry<Ptr>> &);
Reloc *findLsdaReloc(ConcatInputSection *) const;
void relocateCompactUnwind(std::vector<CompactUnwindEntry> &);
void encodePersonalities();
void finalize() override;
void writeTo(uint8_t *buf) const override;
Expand All @@ -129,11 +136,10 @@ class UnwindInfoSectionImpl final : public UnwindInfoSection {
EncodingMap commonEncodingIndexes;
// The entries here will be in the same order as their originating symbols
// in symbolsVec.
std::vector<CompactUnwindEntry<Ptr>> cuEntries;
std::vector<CompactUnwindEntry> cuEntries;
// Indices into the cuEntries vector.
std::vector<size_t> cuIndices;
// Indices of personality functions within the GOT.
std::vector<Ptr> personalities;
std::vector<Symbol *> personalities;
SmallDenseMap<std::pair<InputSection *, uint64_t /* addend */>, Symbol *>
personalityTable;
// Indices into cuEntries for CUEs with a non-null LSDA.
Expand Down Expand Up @@ -206,8 +212,7 @@ void UnwindInfoSectionImpl<Ptr>::prepareRelocations(ConcatInputSection *isec) {
// compact unwind entries that references them, and thus appear as section
// relocs. There is no need to prepare them. We only prepare relocs for
// personality functions.
if (r.offset % sizeof(CompactUnwindEntry<Ptr>) !=
offsetof(CompactUnwindEntry<Ptr>, personality))
if (r.offset != offsetof(CompactUnwindLayout<Ptr>, personality))
continue;

if (auto *s = r.referent.dyn_cast<Symbol *>()) {
Expand Down Expand Up @@ -275,55 +280,35 @@ void UnwindInfoSectionImpl<Ptr>::prepareRelocations(ConcatInputSection *isec) {
}
}

// Unwind info lives in __DATA, and finalization of __TEXT will occur before
// finalization of __DATA. Moreover, the finalization of unwind info depends on
// the exact addresses that it references. So it is safe for compact unwind to
// reference addresses in __TEXT, but not addresses in any other segment.
static ConcatInputSection *
checkTextSegment(InputSection *isec, InputSection *cuIsec, uint64_t off) {
if (isec->getSegName() != segment_names::text)
error(cuIsec->getLocation(off) + " references section " + isec->getName() +
" which is not in segment __TEXT");
// __TEXT should always contain ConcatInputSections.
return cast<ConcatInputSection>(isec);
}

// We need to apply the relocations to the pre-link compact unwind section
// before converting it to post-link form. There should only be absolute
// relocations here: since we are not emitting the pre-link CU section, there
// is no source address to make a relative location meaningful.
template <class Ptr>
void UnwindInfoSectionImpl<Ptr>::relocateCompactUnwind(
std::vector<CompactUnwindEntry<Ptr>> &cuEntries) {
std::vector<CompactUnwindEntry> &cuEntries) {
parallelForEachN(0, symbolsVec.size(), [&](size_t i) {
uint8_t *buf = reinterpret_cast<uint8_t *>(cuEntries.data()) +
i * sizeof(CompactUnwindEntry<Ptr>);
CompactUnwindEntry &cu = cuEntries[i];
const Defined *d = symbolsVec[i].second;
// Write the functionAddress.
writeAddress(buf, d->getVA(), sizeof(Ptr) == 8 ? 3 : 2);
cu.functionAddress = d->getVA();
if (!d->unwindEntry)
return;

// Write the rest of the CUE.
memcpy(buf + sizeof(Ptr), d->unwindEntry->data.data(),
d->unwindEntry->data.size());
auto buf = reinterpret_cast<const uint8_t *>(d->unwindEntry->data.data()) -
sizeof(Ptr);
cu.functionLength = support::endian::read32le(
buf + offsetof(CompactUnwindLayout<Ptr>, functionLength));
cu.encoding = support::endian::read32le(
buf + offsetof(CompactUnwindLayout<Ptr>, encoding));
for (const Reloc &r : d->unwindEntry->relocs) {
uint64_t referentVA = 0;
if (auto *referentSym = r.referent.dyn_cast<Symbol *>()) {
if (!isa<Undefined>(referentSym)) {
if (auto *defined = dyn_cast<Defined>(referentSym))
checkTextSegment(defined->isec, d->unwindEntry, r.offset);
// At this point in the link, we may not yet know the final address of
// the GOT, so we just encode the index. We make it a 1-based index so
// that we can distinguish the null pointer case.
referentVA = referentSym->gotIndex + 1;
}
} else {
auto *referentIsec = r.referent.get<InputSection *>();
checkTextSegment(referentIsec, d->unwindEntry, r.offset);
referentVA = referentIsec->getVA(r.addend);
if (r.offset == offsetof(CompactUnwindLayout<Ptr>, personality)) {
cu.personality = r.referent.get<Symbol *>();
} else if (r.offset == offsetof(CompactUnwindLayout<Ptr>, lsda)) {
if (auto *referentSym = r.referent.dyn_cast<Symbol *>())
cu.lsda = cast<Defined>(referentSym)->isec;
else
cu.lsda = r.referent.get<InputSection *>();
}
writeAddress(buf + r.offset, referentVA, r.length);
}
});
}
Expand All @@ -332,8 +317,8 @@ void UnwindInfoSectionImpl<Ptr>::relocateCompactUnwind(
// encode them as 2-bit indices into a small array.
template <class Ptr> void UnwindInfoSectionImpl<Ptr>::encodePersonalities() {
for (size_t idx : cuIndices) {
CompactUnwindEntry<Ptr> &cu = cuEntries[idx];
if (cu.personality == 0)
CompactUnwindEntry &cu = cuEntries[idx];
if (cu.personality == nullptr)
continue;
// Linear search is fast enough for a small array.
auto it = find(personalities, cu.personality);
Expand All @@ -349,7 +334,7 @@ template <class Ptr> void UnwindInfoSectionImpl<Ptr>::encodePersonalities() {
static_cast<compact_unwind_encoding_t>(UNWIND_PERSONALITY_MASK));
}
if (personalities.size() > 3)
error("too many personalities (" + std::to_string(personalities.size()) +
error("too many personalities (" + Twine(personalities.size()) +
") for compact unwind to encode");
}

Expand Down Expand Up @@ -378,20 +363,6 @@ static bool canFoldEncoding(compact_unwind_encoding_t encoding) {
return true;
}

template <class Ptr>
Reloc *
UnwindInfoSectionImpl<Ptr>::findLsdaReloc(ConcatInputSection *isec) const {
if (isec == nullptr)
return nullptr;
auto it = llvm::find_if(isec->relocs, [](const Reloc &r) {
return r.offset % sizeof(CompactUnwindEntry<Ptr>) ==
offsetof(CompactUnwindEntry<Ptr>, lsda);
});
if (it == isec->relocs.end())
return nullptr;
return &*it;
}

// Scan the __LD,__compact_unwind entries and compute the space needs of
// __TEXT,__unwind_info and __TEXT,__eh_frame.
template <class Ptr> void UnwindInfoSectionImpl<Ptr>::finalize() {
Expand Down Expand Up @@ -434,25 +405,9 @@ template <class Ptr> void UnwindInfoSectionImpl<Ptr>::finalize() {
cuEntries[*foldBegin].encoding == cuEntries[*foldEnd].encoding &&
cuEntries[*foldBegin].personality ==
cuEntries[*foldEnd].personality &&
canFoldEncoding(cuEntries[*foldEnd].encoding)) {
// In most cases, we can just compare the values of cuEntries[*].lsda.
// However, it is possible for -rename_section to cause the LSDA section
// (__gcc_except_tab) to be finalized after the unwind info section. In
// that case, we don't yet have unique addresses for the LSDA entries.
// So we check their relocations instead.
// FIXME: should we account for an LSDA at an absolute address? ld64 seems
// to support it, but it seems unlikely to be used in practice.
Reloc *lsda1 = findLsdaReloc(symbolsVec[*foldBegin].second->unwindEntry);
Reloc *lsda2 = findLsdaReloc(symbolsVec[*foldEnd].second->unwindEntry);
if (lsda1 == nullptr && lsda2 == nullptr)
continue;
if (lsda1 == nullptr || lsda2 == nullptr)
break;
if (lsda1->referent != lsda2->referent)
break;
if (lsda1->addend != lsda2->addend)
break;
}
cuEntries[*foldBegin].lsda == cuEntries[*foldEnd].lsda &&
canFoldEncoding(cuEntries[*foldEnd].encoding))
;
*foldWrite++ = *foldBegin;
foldBegin = foldEnd;
}
Expand Down Expand Up @@ -510,7 +465,7 @@ template <class Ptr> void UnwindInfoSectionImpl<Ptr>::finalize() {
sizeof(uint32_t);
while (wordsRemaining >= 1 && i < cuIndices.size()) {
idx = cuIndices[i];
const CompactUnwindEntry<Ptr> *cuPtr = &cuEntries[idx];
const CompactUnwindEntry *cuPtr = &cuEntries[idx];
if (cuPtr->functionAddress >= functionAddressMax) {
break;
} else if (commonEncodingIndexes.count(cuPtr->encoding) ||
Expand Down Expand Up @@ -545,8 +500,7 @@ template <class Ptr> void UnwindInfoSectionImpl<Ptr>::finalize() {

for (size_t idx : cuIndices) {
lsdaIndex[idx] = entriesWithLsda.size();
const Defined *d = symbolsVec[idx].second;
if (findLsdaReloc(d->unwindEntry))
if (cuEntries[idx].lsda)
entriesWithLsda.push_back(idx);
}

Expand Down Expand Up @@ -588,9 +542,8 @@ void UnwindInfoSectionImpl<Ptr>::writeTo(uint8_t *buf) const {
*i32p++ = encoding.first;

// Personalities
for (Ptr personality : personalities)
*i32p++ =
in.got->addr + (personality - 1) * target->wordSize - in.header->addr;
for (const Symbol *personality : personalities)
*i32p++ = personality->getGotVA() - in.header->addr;

// Level-1 index
uint32_t lsdaOffset =
Expand All @@ -609,7 +562,7 @@ void UnwindInfoSectionImpl<Ptr>::writeTo(uint8_t *buf) const {
l2PagesOffset += SECOND_LEVEL_PAGE_BYTES;
}
// Level-1 sentinel
const CompactUnwindEntry<Ptr> &cuEnd = cuEntries[cuIndices.back()];
const CompactUnwindEntry &cuEnd = cuEntries[cuIndices.back()];
iep->functionOffset =
cuEnd.functionAddress - in.header->addr + cuEnd.functionLength;
iep->secondLevelPagesSectionOffset = 0;
Expand All @@ -622,18 +575,8 @@ void UnwindInfoSectionImpl<Ptr>::writeTo(uint8_t *buf) const {
auto *lep =
reinterpret_cast<unwind_info_section_header_lsda_index_entry *>(iep);
for (size_t idx : entriesWithLsda) {
const CompactUnwindEntry<Ptr> &cu = cuEntries[idx];
const Defined *d = symbolsVec[idx].second;
if (Reloc *r = findLsdaReloc(d->unwindEntry)) {
uint64_t va;
if (auto *isec = r->referent.dyn_cast<InputSection *>()) {
va = isec->getVA(r->addend);
} else {
auto *sym = r->referent.get<Symbol *>();
va = sym->getVA() + r->addend;
}
lep->lsdaOffset = va - in.header->addr;
}
const CompactUnwindEntry &cu = cuEntries[idx];
lep->lsdaOffset = cu.lsda->getVA(/*off=*/0) - in.header->addr;
lep->functionOffset = cu.functionAddress - in.header->addr;
lep++;
}
Expand All @@ -656,7 +599,7 @@ void UnwindInfoSectionImpl<Ptr>::writeTo(uint8_t *buf) const {
p2p->encodingsCount = page.localEncodings.size();
auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]);
for (size_t i = 0; i < page.entryCount; i++) {
const CompactUnwindEntry<Ptr> &cue =
const CompactUnwindEntry &cue =
cuEntries[cuIndices[page.entryIndex + i]];
auto it = commonEncodingIndexes.find(cue.encoding);
if (it == commonEncodingIndexes.end())
Expand All @@ -676,7 +619,7 @@ void UnwindInfoSectionImpl<Ptr>::writeTo(uint8_t *buf) const {
p2p->entryCount = page.entryCount;
auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]);
for (size_t i = 0; i < page.entryCount; i++) {
const CompactUnwindEntry<Ptr> &cue =
const CompactUnwindEntry &cue =
cuEntries[cuIndices[page.entryIndex + i]];
*ep++ = cue.functionAddress;
*ep++ = cue.encoding;
Expand Down
8 changes: 8 additions & 0 deletions lld/test/MachO/compact-unwind.s
Expand Up @@ -81,6 +81,7 @@
.globl _my_personality, _exception0
.text
.p2align 2
.no_dead_strip _foo
_foo:
.cfi_startproc
## This will generate a section relocation.
Expand All @@ -91,6 +92,7 @@ _foo:
.cfi_endproc

.p2align 2
.no_dead_strip _bar
_bar:
.cfi_startproc
## Check that we dedup references to the same statically-linked personality.
Expand All @@ -100,14 +102,20 @@ _bar:
ret
.cfi_endproc

.data
.p2align 2
## We put this personality in `__data` to test if we correctly handle
## personality symbols whose output addresses occur after that of the
## `__unwind_info` section.
_my_personality:
ret

.section __TEXT,__gcc_except_tab
_exception0:
.space 1

.subsections_via_symbols

#--- main.s
.globl _main, _quux, _my_personality, _exception1

Expand Down
18 changes: 6 additions & 12 deletions lld/test/MachO/invalid/compact-unwind-bad-reloc.s
@@ -1,10 +1,9 @@
# REQUIRES: x86
# RUN: rm -rf %t; split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %t/bad-function.s -o %t/bad-function.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %t/bad-personality.s -o %t/bad-personality.o
# RUN: not %lld -lSystem -lc++ %t/bad-function.o -o %t 2>&1 | FileCheck %s -DFILE=%t/bad-function.o -D#OFF=0x0
# RUN: not %lld -lSystem -lc++ %t/bad-personality.o -o %t 2>&1 | FileCheck %s -DFILE=%t/bad-personality.o -D#OFF=0x10
# CHECK: error: [[FILE]]:(__compact_unwind+0x[[#%x,OFF]]) references section __data which is not in segment __TEXT
# RUN: not %lld -lSystem -dylib -lc++ %t/bad-function.o -o /dev/null 2>&1 | FileCheck %s
# CHECK: error: {{.*}}bad-function.o:(__compact_unwind+0x0) references section __data which is not in segment __TEXT
# CHECK: error: {{.*}}bad-function.o:(__compact_unwind+0x20) references section __data which is not in segment __TEXT

#--- bad-function.s
.data
Expand All @@ -15,16 +14,11 @@ _not_a_function:
retq
.cfi_endproc

#--- bad-personality.s
.globl _main, _not_a_function
.text
_main:
_not_a_function_2:
.cfi_startproc
.cfi_personality 155, _my_personality
.cfi_personality 155, ___gxx_personality_v0
.cfi_def_cfa_offset 16
retq
.cfi_endproc

.data
.globl _my_personality
_my_personality:
.subsections_via_symbols

0 comments on commit 82dcf30

Please sign in to comment.