Skip to content

Commit

Permalink
[lld-macho][NFC] Preserve original symbol isec, unwindEntry and size (#…
Browse files Browse the repository at this point in the history
…88357)

Currently, when moving symbols from one `InputSection` to another (as
in ICF), we directly update the symbol's `isec`, `unwindEntry` and
`size`. By doing this we lose the original information, which will be
needed in a future change. Since we always set the symbol's
`wasCoalesced` and `isec->replacement` when moving symbols, we can
use this info to conditionally get the information we need at access
time.
  • Loading branch information
alx32 committed Apr 18, 2024
1 parent a71c1b3 commit 2a3a79c
Show file tree
Hide file tree
Showing 14 changed files with 134 additions and 120 deletions.
32 changes: 16 additions & 16 deletions lld/MachO/ICF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,13 +133,13 @@ bool ICF::equalsConstant(const ConcatInputSection *ia,
assert(isa<Defined>(sa));
const auto *da = cast<Defined>(sa);
const auto *db = cast<Defined>(sb);
if (!da->isec || !db->isec) {
if (!da->isec() || !db->isec()) {
assert(da->isAbsolute() && db->isAbsolute());
return da->value + ra.addend == db->value + rb.addend;
}
isecA = da->isec;
isecA = da->isec();
valueA = da->value;
isecB = db->isec;
isecB = db->isec();
valueB = db->value;
} else {
isecA = ra.referent.get<InputSection *>();
Expand Down Expand Up @@ -191,10 +191,10 @@ bool ICF::equalsVariable(const ConcatInputSection *ia,
const auto *db = cast<Defined>(rb.referent.get<Symbol *>());
if (da->isAbsolute())
return true;
isecA = dyn_cast<ConcatInputSection>(da->isec);
isecA = dyn_cast<ConcatInputSection>(da->isec());
if (!isecA)
return true; // literal sections were checked in equalsConstant.
isecB = cast<ConcatInputSection>(db->isec);
isecB = cast<ConcatInputSection>(db->isec());
} else {
const auto *sa = ra.referent.get<InputSection *>();
const auto *sb = rb.referent.get<InputSection *>();
Expand All @@ -212,7 +212,7 @@ bool ICF::equalsVariable(const ConcatInputSection *ia,
// info matches. For simplicity, we only handle the case where there are only
// symbols at offset zero within the section (which is typically the case with
// .subsections_via_symbols.)
auto hasUnwind = [](Defined *d) { return d->unwindEntry != nullptr; };
auto hasUnwind = [](Defined *d) { return d->unwindEntry() != nullptr; };
const auto *itA = llvm::find_if(ia->symbols, hasUnwind);
const auto *itB = llvm::find_if(ib->symbols, hasUnwind);
if (itA == ia->symbols.end())
Expand All @@ -221,8 +221,8 @@ bool ICF::equalsVariable(const ConcatInputSection *ia,
return false;
const Defined *da = *itA;
const Defined *db = *itB;
if (da->unwindEntry->icfEqClass[icfPass % 2] !=
db->unwindEntry->icfEqClass[icfPass % 2] ||
if (da->unwindEntry()->icfEqClass[icfPass % 2] !=
db->unwindEntry()->icfEqClass[icfPass % 2] ||
da->value != 0 || db->value != 0)
return false;
auto isZero = [](Defined *d) { return d->value == 0; };
Expand Down Expand Up @@ -289,13 +289,13 @@ void ICF::run() {
for (const Reloc &r : isec->relocs) {
if (auto *sym = r.referent.dyn_cast<Symbol *>()) {
if (auto *defined = dyn_cast<Defined>(sym)) {
if (defined->isec) {
if (defined->isec()) {
if (auto *referentIsec =
dyn_cast<ConcatInputSection>(defined->isec))
dyn_cast<ConcatInputSection>(defined->isec()))
hash += defined->value + referentIsec->icfEqClass[icfPass % 2];
else
hash += defined->isec->kind() +
defined->isec->getOffset(defined->value);
hash += defined->isec()->kind() +
defined->isec()->getOffset(defined->value);
} else {
hash += defined->value;
}
Expand Down Expand Up @@ -368,8 +368,8 @@ void ICF::segregate(size_t begin, size_t end, EqualsFn equals) {

void macho::markSymAsAddrSig(Symbol *s) {
if (auto *d = dyn_cast_or_null<Defined>(s))
if (d->isec)
d->isec->keepUnique = true;
if (d->isec())
d->isec()->keepUnique = true;
}

void macho::markAddrSigSymbols() {
Expand Down Expand Up @@ -430,8 +430,8 @@ void macho::foldIdenticalSections(bool onlyCfStrings) {
if (isFoldable) {
foldable.push_back(isec);
for (Defined *d : isec->symbols)
if (d->unwindEntry)
foldable.push_back(d->unwindEntry);
if (d->unwindEntry())
foldable.push_back(d->unwindEntry());

// Some sections have embedded addends that foil ICF's hashing / equality
// checks. (We can ignore embedded addends when doing ICF because the same
Expand Down
19 changes: 10 additions & 9 deletions lld/MachO/InputFiles.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1170,7 +1170,7 @@ void ObjFile::registerCompactUnwind(Section &compactUnwindSection) {
continue;
}
add += sym->value;
referentIsec = cast<ConcatInputSection>(sym->isec);
referentIsec = cast<ConcatInputSection>(sym->isec());
} else {
referentIsec =
cast<ConcatInputSection>(r.referent.dyn_cast<InputSection *>());
Expand All @@ -1191,7 +1191,7 @@ void ObjFile::registerCompactUnwind(Section &compactUnwindSection) {
++it;
continue;
}
d->unwindEntry = isec;
d->originalUnwindEntry = isec;
// Now that the symbol points to the unwind entry, we can remove the reloc
// that points from the unwind entry back to the symbol.
//
Expand Down Expand Up @@ -1348,7 +1348,7 @@ targetSymFromCanonicalSubtractor(const InputSection *isec,
}
if (Invert)
std::swap(pcSym, target);
if (pcSym->isec == isec) {
if (pcSym->isec() == isec) {
if (pcSym->value - (Invert ? -1 : 1) * minuend.addend != subtrahend.offset)
fatal("invalid FDE relocation in __eh_frame");
} else {
Expand Down Expand Up @@ -1420,7 +1420,7 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
// We already have an explicit relocation for the CIE offset.
cieIsec =
targetSymFromCanonicalSubtractor</*Invert=*/true>(isec, cieOffRelocIt)
->isec;
->isec();
dataOff += sizeof(uint32_t);
} else {
// If we haven't found a relocation, then the CIE offset is most likely
Expand Down Expand Up @@ -1480,15 +1480,15 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
// to register the unwind entry under same symbol.
// This is not particularly efficient, but we should run into this case
// infrequently (only when handling the output of `ld -r`).
if (funcSym->isec)
funcSym = findSymbolAtOffset(cast<ConcatInputSection>(funcSym->isec),
if (funcSym->isec())
funcSym = findSymbolAtOffset(cast<ConcatInputSection>(funcSym->isec()),
funcSym->value);
} else {
funcSym = findSymbolAtAddress(sections, funcAddr);
ehRelocator.makePcRel(funcAddrOff, funcSym, target->p2WordSize);
}
// The symbol has been coalesced, or already has a compact unwind entry.
if (!funcSym || funcSym->getFile() != this || funcSym->unwindEntry) {
if (!funcSym || funcSym->getFile() != this || funcSym->unwindEntry()) {
// We must prune unused FDEs for correctness, so we cannot rely on
// -dead_strip being enabled.
isec->live = false;
Expand All @@ -1497,7 +1497,8 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {

InputSection *lsdaIsec = nullptr;
if (lsdaAddrRelocIt != isec->relocs.end()) {
lsdaIsec = targetSymFromCanonicalSubtractor(isec, lsdaAddrRelocIt)->isec;
lsdaIsec =
targetSymFromCanonicalSubtractor(isec, lsdaAddrRelocIt)->isec();
} else if (lsdaAddrOpt) {
uint64_t lsdaAddr = *lsdaAddrOpt;
Section *sec = findContainingSection(sections, &lsdaAddr);
Expand All @@ -1507,7 +1508,7 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
}

fdes[isec] = {funcLength, cie.personalitySymbol, lsdaIsec};
funcSym->unwindEntry = isec;
funcSym->originalUnwindEntry = isec;
ehRelocator.commit();
}

Expand Down
6 changes: 2 additions & 4 deletions lld/MachO/InputSection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,10 +194,8 @@ void ConcatInputSection::foldIdentical(ConcatInputSection *copy) {
copy->live = false;
copy->wasCoalesced = true;
copy->replacement = this;
for (auto &copySym : copy->symbols) {
for (auto &copySym : copy->symbols)
copySym->wasIdenticalCodeFolded = true;
copySym->size = 0;
}

symbols.insert(symbols.end(), copy->symbols.begin(), copy->symbols.end());
copy->symbols.clear();
Expand All @@ -207,7 +205,7 @@ void ConcatInputSection::foldIdentical(ConcatInputSection *copy) {
return;
for (auto it = symbols.begin() + 1; it != symbols.end(); ++it) {
assert((*it)->value == 0);
(*it)->unwindEntry = nullptr;
(*it)->originalUnwindEntry = nullptr;
}
}

Expand Down
17 changes: 12 additions & 5 deletions lld/MachO/MapFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ static MapInfo gatherMapInfo() {
// Only emit the prevailing definition of a symbol. Also, don't emit
// the symbol if it is part of a cstring section (we use the literal
// value instead, similar to ld64)
if (d->isec && d->getFile() == file &&
!isa<CStringInputSection>(d->isec)) {
if (d->isec() && d->getFile() == file &&
!isa<CStringInputSection>(d->isec())) {
isReferencedFile = true;
if (!d->isLive())
info.deadSymbols.push_back(d);
Expand Down Expand Up @@ -155,6 +155,12 @@ static void printNonLazyPointerSection(raw_fd_ostream &os,
target->wordSize, sym->getName().str().data());
}

// Returns the size to print for `sym` in the map file: 0 when the symbol is
// flagged as identical-code-folded, otherwise the size recorded on the symbol.
static uint64_t getSymSizeForMap(Defined *sym) {
return sym->wasIdenticalCodeFolded ? 0 : sym->size;
}

void macho::writeMapFile() {
if (config->mapFile.empty())
return;
Expand Down Expand Up @@ -201,9 +207,10 @@ void macho::writeMapFile() {
auto printIsecArrSyms = [&](const std::vector<ConcatInputSection *> &arr) {
for (const ConcatInputSection *isec : arr) {
for (Defined *sym : isec->symbols) {
if (!(isPrivateLabel(sym->getName()) && sym->size == 0))
if (!(isPrivateLabel(sym->getName()) && getSymSizeForMap(sym) == 0))
os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(),
sym->size, readerToFileOrdinal[sym->getFile()],
getSymSizeForMap(sym),
readerToFileOrdinal[sym->getFile()],
sym->getName().str().data());
}
}
Expand Down Expand Up @@ -255,7 +262,7 @@ void macho::writeMapFile() {
os << "# \tSize \tFile Name\n";
for (Defined *sym : info.deadSymbols) {
assert(!sym->isLive());
os << format("<<dead>>\t0x%08llX\t[%3u] %s\n", sym->size,
os << format("<<dead>>\t0x%08llX\t[%3u] %s\n", getSymSizeForMap(sym),
readerToFileOrdinal[sym->getFile()],
sym->getName().str().data());
}
Expand Down
10 changes: 5 additions & 5 deletions lld/MachO/MarkLive.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,10 +110,10 @@ void MarkLiveImpl<RecordWhyLive>::addSym(
if (!config->whyLive.empty() && config->whyLive.match(s->getName()))
printWhyLive(s, prev);
if (auto *d = dyn_cast<Defined>(s)) {
if (d->isec)
enqueue(d->isec, d->value, prev);
if (d->unwindEntry)
enqueue(d->unwindEntry, 0, prev);
if (d->isec())
enqueue(d->isec(), d->value, prev);
if (d->unwindEntry())
enqueue(d->unwindEntry(), 0, prev);
}
}

Expand Down Expand Up @@ -179,7 +179,7 @@ void MarkLiveImpl<RecordWhyLive>::markTransitively() {
if (s->isLive()) {
InputSection *referentIsec = nullptr;
if (auto *d = dyn_cast<Defined>(s))
referentIsec = d->isec;
referentIsec = d->isec();
enqueue(isec, 0, makeEntry(referentIsec, nullptr));
}
} else {
Expand Down
36 changes: 18 additions & 18 deletions lld/MachO/ObjC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,8 +191,8 @@ static StringRef getReferentString(const Reloc &r) {
if (auto *isec = r.referent.dyn_cast<InputSection *>())
return cast<CStringInputSection>(isec)->getStringRefAtOffset(r.addend);
auto *sym = cast<Defined>(r.referent.get<Symbol *>());
return cast<CStringInputSection>(sym->isec)->getStringRefAtOffset(sym->value +
r.addend);
return cast<CStringInputSection>(sym->isec())
->getStringRefAtOffset(sym->value + r.addend);
}

void ObjcCategoryChecker::parseMethods(const ConcatInputSection *methodsIsec,
Expand Down Expand Up @@ -306,15 +306,15 @@ void ObjcCategoryChecker::parseClass(const Defined *classSym) {
return nullptr;
};

const auto *classIsec = cast<ConcatInputSection>(classSym->isec);
const auto *classIsec = cast<ConcatInputSection>(classSym->isec());

// Parse instance methods.
if (const auto *instanceMethodsIsec = getMethodsIsec(classIsec))
parseMethods(instanceMethodsIsec, classSym, classIsec, MCK_Class,
MK_Instance);

// Class methods are contained in the metaclass.
if (const auto *r = classSym->isec->getRelocAt(classLayout.metaClassOffset))
if (const auto *r = classSym->isec()->getRelocAt(classLayout.metaClassOffset))
if (const auto *classMethodsIsec = getMethodsIsec(
cast<ConcatInputSection>(r->getReferentInputSection())))
parseMethods(classMethodsIsec, classSym, classIsec, MCK_Class, MK_Static);
Expand Down Expand Up @@ -561,9 +561,9 @@ void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset(
if (!sym)
return;

if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(sym->isec))
if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(sym->isec()))
eraseISec(cisec);
else if (auto *csisec = dyn_cast_or_null<CStringInputSection>(sym->isec)) {
else if (auto *csisec = dyn_cast_or_null<CStringInputSection>(sym->isec())) {
uint32_t totalOffset = sym->value + reloc->addend;
StringPiece &piece = csisec->getStringPiece(totalOffset);
piece.live = false;
Expand All @@ -588,7 +588,7 @@ void ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
assert(catNameSym && "Category does not have a valid name Symbol");

collectSectionWriteInfoFromIsec<CStringSection>(
catNameSym->isec, infoCategoryWriter.catNameInfo);
catNameSym->isec(), infoCategoryWriter.catNameInfo);
}

// Collect writer info from all the category lists (we're assuming they all
Expand All @@ -599,7 +599,7 @@ void ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
if (Defined *ptrList =
tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, off)) {
collectSectionWriteInfoFromIsec<ConcatOutputSection>(
ptrList->isec, infoCategoryWriter.catPtrListInfo);
ptrList->isec(), infoCategoryWriter.catPtrListInfo);
// we've successfully collected data, so we can break
break;
}
Expand Down Expand Up @@ -627,7 +627,7 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
// platform pointer size, but to simplify implementation we always just read
// the lower 32b which should be good enough.
uint32_t protocolCount = *reinterpret_cast<const uint32_t *>(
ptrListSym->isec->data.data() + listHeaderLayout.structSizeOffset);
ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);

ptrList.structCount += protocolCount;
ptrList.structSize = target->wordSize;
Expand All @@ -636,15 +636,15 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
(protocolCount * target->wordSize) +
/*header(count)*/ protocolListHeaderLayout.totalSize +
/*extra null value*/ target->wordSize;
assert(expectedListSize == ptrListSym->isec->data.size() &&
assert(expectedListSize == ptrListSym->isec()->data.size() &&
"Protocol list does not match expected size");

// Suppress unused var warning
(void)expectedListSize;

uint32_t off = protocolListHeaderLayout.totalSize;
for (uint32_t inx = 0; inx < protocolCount; ++inx) {
const Reloc *reloc = ptrListSym->isec->getRelocAt(off);
const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
assert(reloc && "No reloc found at protocol list offset");

auto *listSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
Expand All @@ -653,7 +653,7 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
ptrList.allPtrs.push_back(listSym);
off += target->wordSize;
}
assert((ptrListSym->isec->getRelocAt(off) == nullptr) &&
assert((ptrListSym->isec()->getRelocAt(off) == nullptr) &&
"expected null terminating protocol");
assert(off + /*extra null value*/ target->wordSize == expectedListSize &&
"Protocol list end offset does not match expected size");
Expand All @@ -678,9 +678,9 @@ void ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection *isec,
assert(ptrListSym && "Reloc does not have a valid Defined");

uint32_t thisStructSize = *reinterpret_cast<const uint32_t *>(
ptrListSym->isec->data.data() + listHeaderLayout.structSizeOffset);
ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);
uint32_t thisStructCount = *reinterpret_cast<const uint32_t *>(
ptrListSym->isec->data.data() + listHeaderLayout.structCountOffset);
ptrListSym->isec()->data.data() + listHeaderLayout.structCountOffset);
assert(thisStructSize == ptrList.pointersPerStruct * target->wordSize);

assert(!ptrList.structSize || (thisStructSize == ptrList.structSize));
Expand All @@ -690,12 +690,12 @@ void ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection *isec,

uint32_t expectedListSize =
listHeaderLayout.totalSize + (thisStructSize * thisStructCount);
assert(expectedListSize == ptrListSym->isec->data.size() &&
assert(expectedListSize == ptrListSym->isec()->data.size() &&
"Pointer list does not match expected size");

for (uint32_t off = listHeaderLayout.totalSize; off < expectedListSize;
off += target->wordSize) {
const Reloc *reloc = ptrListSym->isec->getRelocAt(off);
const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
assert(reloc && "No reloc found at pointer list offset");

auto *listSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
Expand Down Expand Up @@ -1054,7 +1054,7 @@ void ObjcCategoryMerger::createSymbolReference(Defined *refFrom,
r.offset = offset;
r.addend = 0;
r.referent = const_cast<Symbol *>(refTo);
refFrom->isec->relocs.push_back(r);
refFrom->isec()->relocs.push_back(r);
}

void ObjcCategoryMerger::collectAndValidateCategoriesData() {
Expand All @@ -1076,7 +1076,7 @@ void ObjcCategoryMerger::collectAndValidateCategoriesData() {
if (!categorySym->getName().starts_with(objc::symbol_names::category))
continue;

auto *catBodyIsec = dyn_cast<ConcatInputSection>(categorySym->isec);
auto *catBodyIsec = dyn_cast<ConcatInputSection>(categorySym->isec());
assert(catBodyIsec &&
"Category data section is not an ConcatInputSection");

Expand Down

0 comments on commit 2a3a79c

Please sign in to comment.