Skip to content

Commit

Permalink
[lld-macho] Implement support for ObjC relative method lists
Browse files Browse the repository at this point in the history
  • Loading branch information
Alex B committed Mar 22, 2024
1 parent b8e5363 commit d30f82e
Show file tree
Hide file tree
Showing 11 changed files with 543 additions and 7 deletions.
1 change: 1 addition & 0 deletions lld/MachO/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ struct Configuration {
bool emitEncryptionInfo = false;
bool emitInitOffsets = false;
bool emitChainedFixups = false;
bool emitRelativeMethodLists = false;
bool thinLTOEmitImportsFiles;
bool thinLTOEmitIndexFiles;
bool thinLTOIndexOnly;
Expand Down
17 changes: 17 additions & 0 deletions lld/MachO/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1086,6 +1086,22 @@ static bool shouldEmitChainedFixups(const InputArgList &args) {
return isRequested;
}

static bool shouldEmitRelativeMethodLists(const InputArgList &args) {
const Arg *arg = args.getLastArg(OPT_objc_relative_method_lists,
OPT_no_objc_relative_method_lists);
if (arg && arg->getOption().getID() == OPT_objc_relative_method_lists)
return true;
if (arg && arg->getOption().getID() == OPT_no_objc_relative_method_lists)
return false;

// TODO: If no flag is specified, don't default to false, but instead:
// - default false on < ios14
// - default true on >= ios14
// For now, until this feature is confirmed stable, default to false if no
// flag is explicitly specified
return false;
}

void SymbolPatterns::clear() {
literals.clear();
globs.clear();
Expand Down Expand Up @@ -1629,6 +1645,7 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
config->emitChainedFixups = shouldEmitChainedFixups(args);
config->emitInitOffsets =
config->emitChainedFixups || args.hasArg(OPT_init_offsets);
config->emitRelativeMethodLists = shouldEmitRelativeMethodLists(args);
config->icfLevel = getICFLevel(args);
config->dedupStrings =
args.hasFlag(OPT_deduplicate_strings, OPT_no_deduplicate_strings, true);
Expand Down
8 changes: 8 additions & 0 deletions lld/MachO/InputSection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,14 @@ void lld::macho::addInputSection(InputSection *inputSection) {
if (auto *isec = dyn_cast<ConcatInputSection>(inputSection)) {
if (isec->isCoalescedWeak())
return;
if (config->emitRelativeMethodLists &&
ObjCMethListSection::isMethodList(isec)) {
if (in.objcMethList->inputOrder == UnspecifiedInputOrder)
in.objcMethList->inputOrder = inputSectionsOrder++;
in.objcMethList->addInput(isec);
isec->parent = in.objcMethList;
return;
}
if (config->emitInitOffsets &&
sectionType(isec->getFlags()) == S_MOD_INIT_FUNC_POINTERS) {
in.initOffsets->addInput(isec);
Expand Down
1 change: 1 addition & 0 deletions lld/MachO/InputSection.h
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,7 @@ constexpr const char moduleTermFunc[] = "__mod_term_func";
constexpr const char nonLazySymbolPtr[] = "__nl_symbol_ptr";
constexpr const char objcCatList[] = "__objc_catlist";
constexpr const char objcClassList[] = "__objc_classlist";
constexpr const char objcMethList[] = "__objc_methlist";
constexpr const char objcClassRefs[] = "__objc_classrefs";
constexpr const char objcConst[] = "__objc_const";
constexpr const char objCImageInfo[] = "__objc_imageinfo";
Expand Down
22 changes: 15 additions & 7 deletions lld/MachO/MapFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,18 +197,24 @@ void macho::writeMapFile() {
seg->name.str().c_str(), osec->name.str().c_str());
}

// Shared helper that prints the symbols of every input section in `arr`.
// One line is written to `os` per symbol, holding the symbol's address, its
// size, the ordinal looked up for the symbol's file in readerToFileOrdinal,
// and the symbol's name.
auto printIsecArrSyms = [&](const std::vector<ConcatInputSection *> &arr) {
for (const ConcatInputSection *isec : arr) {
for (Defined *sym : isec->symbols) {
// Symbols that are both private labels and zero-sized are left out of the
// listing; every other symbol is printed.
if (!(isPrivateLabel(sym->getName()) && sym->size == 0))
os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(),
sym->size, readerToFileOrdinal[sym->getFile()],
sym->getName().str().data());
}
}
};

os << "# Symbols:\n";
os << "# Address\tSize \tFile Name\n";
for (const OutputSegment *seg : outputSegments) {
for (const OutputSection *osec : seg->getSections()) {
if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
for (const InputSection *isec : concatOsec->inputs) {
for (Defined *sym : isec->symbols)
if (!(isPrivateLabel(sym->getName()) && sym->size == 0))
os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(),
sym->size, readerToFileOrdinal[sym->getFile()],
sym->getName().str().data());
}
printIsecArrSyms(concatOsec->inputs);
} else if (osec == in.cStringSection || osec == in.objcMethnameSection) {
const auto &liveCStrings = info.liveCStringsForSection.lookup(osec);
uint64_t lastAddr = 0; // strings will never start at address 0, so this
Expand Down Expand Up @@ -237,6 +243,8 @@ void macho::writeMapFile() {
printNonLazyPointerSection(os, in.got);
} else if (osec == in.tlvPointers) {
printNonLazyPointerSection(os, in.tlvPointers);
} else if (osec == in.objcMethList) {
printIsecArrSyms(in.objcMethList->getInputs());
}
// TODO print other synthetic sections
}
Expand Down
2 changes: 2 additions & 0 deletions lld/MachO/ObjC.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ constexpr const char klassPropList[] = "__OBJC_$_CLASS_PROP_LIST_";
constexpr const char metaclass[] = "_OBJC_METACLASS_$_";
constexpr const char ehtype[] = "_OBJC_EHTYPE_$_";
constexpr const char ivar[] = "_OBJC_IVAR_$_";
constexpr const char instanceMethods[] = "__OBJC_$_INSTANCE_METHODS_";
constexpr const char classMethods[] = "__OBJC_$_CLASS_METHODS_";
constexpr const char listProprieties[] = "__OBJC_$_PROP_LIST_";

constexpr const char category[] = "__OBJC_$_CATEGORY_";
Expand Down
6 changes: 6 additions & 0 deletions lld/MachO/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -1284,6 +1284,12 @@ def fixup_chains_section : Flag<["-"], "fixup_chains_section">,
HelpText<"This option is undocumented in ld64">,
Flags<[HelpHidden]>,
Group<grp_undocumented>;
// Opt-in switch: request that ObjC method lists be emitted in the relative
// (offset-based) representation.
def objc_relative_method_lists : Flag<["-"], "objc_relative_method_lists">,
HelpText<"Emit relative method lists (more compact representation)">,
Group<grp_undocumented>;
// Opt-out switch: keep ObjC method lists in the traditional representation.
// NOTE(review): when both switches are given, the driver appears to honor
// whichever one comes last on the command line -- confirm against Driver.cpp.
def no_objc_relative_method_lists : Flag<["-"], "no_objc_relative_method_lists">,
HelpText<"Don't emit relative method lists (use traditional representation)">,
Group<grp_undocumented>;
def flto_codegen_only : Flag<["-"], "flto-codegen-only">,
HelpText<"This option is undocumented in ld64">,
Flags<[HelpHidden]>,
Expand Down
229 changes: 229 additions & 0 deletions lld/MachO/SyntheticSections.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "ExportTrie.h"
#include "InputFiles.h"
#include "MachOStructs.h"
#include "ObjC.h"
#include "OutputSegment.h"
#include "SymbolTable.h"
#include "Symbols.h"
Expand Down Expand Up @@ -1974,6 +1975,234 @@ void InitOffsetsSection::setUp() {
}
}

// Construct the synthetic section that holds ObjC method lists. It is created
// under segment_names::text with the name section_names::objcMethList.
ObjCMethListSection::ObjCMethListSection()
    : SyntheticSection(segment_names::text, section_names::objcMethList) {
  // Use the alignment required for method lists.
  align = m_align;
  // Tag the output section with the no-dead-strip attribute.
  flags = S_ATTR_NO_DEAD_STRIP;
}

// Go through all input method lists and ensure that we have selrefs for all
// their method names. The selrefs will be needed later by ::writeTo. We need to
// create them early on here to ensure they are processed correctly by the lld
// pipeline.
//
// Each input section is expected to consist of a method list header followed
// by fixed-size method structs; the struct size is taken from the header (its
// flag bits are masked off with m_structSizeMask). The first pointer of every
// struct must carry a relocation against a Defined symbol that lives in a
// CStringInputSection (the method name).
void ObjCMethListSection::setUp() {
  for (const ConcatInputSection *isec : inputs) {
    uint32_t structSizeAndFlags = 0, structCount = 0;
    readMethodListHeader(isec->data.data(), structSizeAndFlags, structCount);
    uint32_t structSize = structSizeAndFlags & m_structSizeMask;
    // The loop below advances by structSize; with a zero stride it would
    // never terminate.
    assert(structSize != 0 && "Unexpected zero-sized ObjC method struct");

    // Method name is immediately after header
    uint32_t methodNameOff = m_methodListHeaderSize;

    // Loop through all methods, and ensure a selref for each of them exists.
    while (methodNameOff < isec->data.size()) {
      const Reloc *reloc = isec->getRelocAt(methodNameOff);
      assert(reloc && "Relocation expected at method list name slot");
      auto *def = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
      assert(def && "Expected valid Defined at method list name slot");
      // Use a checked cast so that the assert below is meaningful: cast<>
      // never yields null, which made the original null check dead code.
      auto *cisec = dyn_cast_or_null<CStringInputSection>(def->isec);
      assert(cisec && "Expected method name to be in a CStringInputSection");
      auto methname = cisec->getStringRefAtOffset(def->value);
      if (!in.objcSelRefs->getSelRef(methname))
        in.objcSelRefs->makeSelRef(methname);

      // Jump to method name offset in next struct
      methodNameOff += structSize;
    }
  }
}

// Lay out the section: assign each input method list its offset within the
// output section, accumulate the total section size in m_size, and shrink the
// sizes of the symbols covering a list whenever the relative encoding is
// smaller than the original one.
void ObjCMethListSection::finalize() {
  // m_size is the running total and doubles as the write offset of the next
  // input section.
  m_size = 0;
  for (ConcatInputSection *isec : inputs) {
    assert(m_size == alignToPowerOf2(m_size, m_align) &&
           "expected __objc_methlist to be aligned by default with the "
           "required section alignment");
    isec->outSecOff = m_size;
    isec->isFinal = true;

    const size_t originalSize = isec->data.size();
    const uint32_t relativeSize =
        methodListSizeToRelativeMethodListSize(originalSize);
    m_size += relativeSize;

    // Nothing to patch up when the encoding does not change the size (this is
    // the case when a pointer is as wide as a relative offset, i.e. on 32bit
    // platforms).
    if (relativeSize == originalSize)
      continue;

    // The list got smaller, so the symbols describing it must shrink too.
    for (Symbol *sym : isec->symbols) {
      assert(isa<Defined>(sym) &&
             "Unexpected undefined symbol in ObjC method list");
      auto *def = cast<Defined>(sym);
      // Zero-sized symbols may be present; they are left untouched.
      if (def->size == 0)
        continue;
      assert(def->size == originalSize &&
             "Invalid ObjC method list symbol size: expected symbol size to "
             "match isec size");
      def->size = relativeSize;
    }
  }
}

// Emit every input method list, in order, in relative format starting at
// bufStart. Each list is expected to land exactly at the offset that was
// recorded for it in outSecOff, and the total number of bytes written is
// expected to equal m_size.
void ObjCMethListSection::writeTo(uint8_t *bufStart) const {
  uint8_t *cursor = bufStart;
  for (const ConcatInputSection *isec : inputs) {
    assert(cursor - bufStart == long(isec->outSecOff) &&
           "Writing at unexpected offset");
    // writeRelativeMethodList reports how many bytes it produced.
    cursor += writeRelativeMethodList(isec, cursor);
  }
  assert(cursor - bufStart == m_size &&
         "Written size does not match expected section size");
}

// Check if an InputSection is a method list. The section is scanned for a
// defined symbol whose name starts with one of the prefixes we expect clang to
// generate for method lists; the first match decides. A null section is never
// a method list.
bool ObjCMethListSection::isMethodList(const ConcatInputSection *isec) {
  if (!isec)
    return false;

  const char *knownPrefixes[] = {objc::symbol_names::classMethods,
                                 objc::symbol_names::instanceMethods,
                                 objc::symbol_names::categoryInstanceMethods,
                                 objc::symbol_names::categoryClassMethods};

  for (const Symbol *sym : isec->symbols) {
    auto *def = dyn_cast_or_null<Defined>(sym);
    if (!def)
      continue;

    for (const char *prefix : knownPrefixes) {
      if (!def->getName().starts_with(prefix))
        continue;

      // A method list symbol is expected to span the whole section.
      assert(def->size == isec->data.size() &&
             "Invalid ObjC method list symbol size: expected symbol size to "
             "match isec size");
      assert(def->value == 0 &&
             "Offset of ObjC method list symbol must be 0");
      return true;
    }
  }

  return false;
}

// Encode a single relative offset value. The input is the data/symbol at
// (&isec->data[inSecOff]). The output is written to (&buf[outSecOff]).
// 'useSelRef' indicates that we should not directly use the specified
// symbol, but instead get the selRef for the symbol and use that instead.
//
// On return inSecOff has been advanced by one target word (the size of the
// absolute pointer that was consumed) and outSecOff by 4 bytes (the size of
// the relative offset that was produced).
void ObjCMethListSection::writeRelativeOffsetForIsec(
    const ConcatInputSection *isec, uint8_t *buf, uint32_t &inSecOff,
    uint32_t &outSecOff, bool useSelRef) const {
  const Reloc *reloc = isec->getRelocAt(inSecOff);
  assert(reloc && "Relocation expected at __objc_methlist Offset");
  auto *def = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
  assert(def && "Expected all syms in __objc_methlist to be defined");
  uint32_t symVA = def->getVA();

  if (useSelRef) {
    // The referent is the method name string; point at the selref that was
    // created for that string instead of at the string itself.
    auto *cisec = cast<CStringInputSection>(def->isec);
    auto methname = cisec->getStringRefAtOffset(def->value);
    ConcatInputSection *selRef = in.objcSelRefs->getSelRef(methname);
    assert(selRef && "Expected all selector names to already be already be "
                     "present in __objc_selrefs");
    symVA = selRef->getVA();
    // A selref section holds exactly one pointer, so it must be one target
    // word in size. Compare against the value of wordSize, not against
    // sizeof() of the field that stores it (which depends on the host).
    assert(selRef->data.size() == target->wordSize &&
           "Expected one selref per ConcatInputSection");
  }

  // The stored value is the distance from the slot being written to the
  // target, computed in 32-bit arithmetic.
  // NOTE(review): this assumes the target lies within 32-bit signed range of
  // the slot -- confirm this always holds for the output image.
  uint32_t currentVA = isec->getVA() + outSecOff;
  uint32_t delta = symVA - currentVA;
  write32le(buf + outSecOff, delta);

  inSecOff += target->wordSize;
  outSecOff += sizeof(uint32_t);
}

// Convert one input method list to relative format and store it at buf.
// Returns the number of bytes that were written.
uint32_t
ObjCMethListSection::writeRelativeMethodList(const ConcatInputSection *isec,
                                             uint8_t *buf) const {
  // The header is carried over as-is, except that the "this is a relative
  // method list" flag is set in its first word.
  uint32_t headerWord = 0, methodCount = 0;
  readMethodListHeader(isec->data.data(), headerWord, methodCount);
  headerWord |= m_relMethodHeaderFlag;
  writeMethodListHeader(buf, headerWord, methodCount);

  assert(m_methodListHeaderSize +
                 (methodCount * m_pointersPerStruct * target->wordSize) ==
             isec->data.size() &&
         "Invalid computed ObjC method list size");

  // Read and write positions both start right after the header. Each call to
  // writeRelativeOffsetForIsec advances both of them.
  uint32_t readOff = m_methodListHeaderSize;
  uint32_t writeOff = m_methodListHeaderSize;

  for (uint32_t remaining = methodCount; remaining != 0; --remaining) {
    // First pointer: the method name, redirected through its selref.
    writeRelativeOffsetForIsec(isec, buf, readOff, writeOff,
                               /*useSelRef=*/true);
    // Second pointer: the method type.
    writeRelativeOffsetForIsec(isec, buf, readOff, writeOff,
                               /*useSelRef=*/false);
    // Third pointer, encoded directly.
    // NOTE(review): presumably the method implementation, going by the ObjC
    // runtime's method layout -- confirm.
    writeRelativeOffsetForIsec(isec, buf, readOff, writeOff,
                               /*useSelRef=*/false);
  }

  // The whole input section must have been consumed ...
  assert(readOff == isec->data.size() &&
         "Invalid actual ObjC method list size");
  // ... and the output must have the size predicted from the input size.
  assert(
      writeOff == methodListSizeToRelativeMethodListSize(readOff) &&
      "Mismatch between input & output size when writing relative method list");
  return writeOff;
}

// Given the size of an ObjC method list InputSection, return the size the
// list will have once encoded in relative offsets format. No data needs to be
// decoded for this: the result can be inferred directly from the size of the
// isec, because the header keeps its size and every word-sized pointer after
// it turns into one 32-bit offset.
uint32_t
ObjCMethListSection::methodListSizeToRelativeMethodListSize(uint32_t iSecSize) {
  // Everything past the header is an array of target-word-sized pointers.
  const uint32_t absolutePayloadSize = iSecSize - m_methodListHeaderSize;
  const uint32_t pointerCount = absolutePayloadSize / target->wordSize;
  assert(((pointerCount % m_pointersPerStruct) == 0) &&
         "__objc_methlist expects method lists to have multiple-of-3 pointers");

  constexpr uint32_t relativeEntrySize = sizeof(uint32_t);
  const uint32_t relativeTotalSize =
      m_methodListHeaderSize + pointerCount * relativeEntrySize;

  assert((relativeTotalSize <= iSecSize) &&
         "Expected relative method list size to be smaller or equal than "
         "original size");
  return relativeTotalSize;
}

// Decode a method list header from buf. The header is two consecutive
// little-endian 32-bit words: the first is returned in structSizeAndFlags,
// the second in structCount.
void ObjCMethListSection::readMethodListHeader(const uint8_t *buf,
                                               uint32_t &structSizeAndFlags,
                                               uint32_t &structCount) {
  const uint8_t *countField = buf + sizeof(uint32_t);
  structSizeAndFlags = read32le(buf);
  structCount = read32le(countField);
}

// Encode a method list header into buf as two consecutive little-endian
// 32-bit words: structSizeAndFlags first, structCount right after it.
void ObjCMethListSection::writeMethodListHeader(uint8_t *buf,
                                                uint32_t structSizeAndFlags,
                                                uint32_t structCount) {
  uint8_t *countField = buf + sizeof(structSizeAndFlags);
  write32le(buf, structSizeAndFlags);
  write32le(countField, structCount);
}

void macho::createSyntheticSymbols() {
auto addHeaderSymbol = [](const char *name) {
symtab->addSynthetic(name, in.header->isec, /*value=*/0,
Expand Down

0 comments on commit d30f82e

Please sign in to comment.