diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index aaa914e21c365..008746f53b38b 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -227,6 +227,20 @@ Optional macho::readFile(StringRef path) { InputFile::InputFile(Kind kind, const InterfaceFile &interface) : id(idCount++), fileKind(kind), name(saver.save(interface.getPath())) {} +// Some sections consist of fixed-size records, so instead of splitting them at +// symbol boundaries, we split them based on size. Records are distinct from +// literals in that they may contain references to other sections, instead of +// being leaf nodes in the InputSection graph. +// +// Note that "record" is a term I came up with. In contrast, "literal" is a term +// used by the Mach-O format. +static Optional getRecordSize(StringRef segname, StringRef name) { + if (name == section_names::cfString) + if (config->icfLevel != ICFLevel::none && segname == segment_names::data) + return target->wordSize == 8 ? 32 : 16; + return {}; +} + template void ObjFile::parseSections(ArrayRef
sections) { subsections.reserve(sections.size()); @@ -249,6 +263,24 @@ void ObjFile::parseSections(ArrayRef
sections) { uint32_t align = 1 << sec.align; uint32_t flags = sec.flags; + auto splitRecords = [&](int recordSize) -> void { + subsections.push_back({}); + if (data.size() == 0) + return; + + SubsectionMap &subsecMap = subsections.back(); + subsecMap.reserve(data.size() / recordSize); + auto *isec = make( + segname, name, this, data.slice(0, recordSize), align, flags); + subsecMap.push_back({0, isec}); + for (uint64_t off = recordSize; off < data.size(); off += recordSize) { + // Copying requires less memory than constructing a fresh InputSection. + auto *copy = make(*isec); + copy->data = data.slice(off, recordSize); + subsecMap.push_back({off, copy}); + } + }; + if (sectionType(sec.flags) == S_CSTRING_LITERALS || (config->dedupLiterals && isWordLiteralSection(sec.flags))) { if (sec.nreloc && config->dedupLiterals) @@ -268,17 +300,8 @@ void ObjFile::parseSections(ArrayRef
sections) { flags); } subsections.push_back({{0, isec}}); - } else if (config->icfLevel != ICFLevel::none && - (name == section_names::cfString && - segname == segment_names::data)) { - uint64_t literalSize = target->wordSize == 8 ? 32 : 16; - subsections.push_back({}); - SubsectionMap &subsecMap = subsections.back(); - for (uint64_t off = 0; off < data.size(); off += literalSize) - subsecMap.push_back( - {off, make(segname, name, this, - data.slice(off, literalSize), align, - flags)}); + } else if (auto recordSize = getRecordSize(segname, name)) { + splitRecords(*recordSize); } else if (segname == segment_names::llvm) { // ld64 does not appear to emit contents from sections within the __LLVM // segment. Symbols within those sections point to bitcode metadata