diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index 46ff984a0ef71a..267adbc1858878 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -1119,12 +1119,9 @@ bool macho::link(ArrayRef argsArr, bool canExitEarly, TimeTraceScope timeScope("Gathering input sections"); // Gather all InputSections into one vector. for (const InputFile *file : inputFiles) { - for (const SubsectionMap &map : file->subsections) { - for (const auto &p : map) { - InputSection *isec = p.second; - inputSections.push_back(isec); - } - } + for (const SubsectionMapping &map : file->subsections) + for (const SubsectionEntry &subsectionEntry : map) + inputSections.push_back(subsectionEntry.isec); } } diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index 4ef56eaccde416..d37db3619b2955 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -194,11 +194,14 @@ void ObjFile::parseSections(ArrayRef sections) { // any subsection splitting has occurred). It will be updated to represent the // same location as an offset relative to the start of the containing // subsection. -static InputSection *findContainingSubsection(SubsectionMap &map, - uint32_t *offset) { - auto it = std::prev(map.upper_bound(*offset)); - *offset -= it->first; - return it->second; +static InputSection *findContainingSubsection(SubsectionMapping &map, + uint64_t *offset) { + auto it = std::prev(llvm::upper_bound( + map, *offset, [](uint64_t value, SubsectionEntry subsectionEntry) { + return value < subsectionEntry.offset; + })); + *offset -= it->offset; + return it->isec; } static bool validateRelocationInfo(InputFile *file, const section_64 &sec, @@ -233,7 +236,7 @@ static bool validateRelocationInfo(InputFile *file, const section_64 &sec, } void ObjFile::parseRelocations(const section_64 &sec, - SubsectionMap &subsecMap) { + SubsectionMapping &subsecMap) { auto *buf = reinterpret_cast(mb.getBufferStart()); ArrayRef relInfos( reinterpret_cast(buf + sec.reloff), sec.nreloc); @@ -288,9 +291,10 @@ void ObjFile::parseRelocations(const section_64 &sec, r.referent = symbols[relInfo.r_symbolnum]; r.addend = totalAddend; } else { - SubsectionMap &referentSubsecMap = subsections[relInfo.r_symbolnum - 1]; + SubsectionMapping &referentSubsecMap = + subsections[relInfo.r_symbolnum - 1]; const section_64 &referentSec = sectionHeaders[relInfo.r_symbolnum - 1]; - uint32_t referentOffset; + uint64_t referentOffset; if (relInfo.r_pcrel) { // The implicit addend for pcrel section relocations is the pcrel offset // in terms of the addresses in the input file. Here we adjust it so @@ -328,7 +332,7 @@ void ObjFile::parseRelocations(const section_64 &sec, static macho::Symbol *createDefined(const structs::nlist_64 &sym, StringRef name, InputSection *isec, - uint32_t value) { + uint64_t value) { // Symbol scope is determined by sym.n_type & (N_EXT | N_PEXT): // N_EXT: Global symbols // N_EXT | N_PEXT: Linkage unit (think: dylib) scoped @@ -410,11 +414,51 @@ macho::Symbol *ObjFile::parseNonSectionSymbol(const structs::nlist_64 &sym, void ObjFile::parseSymbols(ArrayRef nList, const char *strtab, bool subsectionsViaSymbols) { - // resize(), not reserve(), because we are going to create N_ALT_ENTRY symbols - // out-of-sequence. - symbols.resize(nList.size()); - std::vector altEntrySymIdxs; + // Precompute the boundaries of symbols within a section. + // If subsectionsViaSymbols is True then the corresponding subsections will be + // created, otherwise these boundaries are used for the calculation of symbols + // sizes only. + + for (const structs::nlist_64 &sym : nList) { + if ((sym.n_type & N_TYPE) == N_SECT && !(sym.n_desc & N_ALT_ENTRY) && + !subsections[sym.n_sect - 1].empty()) { + SubsectionMapping &subsectionMapping = subsections[sym.n_sect - 1]; + subsectionMapping.push_back( + {sym.n_value - sectionHeaders[sym.n_sect - 1].addr, + subsectionMapping.front().isec}); + } + } + for (SubsectionMapping &subsectionMap : subsections) { + if (subsectionMap.empty()) + continue; + llvm::sort(subsectionMap, + [](const SubsectionEntry &lhs, const SubsectionEntry &rhs) { + return lhs.offset < rhs.offset; + }); + subsectionMap.erase( + std::unique(subsectionMap.begin(), subsectionMap.end(), + [](const SubsectionEntry &lhs, const SubsectionEntry &rhs) { + return lhs.offset == rhs.offset; + }), + subsectionMap.end()); + if (!subsectionsViaSymbols) + continue; + for (size_t i = 0; i < subsectionMap.size(); ++i) { + uint32_t offset = subsectionMap[i].offset; + InputSection *&isec = subsectionMap[i].isec; + uint32_t end = i + 1 < subsectionMap.size() ? subsectionMap[i + 1].offset + : isec->data.size(); + isec = make(*isec); + isec->data = isec->data.slice(offset, end - offset); + // TODO: ld64 appears to preserve the original alignment as well as each + // subsection's offset from the last aligned address. We should consider + // emulating that behavior. + isec->align = MinAlign(isec->align, offset); + } + } + + symbols.resize(nList.size()); for (size_t i = 0, n = nList.size(); i < n; ++i) { const structs::nlist_64 &sym = nList[i]; StringRef name = strtab + sym.n_strx; @@ -425,7 +469,7 @@ void ObjFile::parseSymbols(ArrayRef nList, } const section_64 &sec = sectionHeaders[sym.n_sect - 1]; - SubsectionMap &subsecMap = subsections[sym.n_sect - 1]; + SubsectionMapping &subsecMap = subsections[sym.n_sect - 1]; // parseSections() may have chosen not to parse this section. if (subsecMap.empty()) @@ -437,55 +481,18 @@ void ObjFile::parseSymbols(ArrayRef nList, // use the same subsection. Otherwise, we must split the sections along // symbol boundaries. if (!subsectionsViaSymbols) { - symbols[i] = createDefined(sym, name, subsecMap[0], offset); + symbols[i] = createDefined(sym, name, subsecMap.front().isec, offset); continue; } - // nList entries aren't necessarily arranged in address order. Therefore, - // we can't create alt-entry symbols at this point because a later symbol - // may split its section, which may affect which subsection the alt-entry - // symbol is assigned to. So we need to handle them in a second pass below. - if (sym.n_desc & N_ALT_ENTRY) { - altEntrySymIdxs.push_back(i); - continue; - } - - // Find the subsection corresponding to the greatest section offset that is - // <= that of the current symbol. The subsection that we find either needs - // to be used directly or split in two. - uint32_t firstSize = offset; - InputSection *firstIsec = findContainingSubsection(subsecMap, &firstSize); - - if (firstSize == 0) { - // Alias of an existing symbol, or the first symbol in the section. These - // are handled by reusing the existing section. - symbols[i] = createDefined(sym, name, firstIsec, 0); - continue; - } - - // We saw a symbol definition at a new offset. Split the section into two - // subsections. The new symbol uses the second subsection. - auto *secondIsec = make(*firstIsec); - secondIsec->data = firstIsec->data.slice(firstSize); - firstIsec->data = firstIsec->data.slice(0, firstSize); - // TODO: ld64 appears to preserve the original alignment as well as each - // subsection's offset from the last aligned address. We should consider - // emulating that behavior. - secondIsec->align = MinAlign(firstIsec->align, offset); - - subsecMap[offset] = secondIsec; - // By construction, the symbol will be at offset zero in the new section. - symbols[i] = createDefined(sym, name, secondIsec, 0); + InputSection *subsec = findContainingSubsection(subsecMap, &offset); + symbols[i] = createDefined(sym, name, subsec, offset); } - for (size_t idx : altEntrySymIdxs) { - const structs::nlist_64 &sym = nList[idx]; - StringRef name = strtab + sym.n_strx; - SubsectionMap &subsecMap = subsections[sym.n_sect - 1]; - uint32_t off = sym.n_value - sectionHeaders[sym.n_sect - 1].addr; - InputSection *subsec = findContainingSubsection(subsecMap, &off); - symbols[idx] = createDefined(sym, name, subsec, off); - } + if (!subsectionsViaSymbols) + for (SubsectionMapping &subsectionMap : subsections) + if (!subsectionMap.empty()) + subsectionMap = {subsectionMap.front()}; } OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName, diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h index 6d6cabe42cf427..5b301d3fc30b35 100644 --- a/lld/MachO/InputFiles.h +++ b/lld/MachO/InputFiles.h @@ -47,9 +47,13 @@ enum class RefState : uint8_t; extern std::unique_ptr tar; // If .subsections_via_symbols is set, each InputSection will be split along -// symbol boundaries. The keys of a SubsectionMap represent the offsets of -// each subsection from the start of the original pre-split InputSection. -using SubsectionMap = std::map; +// symbol boundaries. The field offset represents the offset of the subsection +// from the start of the original pre-split InputSection. +struct SubsectionEntry { + uint64_t offset; + InputSection *isec; +}; +using SubsectionMapping = std::vector; class InputFile { public: @@ -68,7 +72,7 @@ class InputFile { MemoryBufferRef mb; std::vector symbols; - std::vector subsections; + std::vector subsections; // Provides an easy way to sort InputFiles deterministically. const int id; @@ -105,7 +109,7 @@ class ObjFile : public InputFile { void parseSymbols(ArrayRef nList, const char *strtab, bool subsectionsViaSymbols); Symbol *parseNonSectionSymbol(const structs::nlist_64 &sym, StringRef name); - void parseRelocations(const llvm::MachO::section_64 &, SubsectionMap &); + void parseRelocations(const llvm::MachO::section_64 &, SubsectionMapping &); void parseDebugInfo(); }; diff --git a/lld/MachO/Relocations.h b/lld/MachO/Relocations.h index f717c462d7229f..d58f9d47986393 100644 --- a/lld/MachO/Relocations.h +++ b/lld/MachO/Relocations.h @@ -56,7 +56,7 @@ struct Reloc { uint8_t length = 0; // The offset from the start of the subsection that this relocation belongs // to. - uint32_t offset = 0; + uint64_t offset = 0; // Adding this offset to the address of the referent symbol or subsection // gives the destination that this relocation refers to. int64_t addend = 0;