From c9ee8b2867fa961932bdddc2425e6ed96564a175 Mon Sep 17 00:00:00 2001 From: Ellis Hoag Date: Mon, 15 Sep 2025 12:10:35 -0700 Subject: [PATCH 1/4] [LLD][MachO] Option to emit separate cstring sections --- lld/MachO/Config.h | 1 + lld/MachO/Driver.cpp | 6 +++++- lld/MachO/InputSection.cpp | 9 ++++++--- lld/MachO/MapFile.cpp | 4 +++- lld/MachO/ObjC.cpp | 2 +- lld/MachO/Options.td | 7 +++++++ lld/MachO/SyntheticSections.h | 17 ++++++++++++++++- lld/MachO/Writer.cpp | 7 ++----- lld/test/MachO/cstring.ll | 32 ++++++++++++++++++++++++++++++++ 9 files changed, 73 insertions(+), 12 deletions(-) create mode 100644 lld/test/MachO/cstring.ll diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h index 19dba790c1c7c..51b1363d87615 100644 --- a/lld/MachO/Config.h +++ b/lld/MachO/Config.h @@ -222,6 +222,7 @@ struct Configuration { bool pgoWarnMismatch; bool warnThinArchiveMissingMembers; bool disableVerify; + bool separateCstringLiteralSections; bool callGraphProfileSort = false; llvm::StringRef printSymbolOrder; diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index 3db638e1ead96..f54b6bbdc155c 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -1520,7 +1520,8 @@ static void foldIdenticalLiterals() { // We always create a cStringSection, regardless of whether dedupLiterals is // true. If it isn't, we simply create a non-deduplicating CStringSection. // Either way, we must unconditionally finalize it here. 
- in.cStringSection->finalizeContents(); + for (auto &[name, sec] : in.cStringSectionMap) + sec->finalizeContents(); in.objcMethnameSection->finalizeContents(); in.wordLiteralSection->finalizeContents(); } @@ -1981,6 +1982,9 @@ bool link(ArrayRef argsArr, llvm::raw_ostream &stdoutOS, OPT_no_warn_thin_archive_missing_members, true); config->generateUuid = !args.hasArg(OPT_no_uuid); config->disableVerify = args.hasArg(OPT_disable_verify); + config->separateCstringLiteralSections = + args.hasFlag(OPT_separate_cstring_literal_sections, + OPT_no_separate_cstring_literal_sections, false); auto IncompatWithCGSort = [&](StringRef firstArgStr) { // Throw an error only if --call-graph-profile-sort is explicitly specified diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp index 18b3ff961085b..b7718db45aef6 100644 --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -68,9 +68,12 @@ void lld::macho::addInputSection(InputSection *inputSection) { in.objcMethnameSection->inputOrder = inputSectionsOrder++; in.objcMethnameSection->addInput(isec); } else { - if (in.cStringSection->inputOrder == UnspecifiedInputOrder) - in.cStringSection->inputOrder = inputSectionsOrder++; - in.cStringSection->addInput(isec); + auto *osec = in.getOrCreateCStringSection( + config->separateCstringLiteralSections ? 
isec->getName() + : section_names::cString); + if (osec->inputOrder == UnspecifiedInputOrder) + osec->inputOrder = inputSectionsOrder++; + osec->addInput(isec); } } else if (auto *isec = dyn_cast(inputSection)) { if (in.wordLiteralSection->inputOrder == UnspecifiedInputOrder) diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp index f3e221a700b14..5e88e19697d67 100644 --- a/lld/MachO/MapFile.cpp +++ b/lld/MachO/MapFile.cpp @@ -239,7 +239,9 @@ void macho::writeMapFile() { printIsecArrSyms(textOsec->inputs, textOsec->getThunks()); } else if (auto *concatOsec = dyn_cast(osec)) { printIsecArrSyms(concatOsec->inputs); - } else if (osec == in.cStringSection || osec == in.objcMethnameSection) { + } else if (any_of(in.cStringSectionMap, + [&](auto &it) { return osec == it.getValue(); }) || + osec == in.objcMethnameSection) { const auto &liveCStrings = info.liveCStringsForSection.lookup(osec); uint64_t lastAddr = 0; // strings will never start at address 0, so this // is a sentinel value diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp index ab7f73c3a1df6..794b92d2c9d40 100644 --- a/lld/MachO/ObjC.cpp +++ b/lld/MachO/ObjC.cpp @@ -1057,7 +1057,7 @@ Defined *ObjcCategoryMerger::emitCategoryName(const std::string &name, newStringSec->splitIntoPieces(); newStringSec->pieces[0].live = true; newStringSec->parent = infoCategoryWriter.catNameInfo.outputSection; - in.cStringSection->addInput(newStringSec); + in.getOrCreateCStringSection(section_names::cString)->addInput(newStringSec); assert(newStringSec->pieces.size() == 1); Defined *catNameSym = make( diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td index 8ae50f380741a..4eeb8fbe11121 100644 --- a/lld/MachO/Options.td +++ b/lld/MachO/Options.td @@ -1084,6 +1084,13 @@ def dyld_env : Separate<["-"], "dyld_env">, def ignore_auto_link : Flag<["-"], "ignore_auto_link">, HelpText<"Ignore LC_LINKER_OPTIONs">, Group; +defm separate_cstring_literal_sections + : BB<"separate-cstring-literal-sections", + "Emit all cstring 
literals into their respective sections defined by " + "their section names.", + "Emit all cstring literals into the __cstring section. As a special " + "case, the __objc_methname section will still be emitted. (default)">, + Group; def grp_deprecated : OptionGroup<"deprecated">, HelpText<"DEPRECATED">; diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h index 5796b0790c83a..130b2d73af810 100644 --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -843,7 +843,7 @@ void writeChainedFixup(uint8_t *buf, const Symbol *sym, int64_t addend); struct InStruct { const uint8_t *bufferStart = nullptr; MachHeaderSection *header = nullptr; - CStringSection *cStringSection = nullptr; + llvm::StringMap cStringSectionMap; DeduplicatedCStringSection *objcMethnameSection = nullptr; WordLiteralSection *wordLiteralSection = nullptr; RebaseSection *rebase = nullptr; @@ -863,6 +863,21 @@ struct InStruct { InitOffsetsSection *initOffsets = nullptr; ObjCMethListSection *objcMethList = nullptr; ChainedFixupsSection *chainedFixups = nullptr; + + CStringSection *getOrCreateCStringSection(StringRef name) { + auto it = cStringSectionMap.find(name); + if (it != cStringSectionMap.end()) + return it->getValue(); + + std::string &nameData = *make(name); + CStringSection *sec; + if (config->dedupStrings) + sec = make(nameData.c_str()); + else + sec = make(nameData.c_str()); + cStringSectionMap[name] = sec; + return sec; + } }; extern InStruct in; diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp index f288fadc0d14f..59b2264a7f1ab 100644 --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -1377,11 +1377,8 @@ void macho::resetWriter() { LCDylib::resetInstanceCount(); } void macho::createSyntheticSections() { in.header = make(); - if (config->dedupStrings) - in.cStringSection = - make(section_names::cString); - else - in.cStringSection = make(section_names::cString); + // Materialize the cstring section + 
in.getOrCreateCStringSection(section_names::cString); in.objcMethnameSection = make(section_names::objcMethname); in.wordLiteralSection = make(); diff --git a/lld/test/MachO/cstring.ll b/lld/test/MachO/cstring.ll new file mode 100644 index 0000000000000..4f82736b0a5f0 --- /dev/null +++ b/lld/test/MachO/cstring.ll @@ -0,0 +1,32 @@ +; REQUIRES: x86 +; RUN: llvm-as %s -o %t.o + +; RUN: %lld -dylib --separate-cstring-literal-sections %t.o -o - | llvm-objdump --macho --section-headers - | FileCheck %s +; RUN: %lld -dylib --no-separate-cstring-literal-sections %t.o -o - | llvm-objdump --macho --section-headers - | FileCheck %s --check-prefix=CSTR +; RUN: %lld -dylib %t.o -o - | llvm-objdump --macho --section-headers - | FileCheck %s --check-prefix=CSTR + +; CHECK-DAG: __cstring +; CHECK-DAG: __new_sec +; CHECK-DAG: __objc_classname +; CHECK-DAG: __objc_methname +; CHECK-DAG: __objc_methtype + +; CSTR-DAG: __cstring +; CSTR-DAG: __objc_methname + +target triple = "x86_64-apple-darwin" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32" + +@.str = private unnamed_addr constant [10 x i8] c"my string\00", align 1 +@.str1 = private unnamed_addr constant [16 x i8] c"my other string\00", section "__TEXT,__new_sec,cstring_literals", align 1 +@OBJC_CLASS_NAME_ = private unnamed_addr constant [4 x i8] c"foo\00", section "__TEXT,__objc_classname,cstring_literals", align 1 +@OBJC_METH_VAR_NAME_ = private unnamed_addr constant [4 x i8] c"bar\00", section "__TEXT,__objc_methname,cstring_literals", align 1 +@OBJC_METH_VAR_TYPE_ = private unnamed_addr constant [4 x i8] c"goo\00", section "__TEXT,__objc_methtype,cstring_literals", align 1 + +@llvm.compiler.used = appending global [5 x ptr] [ + ptr @.str, + ptr @.str1, + ptr @OBJC_METH_VAR_NAME_, + ptr @OBJC_CLASS_NAME_, + ptr @OBJC_METH_VAR_TYPE_ +] From 20e7d1a09bc6a5e36c1abaf55865b2b1031a14bd Mon Sep 17 00:00:00 2001 From: Ellis Hoag Date: Thu, 18 Sep 2025 11:44:36 -0700 Subject: [PATCH 2/4] use vector to store cstring 
sections --- lld/MachO/Driver.cpp | 3 +-- lld/MachO/InputSection.cpp | 19 +++++++------------ lld/MachO/MapFile.cpp | 4 +--- lld/MachO/ObjC.cpp | 2 +- lld/MachO/SyntheticSections.h | 20 +++++++++++++------- lld/MachO/Writer.cpp | 9 +++++---- 6 files changed, 28 insertions(+), 29 deletions(-) diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index f54b6bbdc155c..c1a4c0443933a 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -1520,9 +1520,8 @@ static void foldIdenticalLiterals() { // We always create a cStringSection, regardless of whether dedupLiterals is // true. If it isn't, we simply create a non-deduplicating CStringSection. // Either way, we must unconditionally finalize it here. - for (auto &[name, sec] : in.cStringSectionMap) + for (auto *sec : in.cStringSections) sec->finalizeContents(); - in.objcMethnameSection->finalizeContents(); in.wordLiteralSection->finalizeContents(); } diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp index b7718db45aef6..b173e14cc86a8 100644 --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -63,18 +63,13 @@ void lld::macho::addInputSection(InputSection *inputSection) { isec->parent = osec; inputSections.push_back(isec); } else if (auto *isec = dyn_cast(inputSection)) { - if (isec->getName() == section_names::objcMethname) { - if (in.objcMethnameSection->inputOrder == UnspecifiedInputOrder) - in.objcMethnameSection->inputOrder = inputSectionsOrder++; - in.objcMethnameSection->addInput(isec); - } else { - auto *osec = in.getOrCreateCStringSection( - config->separateCstringLiteralSections ? isec->getName() - : section_names::cString); - if (osec->inputOrder == UnspecifiedInputOrder) - osec->inputOrder = inputSectionsOrder++; - osec->addInput(isec); - } + bool useSectionName = config->separateCstringLiteralSections || + isec->getName() == section_names::objcMethname; + auto *osec = in.getOrCreateCStringSection( + useSectionName ? 
isec->getName() : section_names::cString); + if (osec->inputOrder == UnspecifiedInputOrder) + osec->inputOrder = inputSectionsOrder++; + osec->addInput(isec); } else if (auto *isec = dyn_cast(inputSection)) { if (in.wordLiteralSection->inputOrder == UnspecifiedInputOrder) in.wordLiteralSection->inputOrder = inputSectionsOrder++; diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp index 5e88e19697d67..29ebcdcf9a832 100644 --- a/lld/MachO/MapFile.cpp +++ b/lld/MachO/MapFile.cpp @@ -239,9 +239,7 @@ void macho::writeMapFile() { printIsecArrSyms(textOsec->inputs, textOsec->getThunks()); } else if (auto *concatOsec = dyn_cast(osec)) { printIsecArrSyms(concatOsec->inputs); - } else if (any_of(in.cStringSectionMap, - [&](auto &it) { return osec == it.getValue(); }) || - osec == in.objcMethnameSection) { + } else if (is_contained(in.cStringSections, osec)) { const auto &liveCStrings = info.liveCStringsForSection.lookup(osec); uint64_t lastAddr = 0; // strings will never start at address 0, so this // is a sentinel value diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp index 794b92d2c9d40..ab7f73c3a1df6 100644 --- a/lld/MachO/ObjC.cpp +++ b/lld/MachO/ObjC.cpp @@ -1057,7 +1057,7 @@ Defined *ObjcCategoryMerger::emitCategoryName(const std::string &name, newStringSec->splitIntoPieces(); newStringSec->pieces[0].live = true; newStringSec->parent = infoCategoryWriter.catNameInfo.outputSection; - in.getOrCreateCStringSection(section_names::cString)->addInput(newStringSec); + in.cStringSection->addInput(newStringSec); assert(newStringSec->pieces.size() == 1); Defined *catNameSym = make( diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h index 130b2d73af810..ffc6202ae2191 100644 --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -843,7 +843,8 @@ void writeChainedFixup(uint8_t *buf, const Symbol *sym, int64_t addend); struct InStruct { const uint8_t *bufferStart = nullptr; MachHeaderSection *header = nullptr; - llvm::StringMap 
cStringSectionMap; + llvm::SmallVector cStringSections; + CStringSection *cStringSection = nullptr; DeduplicatedCStringSection *objcMethnameSection = nullptr; WordLiteralSection *wordLiteralSection = nullptr; RebaseSection *rebase = nullptr; @@ -864,20 +865,25 @@ struct InStruct { ObjCMethListSection *objcMethList = nullptr; ChainedFixupsSection *chainedFixups = nullptr; - CStringSection *getOrCreateCStringSection(StringRef name) { - auto it = cStringSectionMap.find(name); - if (it != cStringSectionMap.end()) - return it->getValue(); + CStringSection *getOrCreateCStringSection(StringRef name, + bool forceDedupStrings = false) { + auto [it, didEmplace] = + cStringSectionMap.try_emplace(name, cStringSections.size()); + if (!didEmplace) + return cStringSections[it->getValue()]; std::string &nameData = *make(name); CStringSection *sec; - if (config->dedupStrings) + if (config->dedupStrings || forceDedupStrings) sec = make(nameData.c_str()); else sec = make(nameData.c_str()); - cStringSectionMap[name] = sec; + cStringSections.push_back(sec); return sec; } + +private: + llvm::StringMap cStringSectionMap; }; extern InStruct in; diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp index 59b2264a7f1ab..995792be41747 100644 --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -1377,10 +1377,11 @@ void macho::resetWriter() { LCDylib::resetInstanceCount(); } void macho::createSyntheticSections() { in.header = make(); - // Materialize the cstring section - in.getOrCreateCStringSection(section_names::cString); - in.objcMethnameSection = - make(section_names::objcMethname); + // Materialize cstring and objcMethname sections + in.cStringSection = in.getOrCreateCStringSection(section_names::cString); + in.objcMethnameSection = cast( + in.getOrCreateCStringSection(section_names::objcMethname, + /*forceDedupStrings=*/true)); in.wordLiteralSection = make(); if (config->emitChainedFixups) { in.chainedFixups = make(); From 35318d44a1cb6cc8ec5741e537f17ca1bc0eec90 Mon Sep 17 
00:00:00 2001 From: Ellis Hoag Date: Thu, 18 Sep 2025 11:56:50 -0700 Subject: [PATCH 3/4] clear in struct --- lld/MachO/Driver.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index c1a4c0443933a..4e24e1b48f13d 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -1709,7 +1709,7 @@ bool link(ArrayRef argsArr, llvm::raw_ostream &stdoutOS, firstTLVDataSection = nullptr; tar = nullptr; - memset(&in, 0, sizeof(in)); + in = InStruct(); resetLoadedDylibs(); resetOutputSegments(); From a5a9c5115a3497ca50c67d146bd5a3f6d0b7778c Mon Sep 17 00:00:00 2001 From: Ellis Hoag Date: Thu, 18 Sep 2025 12:00:49 -0700 Subject: [PATCH 4/4] add comment --- lld/MachO/SyntheticSections.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h index ffc6202ae2191..1abf3c210a64e 100644 --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -843,6 +843,8 @@ void writeChainedFixup(uint8_t *buf, const Symbol *sym, int64_t addend); struct InStruct { const uint8_t *bufferStart = nullptr; MachHeaderSection *header = nullptr; + /// The list of cstring sections. Note that this includes \p cStringSection + /// and \p objcMethnameSection already. llvm::SmallVector cStringSections; CStringSection *cStringSection = nullptr; DeduplicatedCStringSection *objcMethnameSection = nullptr;