-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[LLD][MachO] Option to emit separate cstring sections #158720
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-lld-macho @llvm/pr-subscribers-lld Author: Ellis Hoag (ellishg) ChangesAdd the The reason this is useful is because strings in different sections might have different access patterns at runtime. By splitting these strings into separate sections, we may reduce the number of page faults during startup. For example, the ObjC runtime accesses all strings in Full diff: https://github.com/llvm/llvm-project/pull/158720.diff 9 Files Affected:
diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index 19dba790c1c7c..51b1363d87615 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -222,6 +222,7 @@ struct Configuration {
bool pgoWarnMismatch;
bool warnThinArchiveMissingMembers;
bool disableVerify;
+ bool separateCstringLiteralSections;
bool callGraphProfileSort = false;
llvm::StringRef printSymbolOrder;
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 3db638e1ead96..f54b6bbdc155c 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1520,7 +1520,8 @@ static void foldIdenticalLiterals() {
// We always create a cStringSection, regardless of whether dedupLiterals is
// true. If it isn't, we simply create a non-deduplicating CStringSection.
// Either way, we must unconditionally finalize it here.
- in.cStringSection->finalizeContents();
+ for (auto &[name, sec] : in.cStringSectionMap)
+ sec->finalizeContents();
in.objcMethnameSection->finalizeContents();
in.wordLiteralSection->finalizeContents();
}
@@ -1981,6 +1982,9 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
OPT_no_warn_thin_archive_missing_members, true);
config->generateUuid = !args.hasArg(OPT_no_uuid);
config->disableVerify = args.hasArg(OPT_disable_verify);
+ config->separateCstringLiteralSections =
+ args.hasFlag(OPT_separate_cstring_literal_sections,
+ OPT_no_separate_cstring_literal_sections, false);
auto IncompatWithCGSort = [&](StringRef firstArgStr) {
// Throw an error only if --call-graph-profile-sort is explicitly specified
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index 18b3ff961085b..b7718db45aef6 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -68,9 +68,12 @@ void lld::macho::addInputSection(InputSection *inputSection) {
in.objcMethnameSection->inputOrder = inputSectionsOrder++;
in.objcMethnameSection->addInput(isec);
} else {
- if (in.cStringSection->inputOrder == UnspecifiedInputOrder)
- in.cStringSection->inputOrder = inputSectionsOrder++;
- in.cStringSection->addInput(isec);
+ auto *osec = in.getOrCreateCStringSection(
+ config->separateCstringLiteralSections ? isec->getName()
+ : section_names::cString);
+ if (osec->inputOrder == UnspecifiedInputOrder)
+ osec->inputOrder = inputSectionsOrder++;
+ osec->addInput(isec);
}
} else if (auto *isec = dyn_cast<WordLiteralInputSection>(inputSection)) {
if (in.wordLiteralSection->inputOrder == UnspecifiedInputOrder)
diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp
index f3e221a700b14..5e88e19697d67 100644
--- a/lld/MachO/MapFile.cpp
+++ b/lld/MachO/MapFile.cpp
@@ -239,7 +239,9 @@ void macho::writeMapFile() {
printIsecArrSyms(textOsec->inputs, textOsec->getThunks());
} else if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
printIsecArrSyms(concatOsec->inputs);
- } else if (osec == in.cStringSection || osec == in.objcMethnameSection) {
+ } else if (any_of(in.cStringSectionMap,
+ [&](auto &it) { return osec == it.getValue(); }) ||
+ osec == in.objcMethnameSection) {
const auto &liveCStrings = info.liveCStringsForSection.lookup(osec);
uint64_t lastAddr = 0; // strings will never start at address 0, so this
// is a sentinel value
diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index ab7f73c3a1df6..794b92d2c9d40 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -1057,7 +1057,7 @@ Defined *ObjcCategoryMerger::emitCategoryName(const std::string &name,
newStringSec->splitIntoPieces();
newStringSec->pieces[0].live = true;
newStringSec->parent = infoCategoryWriter.catNameInfo.outputSection;
- in.cStringSection->addInput(newStringSec);
+ in.getOrCreateCStringSection(section_names::cString)->addInput(newStringSec);
assert(newStringSec->pieces.size() == 1);
Defined *catNameSym = make<Defined>(
diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index 8ae50f380741a..4eeb8fbe11121 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -1084,6 +1084,13 @@ def dyld_env : Separate<["-"], "dyld_env">,
def ignore_auto_link : Flag<["-"], "ignore_auto_link">,
HelpText<"Ignore LC_LINKER_OPTIONs">,
Group<grp_rare>;
+defm separate_cstring_literal_sections
+ : BB<"separate-cstring-literal-sections",
+ "Emit all cstring literals into their respective sections defined by "
+ "their section names.",
+ "Emit all cstring literals into the __cstring section. As a special "
+ "case, the __objc_methname section will still be emitted. (default)">,
+ Group<grp_rare>;
def grp_deprecated : OptionGroup<"deprecated">, HelpText<"DEPRECATED">;
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index 5796b0790c83a..130b2d73af810 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -843,7 +843,7 @@ void writeChainedFixup(uint8_t *buf, const Symbol *sym, int64_t addend);
struct InStruct {
const uint8_t *bufferStart = nullptr;
MachHeaderSection *header = nullptr;
- CStringSection *cStringSection = nullptr;
+ llvm::StringMap<CStringSection *> cStringSectionMap;
DeduplicatedCStringSection *objcMethnameSection = nullptr;
WordLiteralSection *wordLiteralSection = nullptr;
RebaseSection *rebase = nullptr;
@@ -863,6 +863,21 @@ struct InStruct {
InitOffsetsSection *initOffsets = nullptr;
ObjCMethListSection *objcMethList = nullptr;
ChainedFixupsSection *chainedFixups = nullptr;
+
+ CStringSection *getOrCreateCStringSection(StringRef name) {
+ auto it = cStringSectionMap.find(name);
+ if (it != cStringSectionMap.end())
+ return it->getValue();
+
+ std::string &nameData = *make<std::string>(name);
+ CStringSection *sec;
+ if (config->dedupStrings)
+ sec = make<DeduplicatedCStringSection>(nameData.c_str());
+ else
+ sec = make<CStringSection>(nameData.c_str());
+ cStringSectionMap[name] = sec;
+ return sec;
+ }
};
extern InStruct in;
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index f288fadc0d14f..59b2264a7f1ab 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -1377,11 +1377,8 @@ void macho::resetWriter() { LCDylib::resetInstanceCount(); }
void macho::createSyntheticSections() {
in.header = make<MachHeaderSection>();
- if (config->dedupStrings)
- in.cStringSection =
- make<DeduplicatedCStringSection>(section_names::cString);
- else
- in.cStringSection = make<CStringSection>(section_names::cString);
+ // Materialize the cstring section
+ in.getOrCreateCStringSection(section_names::cString);
in.objcMethnameSection =
make<DeduplicatedCStringSection>(section_names::objcMethname);
in.wordLiteralSection = make<WordLiteralSection>();
diff --git a/lld/test/MachO/cstring.ll b/lld/test/MachO/cstring.ll
new file mode 100644
index 0000000000000..4f82736b0a5f0
--- /dev/null
+++ b/lld/test/MachO/cstring.ll
@@ -0,0 +1,32 @@
+; REQUIRES: aarch64
+; RUN: llvm-as %s -o %t.o
+
+; RUN: %lld -dylib --separate-cstring-literal-sections %t.o -o - | llvm-objdump --macho --section-headers - | FileCheck %s
+; RUN: %lld -dylib --no-separate-cstring-literal-sections %t.o -o - | llvm-objdump --macho --section-headers - | FileCheck %s --check-prefix=CSTR
+; RUN: %lld -dylib %t.o -o - | llvm-objdump --macho --section-headers - | FileCheck %s --check-prefix=CSTR
+
+; CHECK-DAG: __cstring
+; CHECK-DAG: __new_sec
+; CHECK-DAG: __objc_classname
+; CHECK-DAG: __objc_methname
+; CHECK-DAG: __objc_methtype
+
+; CSTR-DAG: __cstring
+; CSTR-DAG: __objc_methname
+
+target triple = "x86_64-apple-darwin"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
+
+@.str = private unnamed_addr constant [10 x i8] c"my string\00", align 1
+@.str1 = private unnamed_addr constant [16 x i8] c"my other string\00", section "__TEXT,__new_sec,cstring_literals", align 1
+@OBJC_CLASS_NAME_ = private unnamed_addr constant [4 x i8] c"foo\00", section "__TEXT,__objc_classname,cstring_literals", align 1
+@OBJC_METH_VAR_NAME_ = private unnamed_addr constant [4 x i8] c"bar\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@OBJC_METH_VAR_TYPE_ = private unnamed_addr constant [4 x i8] c"goo\00", section "__TEXT,__objc_methtype,cstring_literals", align 1
+
+@llvm.compiler.used = appending global [5 x ptr] [
+ ptr @.str,
+ ptr @.str1,
+ ptr @OBJC_METH_VAR_NAME_,
+ ptr @OBJC_CLASS_NAME_,
+ ptr @OBJC_METH_VAR_TYPE_
+]
|
For reference, code emitting the |
lld/MachO/MapFile.cpp
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think performance is a concern here, since the number of entries in this map is not large.
Although I don't believe using this map impacts the determinism of the output, could you double-check that?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for pointing out the iteration order. Instead I decided to store cstring sections in a map whose order is deterministic and use a map to point to the indices in the vector.
947444d
to
20e7d1a
Compare
firstTLVDataSection = nullptr; | ||
tar = nullptr; | ||
memset(&in, 0, sizeof(in)); | ||
in = InStruct(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think adding a vector and map to this struct made it no longer trivially copyable. This seems like a better way to clear it anyway.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Maybe a little bit off-topic, but could we consider installing string tail merging for |
I wasn't aware of string tail merging. Thanks for sharing! I think it's definitely worth pursuing if it gives a size win. Do ObjC methods tend to have shares suffixes? I would assume they might share prefixes if two methods are the same but with different number of arguments. |
I just check a large binary and it seems |
I realized this broke the builds. I'll try to land a fix shortly. https://lab.llvm.org/buildbot/#/builders/190/builds/27994
|
Fix a test added in #158720. I had accidentally required arm64 when the test was using x86_64.
Fix a test added in llvm/llvm-project#158720. I had accidentally required arm64 when the test was using x86_64.
@nocchijiang I just published #161262 to tail merge cstring sections and it gives a nice win! Thanks again for pointing this out! |
I forgot to add a release note for llvm/llvm-project#158720 so I'll add it here.
I forgot to add a release note for llvm#158720 so I'll add it here.
Add the flag `--tail-merge-strings` to enable tail merging of cstrings. For example, if we have strings `mystring\0` and `ring\0`, we could place `mystring\0` at address `0x1000` and `ring\0` at address `0x1004` and have them share the same underlying data. It turns out that many ObjC method names can be tail merged. For example, `error:` and `doFoo:error:`. On a large iOS binary, we saw nearly a 15% size improvement in the `__TEXT__objc_methname` section and negligible impact on link time. ``` $ bloaty --domain=vm merged.o.stripped -- base.o.stripped VM SIZE -------------- +95% +5.85Ki [__TEXT] -2.4% -239Ki __TEXT,__cstring -14.5% -710Ki __TEXT,__objc_methname -1.0% -944Ki TOTAL ``` Tail merging for MachO was originally removed in 7c269db. The previous implementation used `StringTableBuilder`, but that was removed in 4308f03 to ensure deduplicated strings are aligned correctly. This implementation ensures that tail merged strings are also aligned correctly. Special thanks to nocchijiang for pointing this out in #158720 (comment). Depends on #161253.
Add the flag `--tail-merge-strings` to enable tail merging of cstrings. For example, if we have strings `mystring\0` and `ring\0`, we could place `mystring\0` at address `0x1000` and `ring\0` at address `0x1004` and have them share the same underlying data. It turns out that many ObjC method names can be tail merged. For example, `error:` and `doFoo:error:`. On a large iOS binary, we saw nearly a 15% size improvement in the `__TEXT__objc_methname` section and negligible impact on link time. ``` $ bloaty --domain=vm merged.o.stripped -- base.o.stripped VM SIZE -------------- +95% +5.85Ki [__TEXT] -2.4% -239Ki __TEXT,__cstring -14.5% -710Ki __TEXT,__objc_methname -1.0% -944Ki TOTAL ``` Tail merging for MachO was originally removed in llvm/llvm-project@7c269db. The previous implementation used `StringTableBuilder`, but that was removed in llvm/llvm-project@4308f03 to ensure deduplicated strings are aligned correctly. This implementation ensures that tail merged strings are also aligned correctly. Special thanks to nocchijiang for pointing this out in llvm/llvm-project#158720 (comment). Depends on llvm/llvm-project#161253.
Add the
--{no-}separate-cstring-literal-sections
option to emit cstring literals into sections defined by their section name. This allows for changes like swiftlang/swift#84300 and swiftlang/swift#84236 to actually have an affect. The default behavior has not changed.The reason this is useful is because strings in different sections might have different access patterns at runtime. By splitting these strings into separate sections, we may reduce the number of page faults during startup. For example, the ObjC runtime accesses all strings in
__objc_classname
before main.