Skip to content

Commit fc8f54d

Browse files
authored
[LLD][MachO] Option to emit separate cstring sections (#158720)
Add the `--{no-}separate-cstring-literal-sections` option to emit cstring literals into sections defined by their section name. This allows changes like swiftlang/swift#84300 and swiftlang/swift#84236 to actually have an effect. The default behavior has not changed. This is useful because strings in different sections might have different access patterns at runtime. By splitting these strings into separate sections, we may reduce the number of page faults during startup. For example, the ObjC runtime accesses all strings in `__objc_classname` before main.
1 parent 8843111 commit fc8f54d

File tree

8 files changed

+82
-20
lines changed

8 files changed

+82
-20
lines changed

lld/MachO/Config.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,7 @@ struct Configuration {
222222
bool pgoWarnMismatch;
223223
bool warnThinArchiveMissingMembers;
224224
bool disableVerify;
225+
bool separateCstringLiteralSections;
225226

226227
bool callGraphProfileSort = false;
227228
llvm::StringRef printSymbolOrder;

lld/MachO/Driver.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1522,8 +1522,8 @@ static void foldIdenticalLiterals() {
15221522
// We always create a cStringSection, regardless of whether dedupLiterals is
15231523
// true. If it isn't, we simply create a non-deduplicating CStringSection.
15241524
// Either way, we must unconditionally finalize it here.
1525-
in.cStringSection->finalizeContents();
1526-
in.objcMethnameSection->finalizeContents();
1525+
for (auto *sec : in.cStringSections)
1526+
sec->finalizeContents();
15271527
in.wordLiteralSection->finalizeContents();
15281528
}
15291529

@@ -1711,7 +1711,7 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
17111711

17121712
firstTLVDataSection = nullptr;
17131713
tar = nullptr;
1714-
memset(&in, 0, sizeof(in));
1714+
in = InStruct();
17151715

17161716
resetLoadedDylibs();
17171717
resetOutputSegments();
@@ -1983,6 +1983,9 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
19831983
OPT_no_warn_thin_archive_missing_members, true);
19841984
config->generateUuid = !args.hasArg(OPT_no_uuid);
19851985
config->disableVerify = args.hasArg(OPT_disable_verify);
1986+
config->separateCstringLiteralSections =
1987+
args.hasFlag(OPT_separate_cstring_literal_sections,
1988+
OPT_no_separate_cstring_literal_sections, false);
19861989

19871990
auto IncompatWithCGSort = [&](StringRef firstArgStr) {
19881991
// Throw an error only if --call-graph-profile-sort is explicitly specified

lld/MachO/InputSection.cpp

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -63,15 +63,13 @@ void lld::macho::addInputSection(InputSection *inputSection) {
6363
isec->parent = osec;
6464
inputSections.push_back(isec);
6565
} else if (auto *isec = dyn_cast<CStringInputSection>(inputSection)) {
66-
if (isec->getName() == section_names::objcMethname) {
67-
if (in.objcMethnameSection->inputOrder == UnspecifiedInputOrder)
68-
in.objcMethnameSection->inputOrder = inputSectionsOrder++;
69-
in.objcMethnameSection->addInput(isec);
70-
} else {
71-
if (in.cStringSection->inputOrder == UnspecifiedInputOrder)
72-
in.cStringSection->inputOrder = inputSectionsOrder++;
73-
in.cStringSection->addInput(isec);
74-
}
66+
bool useSectionName = config->separateCstringLiteralSections ||
67+
isec->getName() == section_names::objcMethname;
68+
auto *osec = in.getOrCreateCStringSection(
69+
useSectionName ? isec->getName() : section_names::cString);
70+
if (osec->inputOrder == UnspecifiedInputOrder)
71+
osec->inputOrder = inputSectionsOrder++;
72+
osec->addInput(isec);
7573
} else if (auto *isec = dyn_cast<WordLiteralInputSection>(inputSection)) {
7674
if (in.wordLiteralSection->inputOrder == UnspecifiedInputOrder)
7775
in.wordLiteralSection->inputOrder = inputSectionsOrder++;

lld/MachO/MapFile.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ void macho::writeMapFile() {
239239
printIsecArrSyms(textOsec->inputs, textOsec->getThunks());
240240
} else if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
241241
printIsecArrSyms(concatOsec->inputs);
242-
} else if (osec == in.cStringSection || osec == in.objcMethnameSection) {
242+
} else if (is_contained(in.cStringSections, osec)) {
243243
const auto &liveCStrings = info.liveCStringsForSection.lookup(osec);
244244
uint64_t lastAddr = 0; // strings will never start at address 0, so this
245245
// is a sentinel value

lld/MachO/Options.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1084,6 +1084,13 @@ def dyld_env : Separate<["-"], "dyld_env">,
10841084
def ignore_auto_link : Flag<["-"], "ignore_auto_link">,
10851085
HelpText<"Ignore LC_LINKER_OPTIONs">,
10861086
Group<grp_rare>;
1087+
// Boolean option pair --separate-cstring-literal-sections /
// --no-separate-cstring-literal-sections. The first help string describes the
// enabled form, the second describes the disabled form, which is the default.
defm separate_cstring_literal_sections
1088+
: BB<"separate-cstring-literal-sections",
1089+
"Emit all cstring literals into their respective sections defined by "
1090+
"their section names.",
1091+
"Emit all cstring literals into the __cstring section. As a special "
1092+
"case, the __objc_methname section will still be emitted. (default)">,
1093+
Group<grp_rare>;
10871094

10881095
def grp_deprecated : OptionGroup<"deprecated">, HelpText<"DEPRECATED">;
10891096

lld/MachO/SyntheticSections.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -843,6 +843,9 @@ void writeChainedFixup(uint8_t *buf, const Symbol *sym, int64_t addend);
843843
struct InStruct {
844844
const uint8_t *bufferStart = nullptr;
845845
MachHeaderSection *header = nullptr;
846+
/// The list of cstring sections. Note that this includes \p cStringSection
847+
/// and \p objcMethnameSection already.
848+
llvm::SmallVector<CStringSection *> cStringSections;
846849
CStringSection *cStringSection = nullptr;
847850
DeduplicatedCStringSection *objcMethnameSection = nullptr;
848851
WordLiteralSection *wordLiteralSection = nullptr;
@@ -863,6 +866,26 @@ struct InStruct {
863866
InitOffsetsSection *initOffsets = nullptr;
864867
ObjCMethListSection *objcMethList = nullptr;
865868
ChainedFixupsSection *chainedFixups = nullptr;
869+
870+
/// Returns the cstring output section registered under \p name, creating and
/// registering it on the first request for that name.
///
/// \param name section name used as the lookup key and as the name of a
///        newly created section.
/// \param forceDedupStrings when true, a newly created section is a
///        DeduplicatedCStringSection even if config->dedupStrings is false.
///        It is only consulted on creation; an already registered section is
///        returned unchanged.
/// \returns the existing or newly created section; a new section is also
///          appended to \c cStringSections.
CStringSection *getOrCreateCStringSection(StringRef name,
871+
bool forceDedupStrings = false) {
872+
// The map stores the index that a new section would occupy in
// cStringSections; on a hit, that index locates the existing section.
auto [it, didEmplace] =
873+
cStringSectionMap.try_emplace(name, cStringSections.size());
874+
if (!didEmplace)
875+
return cStringSections[it->getValue()];
876+
877+
// Copy the name into separately allocated storage before handing its
// C string to the section constructor.
// NOTE(review): presumably make<> storage outlives the section so the
// pointer stays valid -- confirm against the allocator's lifetime.
std::string &nameData = *make<std::string>(name);
878+
CStringSection *sec;
879+
if (config->dedupStrings || forceDedupStrings)
880+
sec = make<DeduplicatedCStringSection>(nameData.c_str());
881+
else
882+
sec = make<CStringSection>(nameData.c_str());
883+
// Keep the vector in step with the index recorded in the map above.
cStringSections.push_back(sec);
884+
return sec;
885+
}
886+
887+
private:
888+
llvm::StringMap<unsigned> cStringSectionMap;
866889
};
867890

868891
extern InStruct in;

lld/MachO/Writer.cpp

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1377,13 +1377,11 @@ void macho::resetWriter() { LCDylib::resetInstanceCount(); }
13771377

13781378
void macho::createSyntheticSections() {
13791379
in.header = make<MachHeaderSection>();
1380-
if (config->dedupStrings)
1381-
in.cStringSection =
1382-
make<DeduplicatedCStringSection>(section_names::cString);
1383-
else
1384-
in.cStringSection = make<CStringSection>(section_names::cString);
1385-
in.objcMethnameSection =
1386-
make<DeduplicatedCStringSection>(section_names::objcMethname);
1380+
// Materialize cstring and objcMethname sections
1381+
in.cStringSection = in.getOrCreateCStringSection(section_names::cString);
1382+
in.objcMethnameSection = cast<DeduplicatedCStringSection>(
1383+
in.getOrCreateCStringSection(section_names::objcMethname,
1384+
/*forceDedupStrings=*/true));
13871385
in.wordLiteralSection = make<WordLiteralSection>();
13881386
if (config->emitChainedFixups) {
13891387
in.chainedFixups = make<ChainedFixupsSection>();

lld/test/MachO/cstring.ll

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
; Links one bitcode object three ways (option enabled, option disabled,
; option omitted) and inspects the section headers of each output.
; REQUIRES: aarch64
2+
; RUN: llvm-as %s -o %t.o
3+
4+
; RUN: %lld -dylib --separate-cstring-literal-sections %t.o -o - | llvm-objdump --macho --section-headers - | FileCheck %s
5+
; RUN: %lld -dylib --no-separate-cstring-literal-sections %t.o -o - | llvm-objdump --macho --section-headers - | FileCheck %s --check-prefix=CSTR
6+
; RUN: %lld -dylib %t.o -o - | llvm-objdump --macho --section-headers - | FileCheck %s --check-prefix=CSTR
7+
8+
; With the option enabled, every cstring_literals section declared below is
; expected to appear in the output under its own name.
; CHECK-DAG: __cstring
9+
; CHECK-DAG: __new_sec
10+
; CHECK-DAG: __objc_classname
11+
; CHECK-DAG: __objc_methname
12+
; CHECK-DAG: __objc_methtype
13+
14+
; NOTE(review): the CSTR prefix only asserts that these two sections are
; present; it does not assert that __new_sec, __objc_classname or
; __objc_methtype are absent in the default / disabled modes.
; CSTR-DAG: __cstring
15+
; CSTR-DAG: __objc_methname
16+
17+
; NOTE(review): the REQUIRES directive at the top of this test names aarch64,
; while the triple below is x86_64 -- confirm which target is intended.
target triple = "x86_64-apple-darwin"
18+
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
19+
20+
@.str = private unnamed_addr constant [10 x i8] c"my string\00", align 1
21+
@.str1 = private unnamed_addr constant [16 x i8] c"my other string\00", section "__TEXT,__new_sec,cstring_literals", align 1
22+
@OBJC_CLASS_NAME_ = private unnamed_addr constant [4 x i8] c"foo\00", section "__TEXT,__objc_classname,cstring_literals", align 1
23+
@OBJC_METH_VAR_NAME_ = private unnamed_addr constant [4 x i8] c"bar\00", section "__TEXT,__objc_methname,cstring_literals", align 1
24+
@OBJC_METH_VAR_TYPE_ = private unnamed_addr constant [4 x i8] c"goo\00", section "__TEXT,__objc_methtype,cstring_literals", align 1
25+
26+
; Reference every string from llvm.compiler.used.
; NOTE(review): presumably this keeps the otherwise-unreferenced private
; globals from being dropped before the link -- confirm.
@llvm.compiler.used = appending global [5 x ptr] [
27+
ptr @.str,
28+
ptr @.str1,
29+
ptr @OBJC_METH_VAR_NAME_,
30+
ptr @OBJC_CLASS_NAME_,
31+
ptr @OBJC_METH_VAR_TYPE_
32+
]

0 commit comments

Comments
 (0)