From 2e997dba5502eec845f0371e0aef083281f77fc4 Mon Sep 17 00:00:00 2001 From: Ellis Hoag Date: Mon, 29 Sep 2025 10:20:28 -0700 Subject: [PATCH 01/10] [lld][macho][NFC] Factor count zeros into helper function --- lld/MachO/SyntheticSections.cpp | 57 +++++++++++++++++---------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index 228b84db21c2a..d38b6c9e00157 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -1685,31 +1685,7 @@ void CStringSection::writeTo(uint8_t *buf) const { } } -void CStringSection::finalizeContents() { - uint64_t offset = 0; - // TODO: Call buildCStringPriorities() to support cstring ordering when - // deduplication is off, although this may negatively impact build - // performance. - for (CStringInputSection *isec : inputs) { - for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) { - if (!piece.live) - continue; - // See comment above DeduplicatedCStringSection for how alignment is - // handled. - uint32_t pieceAlign = 1 - << llvm::countr_zero(isec->align | piece.inSecOff); - offset = alignToPowerOf2(offset, pieceAlign); - piece.outSecOff = offset; - isec->isFinal = true; - StringRef string = isec->getStringRef(i); - offset += string.size() + 1; // account for null terminator - } - } - size = offset; -} - -// Mergeable cstring literals are found under the __TEXT,__cstring section. In -// contrast to ELF, which puts strings that need different alignments into +// In contrast to ELF, which puts strings that need different alignments into // different sections, clang's Mach-O backend puts them all in one section. // Strings that need to be aligned have the .p2align directive emitted before // them, which simply translates into zero padding in the object file. In other @@ -1744,8 +1720,33 @@ void CStringSection::finalizeContents() { // requires its operand addresses to be 16-byte aligned). 
However, there will // typically also be other cstrings in the same file that aren't used via SIMD // and don't need this alignment. They will be emitted at some arbitrary address -// `A`, but ld64 will treat them as being 16-byte aligned with an offset of `16 -// % A`. +// `A`, but ld64 will treat them as being 16-byte aligned with an offset of +// `16 % A`. +static uint8_t getStringPieceAlignment(const CStringInputSection *isec, + const StringPiece &piece) { + return llvm::countr_zero(isec->align | piece.inSecOff); +} + +void CStringSection::finalizeContents() { + uint64_t offset = 0; + // TODO: Call buildCStringPriorities() to support cstring ordering when + // deduplication is off, although this may negatively impact build + // performance. + for (CStringInputSection *isec : inputs) { + for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) { + if (!piece.live) + continue; + uint32_t pieceAlign = 1 << getStringPieceAlignment(isec, piece); + offset = alignToPowerOf2(offset, pieceAlign); + piece.outSecOff = offset; + isec->isFinal = true; + StringRef string = isec->getStringRef(i); + offset += string.size() + 1; // account for null terminator + } + } + size = offset; +} + void DeduplicatedCStringSection::finalizeContents() { // Find the largest alignment required for each string. 
for (const CStringInputSection *isec : inputs) { @@ -1754,7 +1755,7 @@ void DeduplicatedCStringSection::finalizeContents() { continue; auto s = isec->getCachedHashStringRef(i); assert(isec->align != 0); - uint8_t trailingZeros = llvm::countr_zero(isec->align | piece.inSecOff); + uint8_t trailingZeros = getStringPieceAlignment(isec, piece); auto it = stringOffsetMap.insert( std::make_pair(s, StringOffset(trailingZeros))); if (!it.second && it.first->second.trailingZeros < trailingZeros) From ee87e104056cc89a9d5e151edefca335aa823909 Mon Sep 17 00:00:00 2001 From: Ellis Hoag Date: Mon, 29 Sep 2025 10:47:52 -0700 Subject: [PATCH 02/10] fix format --- lld/MachO/SyntheticSections.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index d38b6c9e00157..5645d8a05a28f 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -1723,7 +1723,7 @@ void CStringSection::writeTo(uint8_t *buf) const { // `A`, but ld64 will treat them as being 16-byte aligned with an offset of // `16 % A`. 
static uint8_t getStringPieceAlignment(const CStringInputSection *isec, - const StringPiece &piece) { + const StringPiece &piece) { return llvm::countr_zero(isec->align | piece.inSecOff); } From d47f1b9a1075e9588e0bc54afb2da33838ba71b1 Mon Sep 17 00:00:00 2001 From: Ellis Hoag Date: Mon, 29 Sep 2025 11:03:21 -0700 Subject: [PATCH 03/10] [lld][MachO] Use llvm::Align and remove StringOffset type --- lld/MachO/SyntheticSections.cpp | 64 ++++++++++++++------------------- lld/MachO/SyntheticSections.h | 12 ++----- 2 files changed, 28 insertions(+), 48 deletions(-) diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index 5645d8a05a28f..38386c107fea0 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -848,8 +848,7 @@ void ObjCSelRefsHelper::initialize() { void ObjCSelRefsHelper::cleanup() { methnameToSelref.clear(); } ConcatInputSection *ObjCSelRefsHelper::makeSelRef(StringRef methname) { - auto methnameOffset = - in.objcMethnameSection->getStringOffset(methname).outSecOff; + auto methnameOffset = in.objcMethnameSection->getStringOffset(methname); size_t wordSize = target->wordSize; uint8_t *selrefData = bAlloc().Allocate(wordSize); @@ -1722,13 +1721,12 @@ void CStringSection::writeTo(uint8_t *buf) const { // and don't need this alignment. They will be emitted at some arbitrary address // `A`, but ld64 will treat them as being 16-byte aligned with an offset of // `16 % A`. 
-static uint8_t getStringPieceAlignment(const CStringInputSection *isec, - const StringPiece &piece) { - return llvm::countr_zero(isec->align | piece.inSecOff); +static Align getStringPieceAlignment(const CStringInputSection *isec, + const StringPiece &piece) { + return llvm::Align(1ULL << llvm::countr_zero(isec->align | piece.inSecOff)); } void CStringSection::finalizeContents() { - uint64_t offset = 0; // TODO: Call buildCStringPriorities() to support cstring ordering when // deduplication is off, although this may negatively impact build // performance. @@ -1736,30 +1734,27 @@ void CStringSection::finalizeContents() { for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) { if (!piece.live) continue; - uint32_t pieceAlign = 1 << getStringPieceAlignment(isec, piece); - offset = alignToPowerOf2(offset, pieceAlign); - piece.outSecOff = offset; - isec->isFinal = true; + piece.outSecOff = alignTo(size, getStringPieceAlignment(isec, piece)); StringRef string = isec->getStringRef(i); - offset += string.size() + 1; // account for null terminator + size = piece.outSecOff + string.size() + 1; // account for null terminator } + isec->isFinal = true; } - size = offset; } void DeduplicatedCStringSection::finalizeContents() { // Find the largest alignment required for each string. 
+ DenseMap strToAlignment; for (const CStringInputSection *isec : inputs) { for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) { if (!piece.live) continue; auto s = isec->getCachedHashStringRef(i); assert(isec->align != 0); - uint8_t trailingZeros = getStringPieceAlignment(isec, piece); - auto it = stringOffsetMap.insert( - std::make_pair(s, StringOffset(trailingZeros))); - if (!it.second && it.first->second.trailingZeros < trailingZeros) - it.first->second.trailingZeros = trailingZeros; + auto align = getStringPieceAlignment(isec, piece); + auto [it, wasInserted] = strToAlignment.try_emplace(s, align); + if (!wasInserted && it->second < align) + it->second = align; } } @@ -1769,38 +1764,31 @@ void DeduplicatedCStringSection::finalizeContents() { for (auto &[isec, i] : priorityBuilder.buildCStringPriorities(inputs)) { auto &piece = isec->pieces[i]; auto s = isec->getCachedHashStringRef(i); - auto it = stringOffsetMap.find(s); - assert(it != stringOffsetMap.end()); - lld::macho::DeduplicatedCStringSection::StringOffset &offsetInfo = - it->second; - if (offsetInfo.outSecOff == UINT64_MAX) { - offsetInfo.outSecOff = - alignToPowerOf2(size, 1ULL << offsetInfo.trailingZeros); - size = offsetInfo.outSecOff + s.size() + 1; // account for null terminator + auto [it, wasInserted] = stringOffsetMap.try_emplace(s, /*placeholder*/ 0); + if (wasInserted) { + // Avoid computing the offset until we are sure we will need to + uint64_t offset = alignTo(size, strToAlignment.at(s)); + it->second = offset; + size = offset + s.size() + 1; // account for null terminator } - piece.outSecOff = offsetInfo.outSecOff; + // If the string was already in stringOffsetMap, it is a duplicate and we + // only need to assign the offset. 
+ piece.outSecOff = it->second; } for (CStringInputSection *isec : inputs) isec->isFinal = true; } void DeduplicatedCStringSection::writeTo(uint8_t *buf) const { - for (const auto &p : stringOffsetMap) { - StringRef data = p.first.val(); - uint64_t off = p.second.outSecOff; - if (!data.empty()) - memcpy(buf + off, data.data(), data.size()); - } + for (const auto &[s, outSecOff] : stringOffsetMap) + if (s.size()) + memcpy(buf + outSecOff, s.data(), s.size()); } -DeduplicatedCStringSection::StringOffset -DeduplicatedCStringSection::getStringOffset(StringRef str) const { +uint64_t DeduplicatedCStringSection::getStringOffset(StringRef str) const { // StringPiece uses 31 bits to store the hashes, so we replicate that uint32_t hash = xxh3_64bits(str) & 0x7fffffff; - auto offset = stringOffsetMap.find(CachedHashStringRef(str, hash)); - assert(offset != stringOffsetMap.end() && - "Looked-up strings should always exist in section"); - return offset->second; + return stringOffsetMap.at(CachedHashStringRef(str, hash)); } // This section is actually emitted as __TEXT,__const by ld64, but clang may diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h index 1abf3c210a64e..a37dd66107ee7 100644 --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -571,18 +571,10 @@ class DeduplicatedCStringSection final : public CStringSection { uint64_t getSize() const override { return size; } void finalizeContents() override; void writeTo(uint8_t *buf) const override; - - struct StringOffset { - uint8_t trailingZeros; - uint64_t outSecOff = UINT64_MAX; - - explicit StringOffset(uint8_t zeros) : trailingZeros(zeros) {} - }; - - StringOffset getStringOffset(StringRef str) const; + uint64_t getStringOffset(StringRef str) const; private: - llvm::DenseMap stringOffsetMap; + llvm::DenseMap stringOffsetMap; size_t size = 0; }; From 4e0bde29f824a1dc2cfce4a64ad052783e80db11 Mon Sep 17 00:00:00 2001 From: Ellis Hoag Date: Mon, 29 Sep 2025 12:01:51 -0700 
Subject: [PATCH 04/10] init size to zero --- lld/MachO/SyntheticSections.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index 38386c107fea0..903ba78a27c75 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -1727,6 +1727,7 @@ static Align getStringPieceAlignment(const CStringInputSection *isec, } void CStringSection::finalizeContents() { + size = 0; // TODO: Call buildCStringPriorities() to support cstring ordering when // deduplication is off, although this may negatively impact build // performance. From 4742cca2ec71b6651f76c12b9d4aea2706cec02c Mon Sep 17 00:00:00 2001 From: Ellis Hoag Date: Mon, 29 Sep 2025 12:21:49 -0700 Subject: [PATCH 05/10] [lld][MachO] Tail merge strings --- lld/MachO/SyntheticSections.cpp | 58 +++++++++++++++++++- lld/test/MachO/cstring-dedup.s | 3 +- lld/test/MachO/cstring-tailmerge.s | 85 ++++++++++++++++++++++++++++++ 3 files changed, 143 insertions(+), 3 deletions(-) create mode 100644 lld/test/MachO/cstring-tailmerge.s diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index 903ba78a27c75..460a0b5a16ab0 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -1746,6 +1746,7 @@ void CStringSection::finalizeContents() { void DeduplicatedCStringSection::finalizeContents() { // Find the largest alignment required for each string. 
DenseMap strToAlignment; + std::vector deduplicatedStrs; for (const CStringInputSection *isec : inputs) { for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) { if (!piece.live) @@ -1754,17 +1755,57 @@ void DeduplicatedCStringSection::finalizeContents() { assert(isec->align != 0); auto align = getStringPieceAlignment(isec, piece); auto [it, wasInserted] = strToAlignment.try_emplace(s, align); + if (wasInserted) + deduplicatedStrs.push_back(s); if (!wasInserted && it->second < align) it->second = align; } } + // Like lexicographical sort, except we read strings in reverse and take the + // longest string first + // TODO: We could improve performance by implementing our own sort that avoids + // comparing characters we know to be the same. See + // StringTableBuilder::multikeySort() for details + llvm::sort(deduplicatedStrs, [](const auto &left, const auto &right) { + for (const auto &[leftChar, rightChar] : + llvm::zip(llvm::reverse(left.val()), llvm::reverse(right.val()))) { + if (leftChar == rightChar) + continue; + return leftChar < rightChar; + } + return left.size() > right.size(); + }); + std::optional mergeCandidate; + DenseMap> + tailMergeMap; + for (auto &s : deduplicatedStrs) { + if (!mergeCandidate || !mergeCandidate->val().ends_with(s.val())) { + mergeCandidate = s; + continue; + } + uint64_t tailOffset = mergeCandidate->size() - s.size(); + // TODO: If the tail offset is incompatible with this string's alignment, we + // might be able to find another superstring with a compatible tail offset. 
+ // The difficulty is how to do this efficiently + const auto &align = strToAlignment.at(s); + if (!isAligned(align, tailOffset)) + continue; + auto &mergeCandidateAlign = strToAlignment[*mergeCandidate]; + if (align > mergeCandidateAlign) + mergeCandidateAlign = align; + tailMergeMap.try_emplace(s, *mergeCandidate, tailOffset); + } + // Sort the strings for performance and compression size win, and then // assign an offset for each string and save it to the corresponding // StringPieces for easy access. for (auto &[isec, i] : priorityBuilder.buildCStringPriorities(inputs)) { auto &piece = isec->pieces[i]; auto s = isec->getCachedHashStringRef(i); + // Skip tail merged strings until their superstring offsets are resolved + if (tailMergeMap.count(s)) + continue; auto [it, wasInserted] = stringOffsetMap.try_emplace(s, /*placeholder*/ 0); if (wasInserted) { // Avoid computing the offset until we are sure we will need to @@ -1776,8 +1817,23 @@ void DeduplicatedCStringSection::finalizeContents() { // only need to assign the offset. 
piece.outSecOff = it->second; } - for (CStringInputSection *isec : inputs) + for (CStringInputSection *isec : inputs) { + for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) { + if (!piece.live) + continue; + auto s = isec->getCachedHashStringRef(i); + auto it = tailMergeMap.find(s); + if (it == tailMergeMap.end()) + continue; + const auto &[superString, tailOffset] = it->second; + assert(!tailMergeMap.count(superString)); + auto &outSecOff = stringOffsetMap[s]; + outSecOff = stringOffsetMap.at(superString) + tailOffset; + piece.outSecOff = outSecOff; + assert(isAligned(strToAlignment.at(s), piece.outSecOff)); + } isec->isFinal = true; + } } void DeduplicatedCStringSection::writeTo(uint8_t *buf) const { diff --git a/lld/test/MachO/cstring-dedup.s b/lld/test/MachO/cstring-dedup.s index a4b15f26afff0..0a42b3d6fcff3 100644 --- a/lld/test/MachO/cstring-dedup.s +++ b/lld/test/MachO/cstring-dedup.s @@ -8,11 +8,10 @@ # RUN: llvm-objdump --macho --section="__DATA,ptrs" --syms %t/test | FileCheck %s # RUN: llvm-readobj --section-headers %t/test | FileCheck %s --check-prefix=HEADER -## Make sure we only have 3 deduplicated strings in __cstring. +## Make sure we only have 2 deduplicated strings in __cstring. # STR: Contents of (__TEXT,__cstring) section # STR: {{[[:xdigit:]]+}} foo # STR: {{[[:xdigit:]]+}} barbaz -# STR: {{[[:xdigit:]]+}} {{$}} ## Make sure both symbol and section relocations point to the right thing. 
# CHECK: Contents of (__DATA,ptrs) section diff --git a/lld/test/MachO/cstring-tailmerge.s b/lld/test/MachO/cstring-tailmerge.s new file mode 100644 index 0000000000000..83d2810a78139 --- /dev/null +++ b/lld/test/MachO/cstring-tailmerge.s @@ -0,0 +1,85 @@ +# REQUIRES: x86 +# RUN: rm -rf %t; split-file %s %t + +# RUN: sed "s//0/g" %t/align.s.template > %t/align-1.s +# RUN: sed "s//1/g" %t/align.s.template > %t/align-2.s +# RUN: sed "s//2/g" %t/align.s.template > %t/align-4.s + +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/first.s -o %t/first.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/align-1.s -o %t/align-1.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/align-2.s -o %t/align-2.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/align-4.s -o %t/align-4.o + +# RUN: %lld -dylib --deduplicate-strings %t/first.o %t/align-1.o -o %t/align-1 +# RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-1 | FileCheck %s --check-prefixes=CHECK,ALIGN1 + +# RUN: %lld -dylib --deduplicate-strings %t/first.o %t/align-2.o -o %t/align-2 +# RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-2 | FileCheck %s --check-prefixes=CHECK,ALIGN2 + +# RUN: %lld -dylib --deduplicate-strings %t/first.o %t/align-4.o -o %t/align-4 +# RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-4 | FileCheck %s --check-prefixes=CHECK,ALIGN4 + +# CHECK: Contents of (__TEXT,__cstring) section +# CHECK: [[#%.16x,START:]] get awkward offset{{$}} + +# ALIGN1: [[#%.16x,START+19]] myotherlongstr{{$}} +# ALIGN1: [[#%.16x,START+19+15]] otherstr{{$}} + +# ALIGN2: [[#%.16x,START+20]] myotherlongstr{{$}} +# ALIGN2: [[#%.16x,START+20+16]] longstr{{$}} +# ALIGN2: [[#%.16x,START+20+16+8]] otherstr{{$}} +# ALIGN2: [[#%.16x,START+20+16+8+10]] str{{$}} + +# ALIGN4: [[#%.16x,START+20]] myotherlongstr{{$}} +# ALIGN4: [[#%.16x,START+20+16]] otherlongstr{{$}} +# ALIGN4: [[#%.16x,START+20+16+16]] longstr{{$}} +# 
ALIGN4: [[#%.16x,START+20+16+16+8]] otherstr{{$}} +# ALIGN4: [[#%.16x,START+20+16+16+8+12]] str{{$}} + +# CHECK: SYMBOL TABLE: + +# ALIGN1: [[#%.16x,START+19]] l O __TEXT,__cstring _myotherlongstr +# ALIGN1: [[#%.16x,START+21]] l O __TEXT,__cstring _otherlongstr +# ALIGN1: [[#%.16x,START+26]] l O __TEXT,__cstring _longstr +# ALIGN1: [[#%.16x,START+34]] l O __TEXT,__cstring _otherstr +# ALIGN1: [[#%.16x,START+39]] l O __TEXT,__cstring _str + +# ALIGN2: [[#%.16x,START+20]] l O __TEXT,__cstring _myotherlongstr +# ALIGN2: [[#%.16x,START+20+2]] l O __TEXT,__cstring _otherlongstr +# ALIGN2: [[#%.16x,START+20+16]] l O __TEXT,__cstring _longstr +# ALIGN2: [[#%.16x,START+20+16+8]] l O __TEXT,__cstring _otherstr +# ALIGN2: [[#%.16x,START+20+16+8+10]] l O __TEXT,__cstring _str + +# ALIGN4: [[#%.16x,START+20]] l O __TEXT,__cstring _myotherlongstr +# ALIGN4: [[#%.16x,START+20+16]] l O __TEXT,__cstring _otherlongstr +# ALIGN4: [[#%.16x,START+20+16+16]] l O __TEXT,__cstring _longstr +# ALIGN4: [[#%.16x,START+20+16+16+8]] l O __TEXT,__cstring _otherstr +# ALIGN4: [[#%.16x,START+20+16+16+8+12]] l O __TEXT,__cstring _str + +#--- first.s +.cstring +.p2align 2 +.asciz "get awkward offset" # length = 19 + +#--- align.s.template +.cstring + +.p2align + _myotherlongstr: +.asciz "myotherlongstr" # length = 15 + +.p2align + _otherlongstr: +.asciz "otherlongstr" # length = 13, tail offset = 2 + +.p2align + _longstr: +.asciz "longstr" # length = 8, tail offset = 7 + +.p2align + _otherstr: +.asciz "otherstr" # length = 9 + +.p2align + _str: +.asciz "str" # length = 4, tail offset = 5 From 73693d673d5b4b2ce4588248b4acaf870ffb6c01 Mon Sep 17 00:00:00 2001 From: Ellis Hoag Date: Mon, 29 Sep 2025 16:04:17 -0700 Subject: [PATCH 06/10] add to release notes --- lld/docs/ReleaseNotes.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index 6ea1ea0fd6c2f..fa2247d64b690 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ 
-44,6 +44,9 @@ MinGW Improvements MachO Improvements ------------------ +* cstring sections are now tail merged + (`#161262 <https://github.com/llvm/llvm-project/pull/161262>`_) + WebAssembly Improvements ------------------------ From 6a1da5f20f915b07275ed14f8259aa5e144f72e0 Mon Sep 17 00:00:00 2001 From: Ellis Hoag Date: Mon, 29 Sep 2025 16:10:00 -0700 Subject: [PATCH 07/10] add assert --- lld/MachO/SyntheticSections.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index 460a0b5a16ab0..edb90fe7fcbcc 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -1826,6 +1826,7 @@ void DeduplicatedCStringSection::finalizeContents() { if (it == tailMergeMap.end()) continue; const auto &[superString, tailOffset] = it->second; + assert(superString.val().ends_with(s.val())); assert(!tailMergeMap.count(superString)); auto &outSecOff = stringOffsetMap[s]; outSecOff = stringOffsetMap.at(superString) + tailOffset; From fdaca77c076b135333b75b75c767313eaded1f10 Mon Sep 17 00:00:00 2001 From: Ellis Hoag Date: Wed, 1 Oct 2025 14:15:17 -0700 Subject: [PATCH 08/10] order correctly and add more tests --- lld/MachO/Config.h | 1 + lld/MachO/Driver.cpp | 2 + lld/MachO/Options.td | 4 + lld/MachO/SyntheticSections.cpp | 51 +++---- lld/test/MachO/cstring-dedup.s | 3 +- lld/test/MachO/cstring-tailmerge-objc.s | 144 ++++++++++++++++++ lld/test/MachO/cstring-tailmerge.s | 104 ++++++------- lld/test/MachO/order-file-cstring-tailmerge.s | 56 +++++++ 8 files changed, 285 insertions(+), 80 deletions(-) create mode 100644 lld/test/MachO/cstring-tailmerge-objc.s create mode 100644 lld/test/MachO/order-file-cstring-tailmerge.s diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h index 51b1363d87615..a2ca5770bf952 100644 --- a/lld/MachO/Config.h +++ b/lld/MachO/Config.h @@ -223,6 +223,7 @@ struct Configuration { bool warnThinArchiveMissingMembers; bool disableVerify; bool separateCstringLiteralSections; + bool tailMergeStrings; bool 
callGraphProfileSort = false; llvm::StringRef printSymbolOrder; diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index 7ce987e400a24..94f441b7643a7 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -1986,6 +1986,8 @@ bool link(ArrayRef argsArr, llvm::raw_ostream &stdoutOS, config->separateCstringLiteralSections = args.hasFlag(OPT_separate_cstring_literal_sections, OPT_no_separate_cstring_literal_sections, false); + config->tailMergeStrings = + args.hasFlag(OPT_tail_merge_strings, OPT_no_tail_merge_strings, false); auto IncompatWithCGSort = [&](StringRef firstArgStr) { // Throw an error only if --call-graph-profile-sort is explicitly specified diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td index 4eeb8fbe11121..be1a1cc2963d9 100644 --- a/lld/MachO/Options.td +++ b/lld/MachO/Options.td @@ -1091,6 +1091,10 @@ defm separate_cstring_literal_sections "Emit all cstring literals into the __cstring section. As a special " "case, the __objc_methname section will still be emitted. (default)">, Group; +defm tail_merge_strings + : BB<"tail-merge-strings", "Enable string tail merging", + "Disable string tail merging to improve link-time performance">, + Group; def grp_deprecated : OptionGroup<"deprecated">, HelpText<"DEPRECATED">; diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index edb90fe7fcbcc..187cccbe90dbc 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -1746,6 +1746,7 @@ void CStringSection::finalizeContents() { void DeduplicatedCStringSection::finalizeContents() { // Find the largest alignment required for each string. 
DenseMap strToAlignment; + // Used for tail merging only std::vector deduplicatedStrs; for (const CStringInputSection *isec : inputs) { for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) { @@ -1755,7 +1756,7 @@ void DeduplicatedCStringSection::finalizeContents() { assert(isec->align != 0); auto align = getStringPieceAlignment(isec, piece); auto [it, wasInserted] = strToAlignment.try_emplace(s, align); - if (wasInserted) + if (config->tailMergeStrings && wasInserted) deduplicatedStrs.push_back(s); if (!wasInserted && it->second < align) it->second = align; @@ -1784,17 +1785,17 @@ void DeduplicatedCStringSection::finalizeContents() { mergeCandidate = s; continue; } - uint64_t tailOffset = mergeCandidate->size() - s.size(); + uint64_t tailMergeOffset = mergeCandidate->size() - s.size(); // TODO: If the tail offset is incompatible with this string's alignment, we // might be able to find another superstring with a compatible tail offset. // The difficulty is how to do this efficiently const auto &align = strToAlignment.at(s); - if (!isAligned(align, tailOffset)) + if (!isAligned(align, tailMergeOffset)) continue; auto &mergeCandidateAlign = strToAlignment[*mergeCandidate]; if (align > mergeCandidateAlign) mergeCandidateAlign = align; - tailMergeMap.try_emplace(s, *mergeCandidate, tailOffset); + tailMergeMap.try_emplace(s, *mergeCandidate, tailMergeOffset); } // Sort the strings for performance and compression size win, and then @@ -1803,9 +1804,18 @@ void DeduplicatedCStringSection::finalizeContents() { for (auto &[isec, i] : priorityBuilder.buildCStringPriorities(inputs)) { auto &piece = isec->pieces[i]; auto s = isec->getCachedHashStringRef(i); - // Skip tail merged strings until their superstring offsets are resolved - if (tailMergeMap.count(s)) - continue; + // Any string can be tail merged with itself with an offset of zero + uint64_t tailMergeOffset = 0; + auto mergeIt = + config->tailMergeStrings ? 
tailMergeMap.find(s) : tailMergeMap.end(); + if (mergeIt != tailMergeMap.end()) { + auto &[superString, offset] = mergeIt->second; + // s can be tail merged with superString. Do not layout s. Instead layout + // superString if we haven't already + assert(superString.val().ends_with(s.val())); + s = superString; + tailMergeOffset = offset; + } auto [it, wasInserted] = stringOffsetMap.try_emplace(s, /*placeholder*/ 0); if (wasInserted) { // Avoid computing the offset until we are sure we will need to @@ -1813,28 +1823,15 @@ void DeduplicatedCStringSection::finalizeContents() { it->second = offset; size = offset + s.size() + 1; // account for null terminator } - // If the string was already in stringOffsetMap, it is a duplicate and we - // only need to assign the offset. - piece.outSecOff = it->second; - } - for (CStringInputSection *isec : inputs) { - for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) { - if (!piece.live) - continue; - auto s = isec->getCachedHashStringRef(i); - auto it = tailMergeMap.find(s); - if (it == tailMergeMap.end()) - continue; - const auto &[superString, tailOffset] = it->second; - assert(superString.val().ends_with(s.val())); - assert(!tailMergeMap.count(superString)); - auto &outSecOff = stringOffsetMap[s]; - outSecOff = stringOffsetMap.at(superString) + tailOffset; - piece.outSecOff = outSecOff; - assert(isAligned(strToAlignment.at(s), piece.outSecOff)); + piece.outSecOff = it->second + tailMergeOffset; + if (mergeIt != tailMergeMap.end()) { + auto &tailMergedString = mergeIt->first; + stringOffsetMap[tailMergedString] = piece.outSecOff; + assert(isAligned(strToAlignment.at(tailMergedString), piece.outSecOff)); } - isec->isFinal = true; } + for (CStringInputSection *isec : inputs) + isec->isFinal = true; } void DeduplicatedCStringSection::writeTo(uint8_t *buf) const { diff --git a/lld/test/MachO/cstring-dedup.s b/lld/test/MachO/cstring-dedup.s index 0a42b3d6fcff3..a4b15f26afff0 100644 --- a/lld/test/MachO/cstring-dedup.s +++ 
b/lld/test/MachO/cstring-dedup.s @@ -8,10 +8,11 @@ # RUN: llvm-objdump --macho --section="__DATA,ptrs" --syms %t/test | FileCheck %s # RUN: llvm-readobj --section-headers %t/test | FileCheck %s --check-prefix=HEADER -## Make sure we only have 2 deduplicated strings in __cstring. +## Make sure we only have 3 deduplicated strings in __cstring. # STR: Contents of (__TEXT,__cstring) section # STR: {{[[:xdigit:]]+}} foo # STR: {{[[:xdigit:]]+}} barbaz +# STR: {{[[:xdigit:]]+}} {{$}} ## Make sure both symbol and section relocations point to the right thing. # CHECK: Contents of (__DATA,ptrs) section diff --git a/lld/test/MachO/cstring-tailmerge-objc.s b/lld/test/MachO/cstring-tailmerge-objc.s new file mode 100644 index 0000000000000..270dcc7562613 --- /dev/null +++ b/lld/test/MachO/cstring-tailmerge-objc.s @@ -0,0 +1,144 @@ +; REQUIRES: aarch64 +; RUN: rm -rf %t && split-file %s %t + +; Test that ObjC method names are tail merged and +; ObjCSelRefsHelper::makeSelRef() still works correctly + +; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o +; RUN: %lld -dylib -arch arm64 --deduplicate-strings --tail-merge-strings %t/a.o -o %t/a +; RUN: llvm-objdump --macho --section="__TEXT,__objc_methname" %t/a | FileCheck %s --implicit-check-not=error + +; RUN: %lld -dylib -arch arm64 --deduplicate-strings --no-tail-merge-strings %t/a.o -o %t/nomerge +; RUN: llvm-objdump --macho --section="__TEXT,__objc_methname" %t/nomerge | FileCheck %s --check-prefixes=CHECK,NOMERGE --implicit-check-not=error + +; CHECK: withBar:error: +; NOMERGE: error: + +;--- a.mm +__attribute__((objc_root_class)) +@interface Foo +- (void)withBar:(int)bar error:(int)error; +- (void)error:(int)error; +@end + +@implementation Foo +- (void)withBar:(int)bar error:(int)error {} +- (void)error:(int)error {} +@end + +void *_objc_empty_cache; +void *_objc_empty_vtable; +;--- gen +clang -Oz -target arm64-apple-darwin a.mm -S -o - +;--- a.s + .build_version macos, 11, 0 + .section 
__TEXT,__text,regular,pure_instructions + .p2align 2 ; -- Begin function -[Foo withBar:error:] +"-[Foo withBar:error:]": ; @"\01-[Foo withBar:error:]" + .cfi_startproc +; %bb.0: + ret + .cfi_endproc + ; -- End function + .p2align 2 ; -- Begin function -[Foo error:] +"-[Foo error:]": ; @"\01-[Foo error:]" + .cfi_startproc +; %bb.0: + ret + .cfi_endproc + ; -- End function + .globl __objc_empty_vtable ; @_objc_empty_vtable +.zerofill __DATA,__common,__objc_empty_vtable,8,3 + .section __DATA,__objc_data + .globl _OBJC_CLASS_$_Foo ; @"OBJC_CLASS_$_Foo" + .p2align 3, 0x0 +_OBJC_CLASS_$_Foo: + .quad _OBJC_METACLASS_$_Foo + .quad 0 + .quad __objc_empty_cache + .quad __objc_empty_vtable + .quad __OBJC_CLASS_RO_$_Foo + + .globl _OBJC_METACLASS_$_Foo ; @"OBJC_METACLASS_$_Foo" + .p2align 3, 0x0 +_OBJC_METACLASS_$_Foo: + .quad _OBJC_METACLASS_$_Foo + .quad _OBJC_CLASS_$_Foo + .quad __objc_empty_cache + .quad __objc_empty_vtable + .quad __OBJC_METACLASS_RO_$_Foo + + .section __TEXT,__objc_classname,cstring_literals +l_OBJC_CLASS_NAME_: ; @OBJC_CLASS_NAME_ + .asciz "Foo" + + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_METACLASS_RO_$_Foo" +__OBJC_METACLASS_RO_$_Foo: + .long 3 ; 0x3 + .long 40 ; 0x28 + .long 40 ; 0x28 + .space 4 + .quad 0 + .quad l_OBJC_CLASS_NAME_ + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .quad 0 + + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_METH_VAR_NAME_: ; @OBJC_METH_VAR_NAME_ + .asciz "withBar:error:" + + .section __TEXT,__objc_methtype,cstring_literals +l_OBJC_METH_VAR_TYPE_: ; @OBJC_METH_VAR_TYPE_ + .asciz "v24@0:8i16i20" + + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_METH_VAR_NAME_.1: ; @OBJC_METH_VAR_NAME_.1 + .asciz "error:" + + .section __TEXT,__objc_methtype,cstring_literals +l_OBJC_METH_VAR_TYPE_.2: ; @OBJC_METH_VAR_TYPE_.2 + .asciz "v20@0:8i16" + + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_$_INSTANCE_METHODS_Foo" +__OBJC_$_INSTANCE_METHODS_Foo: + .long 24 ; 0x18 + .long 2 ; 0x2 + .quad 
l_OBJC_METH_VAR_NAME_ + .quad l_OBJC_METH_VAR_TYPE_ + .quad "-[Foo withBar:error:]" + .quad l_OBJC_METH_VAR_NAME_.1 + .quad l_OBJC_METH_VAR_TYPE_.2 + .quad "-[Foo error:]" + + .p2align 3, 0x0 ; @"_OBJC_CLASS_RO_$_Foo" +__OBJC_CLASS_RO_$_Foo: + .long 2 ; 0x2 + .long 0 ; 0x0 + .long 0 ; 0x0 + .space 4 + .quad 0 + .quad l_OBJC_CLASS_NAME_ + .quad __OBJC_$_INSTANCE_METHODS_Foo + .quad 0 + .quad 0 + .quad 0 + .quad 0 + + .globl __objc_empty_cache ; @_objc_empty_cache +.zerofill __DATA,__common,__objc_empty_cache,8,3 + .section __DATA,__objc_classlist,regular,no_dead_strip + .p2align 3, 0x0 ; @"OBJC_LABEL_CLASS_$" +l_OBJC_LABEL_CLASS_$: + .quad _OBJC_CLASS_$_Foo + + .section __DATA,__objc_imageinfo,regular,no_dead_strip +L_OBJC_IMAGE_INFO: + .long 0 + .long 64 + +.subsections_via_symbols diff --git a/lld/test/MachO/cstring-tailmerge.s b/lld/test/MachO/cstring-tailmerge.s index 83d2810a78139..cf780b0fc6b90 100644 --- a/lld/test/MachO/cstring-tailmerge.s +++ b/lld/test/MachO/cstring-tailmerge.s @@ -1,85 +1,85 @@ -# REQUIRES: x86 -# RUN: rm -rf %t; split-file %s %t +; REQUIRES: aarch64 +; RUN: rm -rf %t && split-file %s %t -# RUN: sed "s//0/g" %t/align.s.template > %t/align-1.s -# RUN: sed "s//1/g" %t/align.s.template > %t/align-2.s -# RUN: sed "s//2/g" %t/align.s.template > %t/align-4.s +; RUN: sed "s//0/g" %t/align.s.template > %t/align-1.s +; RUN: sed "s//1/g" %t/align.s.template > %t/align-2.s +; RUN: sed "s//2/g" %t/align.s.template > %t/align-4.s -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/first.s -o %t/first.o -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/align-1.s -o %t/align-1.o -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/align-2.s -o %t/align-2.o -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/align-4.s -o %t/align-4.o +; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/first.s -o %t/first.o +; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/align-1.s -o %t/align-1.o +; RUN: llvm-mc 
-filetype=obj -triple=arm64-apple-darwin %t/align-2.s -o %t/align-2.o +; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/align-4.s -o %t/align-4.o -# RUN: %lld -dylib --deduplicate-strings %t/first.o %t/align-1.o -o %t/align-1 -# RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-1 | FileCheck %s --check-prefixes=CHECK,ALIGN1 +; RUN: %lld -dylib -arch arm64 --deduplicate-strings --tail-merge-strings %t/first.o %t/align-1.o -o %t/align-1 +; RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-1 | FileCheck %s --check-prefixes=CHECK,ALIGN1 -# RUN: %lld -dylib --deduplicate-strings %t/first.o %t/align-2.o -o %t/align-2 -# RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-2 | FileCheck %s --check-prefixes=CHECK,ALIGN2 +; RUN: %lld -dylib -arch arm64 --deduplicate-strings --tail-merge-strings %t/first.o %t/align-2.o -o %t/align-2 +; RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-2 | FileCheck %s --check-prefixes=CHECK,ALIGN2 -# RUN: %lld -dylib --deduplicate-strings %t/first.o %t/align-4.o -o %t/align-4 -# RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-4 | FileCheck %s --check-prefixes=CHECK,ALIGN4 +; RUN: %lld -dylib -arch arm64 --deduplicate-strings --tail-merge-strings %t/first.o %t/align-4.o -o %t/align-4 +; RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-4 | FileCheck %s --check-prefixes=CHECK,ALIGN4 -# CHECK: Contents of (__TEXT,__cstring) section -# CHECK: [[#%.16x,START:]] get awkward offset{{$}} +; CHECK: Contents of (__TEXT,__cstring) section +; CHECK: [[#%.16x,START:]] get awkward offset{{$}} -# ALIGN1: [[#%.16x,START+19]] myotherlongstr{{$}} -# ALIGN1: [[#%.16x,START+19+15]] otherstr{{$}} +; ALIGN1: [[#%.16x,START+19]] myotherlongstr{{$}} +; ALIGN1: [[#%.16x,START+19+15]] otherstr{{$}} -# ALIGN2: [[#%.16x,START+20]] myotherlongstr{{$}} -# ALIGN2: [[#%.16x,START+20+16]] longstr{{$}} -# ALIGN2: [[#%.16x,START+20+16+8]] 
otherstr{{$}} -# ALIGN2: [[#%.16x,START+20+16+8+10]] str{{$}} +; ALIGN2: [[#%.16x,START+20]] myotherlongstr{{$}} +; ALIGN2: [[#%.16x,START+20+16]] longstr{{$}} +; ALIGN2: [[#%.16x,START+20+16+8]] otherstr{{$}} +; ALIGN2: [[#%.16x,START+20+16+8+10]] str{{$}} -# ALIGN4: [[#%.16x,START+20]] myotherlongstr{{$}} -# ALIGN4: [[#%.16x,START+20+16]] otherlongstr{{$}} -# ALIGN4: [[#%.16x,START+20+16+16]] longstr{{$}} -# ALIGN4: [[#%.16x,START+20+16+16+8]] otherstr{{$}} -# ALIGN4: [[#%.16x,START+20+16+16+8+12]] str{{$}} +; ALIGN4: [[#%.16x,START+20]] myotherlongstr{{$}} +; ALIGN4: [[#%.16x,START+20+16]] otherlongstr{{$}} +; ALIGN4: [[#%.16x,START+20+16+16]] longstr{{$}} +; ALIGN4: [[#%.16x,START+20+16+16+8]] otherstr{{$}} +; ALIGN4: [[#%.16x,START+20+16+16+8+12]] str{{$}} -# CHECK: SYMBOL TABLE: +; CHECK: SYMBOL TABLE: -# ALIGN1: [[#%.16x,START+19]] l O __TEXT,__cstring _myotherlongstr -# ALIGN1: [[#%.16x,START+21]] l O __TEXT,__cstring _otherlongstr -# ALIGN1: [[#%.16x,START+26]] l O __TEXT,__cstring _longstr -# ALIGN1: [[#%.16x,START+34]] l O __TEXT,__cstring _otherstr -# ALIGN1: [[#%.16x,START+39]] l O __TEXT,__cstring _str +; ALIGN1: [[#%.16x,START+19]] l O __TEXT,__cstring _myotherlongstr +; ALIGN1: [[#%.16x,START+21]] l O __TEXT,__cstring _otherlongstr +; ALIGN1: [[#%.16x,START+26]] l O __TEXT,__cstring _longstr +; ALIGN1: [[#%.16x,START+34]] l O __TEXT,__cstring _otherstr +; ALIGN1: [[#%.16x,START+39]] l O __TEXT,__cstring _str -# ALIGN2: [[#%.16x,START+20]] l O __TEXT,__cstring _myotherlongstr -# ALIGN2: [[#%.16x,START+20+2]] l O __TEXT,__cstring _otherlongstr -# ALIGN2: [[#%.16x,START+20+16]] l O __TEXT,__cstring _longstr -# ALIGN2: [[#%.16x,START+20+16+8]] l O __TEXT,__cstring _otherstr -# ALIGN2: [[#%.16x,START+20+16+8+10]] l O __TEXT,__cstring _str +; ALIGN2: [[#%.16x,START+20]] l O __TEXT,__cstring _myotherlongstr +; ALIGN2: [[#%.16x,START+20+2]] l O __TEXT,__cstring _otherlongstr +; ALIGN2: [[#%.16x,START+20+16]] l O __TEXT,__cstring _longstr +; ALIGN2: 
[[#%.16x,START+20+16+8]] l O __TEXT,__cstring _otherstr +; ALIGN2: [[#%.16x,START+20+16+8+10]] l O __TEXT,__cstring _str -# ALIGN4: [[#%.16x,START+20]] l O __TEXT,__cstring _myotherlongstr -# ALIGN4: [[#%.16x,START+20+16]] l O __TEXT,__cstring _otherlongstr -# ALIGN4: [[#%.16x,START+20+16+16]] l O __TEXT,__cstring _longstr -# ALIGN4: [[#%.16x,START+20+16+16+8]] l O __TEXT,__cstring _otherstr -# ALIGN4: [[#%.16x,START+20+16+16+8+12]] l O __TEXT,__cstring _str +; ALIGN4: [[#%.16x,START+20]] l O __TEXT,__cstring _myotherlongstr +; ALIGN4: [[#%.16x,START+20+16]] l O __TEXT,__cstring _otherlongstr +; ALIGN4: [[#%.16x,START+20+16+16]] l O __TEXT,__cstring _longstr +; ALIGN4: [[#%.16x,START+20+16+16+8]] l O __TEXT,__cstring _otherstr +; ALIGN4: [[#%.16x,START+20+16+16+8+12]] l O __TEXT,__cstring _str -#--- first.s +;--- first.s .cstring .p2align 2 -.asciz "get awkward offset" # length = 19 +.asciz "get awkward offset" ; length = 19 -#--- align.s.template +;--- align.s.template .cstring .p2align <ALIGN> _myotherlongstr: -.asciz "myotherlongstr" # length = 15 +.asciz "myotherlongstr" ; length = 15 .p2align <ALIGN> _otherlongstr: -.asciz "otherlongstr" # length = 13, tail offset = 2 +.asciz "otherlongstr" ; length = 13, tail offset = 2 .p2align <ALIGN> _longstr: -.asciz "longstr" # length = 8, tail offset = 7 +.asciz "longstr" ; length = 8, tail offset = 7 .p2align <ALIGN> _otherstr: -.asciz "otherstr" # length = 9 +.asciz "otherstr" ; length = 9 .p2align <ALIGN> _str: -.asciz "str" # length = 4, tail offset = 5 +.asciz "str" ; length = 4, tail offset = 5 diff --git a/lld/test/MachO/order-file-cstring-tailmerge.s b/lld/test/MachO/order-file-cstring-tailmerge.s new file mode 100644 index 0000000000000..4f177ccf1c14f --- /dev/null +++ b/lld/test/MachO/order-file-cstring-tailmerge.s @@ -0,0 +1,56 @@ +; REQUIRES: aarch64 +; RUN: rm -rf %t && split-file %s %t + +; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o +; RUN: %lld -dylib -arch arm64 --deduplicate-strings --no-tail-merge-strings 
-order_file %t/orderfile.txt %t/a.o -o - | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s +; RUN: %lld -dylib -arch arm64 --deduplicate-strings --tail-merge-strings -order_file %t/orderfile.txt %t/a.o -o - | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=MERGED + +; CHECK: _str2 +; CHECK: _str1 +; CHECK: _superstr2 +; CHECK: _superstr3 +; CHECK: _superstr1 +; CHECK: _str3 + +; str1 has a higher priority than superstr1, so str1 must be ordered before +; str3, even though superstr1 is before superstr3 in the orderfile. + +; MERGED: _superstr2 +; MERGED: _str2 +; MERGED: _superstr1 +; MERGED: _str1 +; MERGED: _superstr3 +; MERGED: _str3 + +;--- a.s +.cstring + _superstr1: +.asciz "superstr1" + _str1: +.asciz "str1" + _superstr2: +.asciz "superstr2" + _str2: +.asciz "str2" + _superstr3: +.asciz "superstr3" + _str3: +.asciz "str3" + +; TODO: We could use update_test_body.py to generate the hashes for the +; orderfile. Unfortunately, it seems that LLVM has a different hash +; implementation than the xxh64sum tool. See +; DeduplicatedCStringSection::getStringOffset() for hash details. 
+; +; while IFS="" read -r line; do +; echo -n $line | xxh64sum | awk '{printf "CSTR;%010d", and(strtonum("0x"$1), 0x7FFFFFFF)}' +; echo " # $line" +; done < orderfile.txt.template + +;--- orderfile.txt +CSTR;1236462241 # str2 +CSTR;1526669509 # str1 +CSTR;1563550684 # superstr2 +CSTR;1044337806 # superstr3 +CSTR;262417687 # superstr1 +CSTR;717161398 # str3 From 4fbcb74168cfd7fc1b85417d3440b0ebdd12650a Mon Sep 17 00:00:00 2001 From: Ellis Hoag Date: Wed, 1 Oct 2025 14:24:51 -0700 Subject: [PATCH 09/10] fix release notes --- lld/docs/ReleaseNotes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index eae632ff9e796..29db1cdf9e9c4 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -46,7 +46,7 @@ MachO Improvements * ``--separate-cstring-literal-sections`` emits cstring literal sections into sections defined by their section name. (`#158720 <https://github.com/llvm/llvm-project/pull/158720>`_) -* cstrings sections are now tail merged. +* ``--tail-merge-strings`` enables tail merging of cstring literals. 
(`#161262 <https://github.com/llvm/llvm-project/pull/161262>`_) WebAssembly Improvements From 43ac02198a2fce4375ea729bc7fedbeb5dcdedd0 Mon Sep 17 00:00:00 2001 From: Ellis Hoag Date: Fri, 3 Oct 2025 09:30:18 -0700 Subject: [PATCH 10/10] remove --deduplicate-strings from tests which is the default already --- lld/test/MachO/cstring-tailmerge-objc.s | 4 ++-- lld/test/MachO/cstring-tailmerge.s | 6 +++--- lld/test/MachO/order-file-cstring-tailmerge.s | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/lld/test/MachO/cstring-tailmerge-objc.s b/lld/test/MachO/cstring-tailmerge-objc.s index 270dcc7562613..46b2bbf9dcd9a 100644 --- a/lld/test/MachO/cstring-tailmerge-objc.s +++ b/lld/test/MachO/cstring-tailmerge-objc.s @@ -5,10 +5,10 @@ ; ObjCSelRefsHelper::makeSelRef() still works correctly ; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o -; RUN: %lld -dylib -arch arm64 --deduplicate-strings --tail-merge-strings %t/a.o -o %t/a +; RUN: %lld -dylib -arch arm64 --tail-merge-strings %t/a.o -o %t/a ; RUN: llvm-objdump --macho --section="__TEXT,__objc_methname" %t/a | FileCheck %s --implicit-check-not=error -; RUN: %lld -dylib -arch arm64 --deduplicate-strings --no-tail-merge-strings %t/a.o -o %t/nomerge +; RUN: %lld -dylib -arch arm64 --no-tail-merge-strings %t/a.o -o %t/nomerge ; RUN: llvm-objdump --macho --section="__TEXT,__objc_methname" %t/nomerge | FileCheck %s --check-prefixes=CHECK,NOMERGE --implicit-check-not=error ; CHECK: withBar:error: diff --git a/lld/test/MachO/cstring-tailmerge.s b/lld/test/MachO/cstring-tailmerge.s index cf780b0fc6b90..740f971eb4bb8 100644 --- a/lld/test/MachO/cstring-tailmerge.s +++ b/lld/test/MachO/cstring-tailmerge.s @@ -10,13 +10,13 @@ ; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/align-2.s -o %t/align-2.o ; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/align-4.s -o %t/align-4.o -; RUN: %lld -dylib -arch arm64 --deduplicate-strings --tail-merge-strings %t/first.o %t/align-1.o -o %t/align-1 +; RUN: %lld -dylib -arch arm64 
--tail-merge-strings %t/first.o %t/align-1.o -o %t/align-1 ; RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-1 | FileCheck %s --check-prefixes=CHECK,ALIGN1 -; RUN: %lld -dylib -arch arm64 --deduplicate-strings --tail-merge-strings %t/first.o %t/align-2.o -o %t/align-2 +; RUN: %lld -dylib -arch arm64 --tail-merge-strings %t/first.o %t/align-2.o -o %t/align-2 ; RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-2 | FileCheck %s --check-prefixes=CHECK,ALIGN2 -; RUN: %lld -dylib -arch arm64 --deduplicate-strings --tail-merge-strings %t/first.o %t/align-4.o -o %t/align-4 +; RUN: %lld -dylib -arch arm64 --tail-merge-strings %t/first.o %t/align-4.o -o %t/align-4 ; RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-4 | FileCheck %s --check-prefixes=CHECK,ALIGN4 ; CHECK: Contents of (__TEXT,__cstring) section diff --git a/lld/test/MachO/order-file-cstring-tailmerge.s b/lld/test/MachO/order-file-cstring-tailmerge.s index 4f177ccf1c14f..20a4d162c573a 100644 --- a/lld/test/MachO/order-file-cstring-tailmerge.s +++ b/lld/test/MachO/order-file-cstring-tailmerge.s @@ -2,8 +2,8 @@ ; RUN: rm -rf %t && split-file %s %t ; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o -; RUN: %lld -dylib -arch arm64 --deduplicate-strings --no-tail-merge-strings -order_file %t/orderfile.txt %t/a.o -o - | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s -; RUN: %lld -dylib -arch arm64 --deduplicate-strings --tail-merge-strings -order_file %t/orderfile.txt %t/a.o -o - | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=MERGED +; RUN: %lld -dylib -arch arm64 --no-tail-merge-strings -order_file %t/orderfile.txt %t/a.o -o - | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s +; RUN: %lld -dylib -arch arm64 --tail-merge-strings -order_file %t/orderfile.txt %t/a.o -o - | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=MERGED ; CHECK: _str2 ; 
CHECK: _str1