-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[BOLT][DWARF] Slice .debug_str from the DWP for each CU #159540
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-bolt Author: Liu Ke (Sockke) Changes: Slice .debug_str from the DWP for each CU using .debug_str_offsets and emit it, instead of directly copying the global .debug_str, in order to address the bloat issue of DWO after updates. (more details here - #155766) I have added a test case for DWARF5, and the test case for DWARF4 depends on #155619. Patch is 30.59 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/159540.diff 4 Files Affected:
diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp
index 6752489ad562a..7847eab7e4822 100644
--- a/bolt/lib/Rewrite/DWARFRewriter.cpp
+++ b/bolt/lib/Rewrite/DWARFRewriter.cpp
@@ -1725,6 +1725,65 @@ StringRef getSectionName(const SectionRef &Section) {
return Name;
}
+// Builds a per-CU .debug_str.dwo and a matching .debug_str_offsets.dwo from
+// the global string table of a DWP.
+//
+// \p StrDWOContent is the .debug_str.dwo section of the DWP and
+// \p StrOffsetsContent is the .debug_str_offsets.dwo data for the CU. Every
+// 32-bit offset in \p StrOffsetsContent is visited in order: the string it
+// points at is appended to the output, and the offset is rewritten to the
+// position the string has in that output. Strings that sit back to back in
+// the input are merged into a single slice. The slices pushed to
+// \p StrDWOOutData alias \p StrDWOContent and must not outlive it.
+//
+// \p StrOffsetsOutData is only written when at least one offset changes; if
+// it is still empty on return, \p StrOffsetsContent can be emitted as is.
+//
+// Only 32-bit DWARF is handled. For DWARF 5 and later the first 8 bytes of
+// \p StrOffsetsContent are skipped as the table header.
+void UpdateStrAndStrOffsets(StringRef StrDWOContent,
+                            StringRef StrOffsetsContent,
+                            SmallVectorImpl<StringRef> &StrDWOOutData,
+                            std::string &StrOffsetsOutData,
+                            unsigned DwarfVersion, bool IsLittleEndian) {
+  const llvm::endianness Endian =
+      IsLittleEndian ? llvm::endianness::little : llvm::endianness::big;
+  // DWARF64 is not handled: every offset is read and written as 4 bytes.
+  constexpr uint64_t OffsetSize = 4;
+  const uint64_t HeaderOffset = (DwarfVersion >= 5) ? 8 : 0;
+  // A missing or truncated table has no offsets to rewrite. Bail out before
+  // the subtraction below, which would otherwise wrap around and make the
+  // loop iterate over a bogus, enormous number of entries.
+  if (StrOffsetsContent.size() < HeaderOffset)
+    return;
+  const uint64_t NumOffsets =
+      (StrOffsetsContent.size() - HeaderOffset) / OffsetSize;
+
+  DataExtractor Extractor(StrOffsetsContent, IsLittleEndian, 0);
+  uint64_t ExtractionOffset = HeaderOffset;
+
+  using StringFragment = DWARFUnitIndex::Entry::SectionContribution;
+  // Length of the string at Offset including its terminating NUL, or 0 if no
+  // NUL is found at or after Offset.
+  auto getStringLength = [](StringRef Content, uint64_t Offset) -> uint64_t {
+    size_t NullPos = Content.find('\0', Offset);
+    return (NullPos != StringRef::npos) ? (NullPos - Offset + 1) : 0;
+  };
+  // True if NextOffset starts exactly where Fragment ends in the input.
+  auto isContiguous = [](const StringFragment &Fragment,
+                         uint64_t NextOffset) -> bool {
+    return NextOffset == Fragment.getOffset() + Fragment.getLength();
+  };
+  // Appends a finished fragment to the output as a slice of the input.
+  auto emitFragment = [&](const StringFragment &Fragment) {
+    StrDWOOutData.push_back(
+        StrDWOContent.substr(Fragment.getOffset(), Fragment.getLength()));
+  };
+  std::optional<StringFragment> CurrentFragment;
+  // Total length of the strings collected so far, i.e. the offset the next
+  // string will have in the sliced output.
+  uint64_t AccumulatedStrLen = 0;
+  for (uint64_t I = 0; I < NumOffsets; ++I) {
+    const uint64_t StrOffset = Extractor.getU32(&ExtractionOffset);
+    const uint64_t StringLength = getStringLength(StrDWOContent, StrOffset);
+    if (CurrentFragment && isContiguous(*CurrentFragment, StrOffset)) {
+      // The string directly follows the current fragment: grow it.
+      CurrentFragment->setLength(CurrentFragment->getLength() + StringLength);
+    } else {
+      // Flush the current fragment, if any, and start a new one.
+      if (CurrentFragment)
+        emitFragment(*CurrentFragment);
+      CurrentFragment = StringFragment(StrOffset, StringLength);
+    }
+    if (AccumulatedStrLen != StrOffset) {
+      // The string moved. Copy the original table on the first change, then
+      // patch this entry in place.
+      if (StrOffsetsOutData.empty())
+        StrOffsetsOutData = StrOffsetsContent.str();
+      llvm::support::endian::write32(
+          &StrOffsetsOutData[HeaderOffset + I * OffsetSize],
+          static_cast<uint32_t>(AccumulatedStrLen), Endian);
+    }
+    AccumulatedStrLen += StringLength;
+  }
+  if (CurrentFragment)
+    emitFragment(*CurrentFragment);
+}
+
// Exctracts an appropriate slice if input is DWP.
// Applies patches or overwrites the section.
std::optional<StringRef> updateDebugData(
@@ -1890,6 +1949,10 @@ void DWARFRewriter::writeDWOFiles(
}
}
+ StringRef StrDWOContent;
+ StringRef StrOffsetsContent;
+ llvm::SmallVector<StringRef, 3> StrDWOOutData;
+ std::string StrOffsetsOutData;
for (const SectionRef &Section : File->sections()) {
std::unique_ptr<DebugBufferVector> OutputData;
StringRef SectionName = getSectionName(Section);
@@ -1897,11 +1960,47 @@ void DWARFRewriter::writeDWOFiles(
continue;
Expected<StringRef> ContentsExp = Section.getContents();
assert(ContentsExp && "Invalid contents.");
+ if (IsDWP && SectionName == "debug_str.dwo") {
+ StrDWOContent = *ContentsExp;
+ continue;
+ }
if (std::optional<StringRef> OutData = updateDebugData(
(*DWOCU)->getContext(), SectionName, *ContentsExp, KnownSections,
*Streamer, *this, CUDWOEntry, DWOId, OutputData, RangeListssWriter,
- LocWriter, StrOffstsWriter, StrWriter, OverridenSections))
+ LocWriter, StrOffstsWriter, StrWriter, OverridenSections)) {
+ if (IsDWP && SectionName == "debug_str_offsets.dwo") {
+ StrOffsetsContent = *OutData;
+ continue;
+ }
Streamer->emitBytes(*OutData);
+ }
+ }
+
+ if (IsDWP) {
+ // Handling both .debug_str.dwo and .debug_str_offsets.dwo concurrently. In
+ // the original DWP, .debug_str is a deduplicated global table, and the
+ // .debug_str.dwo slice for a single CU needs to be extracted according to
+ // .debug_str_offsets.dwo.
+ UpdateStrAndStrOffsets(StrDWOContent, StrOffsetsContent, StrDWOOutData,
+ StrOffsetsOutData, CU.getVersion(),
+ (*DWOCU)->getContext().isLittleEndian());
+ auto SectionIter = KnownSections.find("debug_str.dwo");
+ if (SectionIter != KnownSections.end()) {
+ Streamer->switchSection(SectionIter->second.first);
+ for (size_t i = 0; i < StrDWOOutData.size(); ++i) {
+ StringRef OutData = StrDWOOutData[i];
+ if (!OutData.empty())
+ Streamer->emitBytes(OutData);
+ }
+ }
+ SectionIter = KnownSections.find("debug_str_offsets.dwo");
+ if (SectionIter != KnownSections.end()) {
+ Streamer->switchSection(SectionIter->second.first);
+ if (!StrOffsetsOutData.empty())
+ Streamer->emitBytes(StrOffsetsOutData);
+ else
+ Streamer->emitBytes(StrOffsetsContent);
+ }
}
Streamer->finish();
TempOut->keep();
diff --git a/bolt/test/X86/Inputs/dwarf5-debug-str-split-dwarf-helper.s b/bolt/test/X86/Inputs/dwarf5-debug-str-split-dwarf-helper.s
new file mode 100644
index 0000000000000..540f41a462123
--- /dev/null
+++ b/bolt/test/X86/Inputs/dwarf5-debug-str-split-dwarf-helper.s
@@ -0,0 +1,226 @@
+# clang++ -g2 -gdwarf-5 -gsplit-dwarf=split -S helper.cpp
+# int getReturn() {
+# return 0;
+# }
+ .file "helper.cpp"
+ .text
+ .globl _Z9getReturnv # -- Begin function _Z9getReturnv
+ .p2align 4
+ .type _Z9getReturnv,@function
+_Z9getReturnv: # @_Z9getReturnv
+.Lfunc_begin0:
+ .file 0 "." "helper.cpp" md5 0xc7d7879297b54325c71b3e0cfbb65e2d
+ .loc 0 1 0 # helper.cpp:1:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp0:
+ .loc 0 2 3 prologue_end # helper.cpp:2:3
+ xorl %eax, %eax
+ .loc 0 2 3 epilogue_begin is_stmt 0 # helper.cpp:2:3
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp1:
+.Lfunc_end0:
+ .size _Z9getReturnv, .Lfunc_end0-_Z9getReturnv
+ .cfi_endproc
+ # -- End function
+ .section .debug_abbrev,"",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 74 # DW_TAG_skeleton_unit
+ .byte 0 # DW_CHILDREN_no
+ .byte 16 # DW_AT_stmt_list
+ .byte 23 # DW_FORM_sec_offset
+ .byte 114 # DW_AT_str_offsets_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 27 # DW_AT_comp_dir
+ .byte 37 # DW_FORM_strx1
+ .ascii "\264B" # DW_AT_GNU_pubnames
+ .byte 25 # DW_FORM_flag_present
+ .byte 118 # DW_AT_dwo_name
+ .byte 37 # DW_FORM_strx1
+ .byte 17 # DW_AT_low_pc
+ .byte 27 # DW_FORM_addrx
+ .byte 18 # DW_AT_high_pc
+ .byte 6 # DW_FORM_data4
+ .byte 115 # DW_AT_addr_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_info,"",@progbits
+.Lcu_begin0:
+ .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+ .short 5 # DWARF version number
+ .byte 4 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .quad 5976014880088676049
+ .byte 1 # Abbrev [1] 0x14:0x14 DW_TAG_skeleton_unit
+ .long .Lline_table_start0 # DW_AT_stmt_list
+ .long .Lstr_offsets_base0 # DW_AT_str_offsets_base
+ .byte 0 # DW_AT_comp_dir
+ # DW_AT_GNU_pubnames
+ .byte 1 # DW_AT_dwo_name
+ .byte 0 # DW_AT_low_pc
+ .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+ .long .Laddr_table_base0 # DW_AT_addr_base
+.Ldebug_info_end0:
+ .section .debug_str_offsets,"",@progbits
+ .long 12 # Length of String Offsets Set
+ .short 5
+ .short 0
+.Lstr_offsets_base0:
+ .section .debug_str,"MS",@progbits,1
+.Lskel_string0:
+ .asciz "." # string offset=0
+.Lskel_string1:
+ .asciz "helper.dwo" # string offset=2
+ .section .debug_str_offsets,"",@progbits
+ .long .Lskel_string0
+ .long .Lskel_string1
+ .section .debug_str_offsets.dwo,"e",@progbits
+ .long 28 # Length of String Offsets Set
+ .short 5
+ .short 0
+ .section .debug_str.dwo,"eMS",@progbits,1
+.Linfo_string0:
+ .asciz "_Z9getReturnv" # string offset=0
+.Linfo_string1:
+ .asciz "getReturn" # string offset=14
+.Linfo_string2:
+ .asciz "int" # string offset=24
+.Linfo_string3:
+ .asciz "clang version 22.0.0" # string offset=28
+.Linfo_string4:
+ .asciz "helper.cpp" # string offset=49
+.Linfo_string5:
+ .asciz "helper.dwo" # string offset=60
+ .section .debug_str_offsets.dwo,"e",@progbits
+ .long 0
+ .long 14
+ .long 24
+ .long 28
+ .long 49
+ .long 60
+ .section .debug_info.dwo,"e",@progbits
+ .long .Ldebug_info_dwo_end0-.Ldebug_info_dwo_start0 # Length of Unit
+.Ldebug_info_dwo_start0:
+ .short 5 # DWARF version number
+ .byte 5 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long 0 # Offset Into Abbrev. Section
+ .quad 5976014880088676049
+ .byte 1 # Abbrev [1] 0x14:0x1b DW_TAG_compile_unit
+ .byte 3 # DW_AT_producer
+ .short 33 # DW_AT_language
+ .byte 4 # DW_AT_name
+ .byte 5 # DW_AT_dwo_name
+ .byte 2 # Abbrev [2] 0x1a:0x10 DW_TAG_subprogram
+ .byte 0 # DW_AT_low_pc
+ .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .byte 0 # DW_AT_linkage_name
+ .byte 1 # DW_AT_name
+ .byte 0 # DW_AT_decl_file
+ .byte 1 # DW_AT_decl_line
+ .long 42 # DW_AT_type
+ # DW_AT_external
+ .byte 3 # Abbrev [3] 0x2a:0x4 DW_TAG_base_type
+ .byte 2 # DW_AT_name
+ .byte 5 # DW_AT_encoding
+ .byte 4 # DW_AT_byte_size
+ .byte 0 # End Of Children Mark
+.Ldebug_info_dwo_end0:
+ .section .debug_abbrev.dwo,"e",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 17 # DW_TAG_compile_unit
+ .byte 1 # DW_CHILDREN_yes
+ .byte 37 # DW_AT_producer
+ .byte 37 # DW_FORM_strx1
+ .byte 19 # DW_AT_language
+ .byte 5 # DW_FORM_data2
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 118 # DW_AT_dwo_name
+ .byte 37 # DW_FORM_strx1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 2 # Abbreviation Code
+ .byte 46 # DW_TAG_subprogram
+ .byte 0 # DW_CHILDREN_no
+ .byte 17 # DW_AT_low_pc
+ .byte 27 # DW_FORM_addrx
+ .byte 18 # DW_AT_high_pc
+ .byte 6 # DW_FORM_data4
+ .byte 64 # DW_AT_frame_base
+ .byte 24 # DW_FORM_exprloc
+ .byte 110 # DW_AT_linkage_name
+ .byte 37 # DW_FORM_strx1
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 63 # DW_AT_external
+ .byte 25 # DW_FORM_flag_present
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 3 # Abbreviation Code
+ .byte 36 # DW_TAG_base_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 62 # DW_AT_encoding
+ .byte 11 # DW_FORM_data1
+ .byte 11 # DW_AT_byte_size
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_addr,"",@progbits
+ .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution
+.Ldebug_addr_start0:
+ .short 5 # DWARF version number
+ .byte 8 # Address size
+ .byte 0 # Segment selector size
+.Laddr_table_base0:
+ .quad .Lfunc_begin0
+.Ldebug_addr_end0:
+ .section .debug_gnu_pubnames,"",@progbits
+ .long .LpubNames_end0-.LpubNames_start0 # Length of Public Names Info
+.LpubNames_start0:
+ .short 2 # DWARF Version
+ .long .Lcu_begin0 # Offset of Compilation Unit Info
+ .long 40 # Compilation Unit Length
+ .long 26 # DIE offset
+ .byte 48 # Attributes: FUNCTION, EXTERNAL
+ .asciz "getReturn" # External Name
+ .long 0 # End Mark
+.LpubNames_end0:
+ .section .debug_gnu_pubtypes,"",@progbits
+ .long .LpubTypes_end0-.LpubTypes_start0 # Length of Public Types Info
+.LpubTypes_start0:
+ .short 2 # DWARF Version
+ .long .Lcu_begin0 # Offset of Compilation Unit Info
+ .long 40 # Compilation Unit Length
+ .long 42 # DIE offset
+ .byte 144 # Attributes: TYPE, STATIC
+ .asciz "int" # External Name
+ .long 0 # End Mark
+.LpubTypes_end0:
+ .ident "clang version 22.0.0"
+ .section ".note.GNU-stack","",@progbits
+ .addrsig
+ .section .debug_line,"",@progbits
+.Lline_table_start0:
diff --git a/bolt/test/X86/Inputs/dwarf5-debug-str-split-dwarf-main.s b/bolt/test/X86/Inputs/dwarf5-debug-str-split-dwarf-main.s
new file mode 100644
index 0000000000000..d20eab14edf00
--- /dev/null
+++ b/bolt/test/X86/Inputs/dwarf5-debug-str-split-dwarf-main.s
@@ -0,0 +1,225 @@
+# clang++ -g2 -gdwarf-5 -gsplit-dwarf=split -S main.cpp
+# extern int getReturn();
+# int main() {
+# return getReturn();
+# }
+ .file "main.cpp"
+ .text
+ .globl main # -- Begin function main
+ .p2align 4
+ .type main,@function
+main: # @main
+.Lfunc_begin0:
+ .file 0 "." "main.cpp" md5 0x9cdef858e26cf684ed9ef3b60e05bdad
+ .loc 0 2 0 # main.cpp:2:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+ subq $16, %rsp
+ movl $0, -4(%rbp)
+.Ltmp0:
+ .loc 0 3 10 prologue_end # main.cpp:3:10
+ callq _Z9getReturnv@PLT
+ .loc 0 3 3 epilogue_begin is_stmt 0 # main.cpp:3:3
+ addq $16, %rsp
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp1:
+.Lfunc_end0:
+ .size main, .Lfunc_end0-main
+ .cfi_endproc
+ # -- End function
+ .section .debug_abbrev,"",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 74 # DW_TAG_skeleton_unit
+ .byte 0 # DW_CHILDREN_no
+ .byte 16 # DW_AT_stmt_list
+ .byte 23 # DW_FORM_sec_offset
+ .byte 114 # DW_AT_str_offsets_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 27 # DW_AT_comp_dir
+ .byte 37 # DW_FORM_strx1
+ .ascii "\264B" # DW_AT_GNU_pubnames
+ .byte 25 # DW_FORM_flag_present
+ .byte 118 # DW_AT_dwo_name
+ .byte 37 # DW_FORM_strx1
+ .byte 17 # DW_AT_low_pc
+ .byte 27 # DW_FORM_addrx
+ .byte 18 # DW_AT_high_pc
+ .byte 6 # DW_FORM_data4
+ .byte 115 # DW_AT_addr_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_info,"",@progbits
+.Lcu_begin0:
+ .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+ .short 5 # DWARF version number
+ .byte 4 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .quad -9094791692727444213
+ .byte 1 # Abbrev [1] 0x14:0x14 DW_TAG_skeleton_unit
+ .long .Lline_table_start0 # DW_AT_stmt_list
+ .long .Lstr_offsets_base0 # DW_AT_str_offsets_base
+ .byte 0 # DW_AT_comp_dir
+ # DW_AT_GNU_pubnames
+ .byte 1 # DW_AT_dwo_name
+ .byte 0 # DW_AT_low_pc
+ .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+ .long .Laddr_table_base0 # DW_AT_addr_base
+.Ldebug_info_end0:
+ .section .debug_str_offsets,"",@progbits
+ .long 12 # Length of String Offsets Set
+ .short 5
+ .short 0
+.Lstr_offsets_base0:
+ .section .debug_str,"MS",@progbits,1
+.Lskel_string0:
+ .asciz "." # string offset=0
+.Lskel_string1:
+ .asciz "main.dwo" # string offset=2
+ .section .debug_str_offsets,"",@progbits
+ .long .Lskel_string0
+ .long .Lskel_string1
+ .section .debug_str_offsets.dwo,"e",@progbits
+ .long 24 ...
[truncated]
|
Slice .debug_str from the DWP for each CU using .debug_str_offsets and emit it, instead of directly copying the global .debug_str, in order to address the bloat issue of DWO after updates. (more details here - #155766)
I have added a test case for DWARF5, and a test case for DWARF4 depends on #155619.