Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions bolt/include/bolt/Core/DebugData.h
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,11 @@ class DebugStrOffsetsWriter {
return std::move(StrOffsetsBuffer);
}

/// Returns a non-owning view of the bytes currently held in
/// StrOffsetsBuffer. The buffer is not moved out of the writer, so the view
/// refers to storage the writer still owns.
StringRef bufferStr() {
  const char *const Bytes =
      reinterpret_cast<const char *>(StrOffsetsBuffer->data());
  const size_t NumBytes = StrOffsetsBuffer->size();
  return StringRef(Bytes, NumBytes);
}

/// Initializes Buffer and Stream.
void initialize(DWARFUnit &Unit);

Expand Down Expand Up @@ -507,6 +512,11 @@ class DebugStrWriter {
return std::move(StrBuffer);
}

/// Returns a non-owning view of the bytes currently held in StrBuffer. The
/// buffer is not moved out of the writer, so the view refers to storage the
/// writer still owns.
StringRef bufferStr() {
  const char *const Bytes = reinterpret_cast<const char *>(StrBuffer->data());
  const size_t NumBytes = StrBuffer->size();
  return StringRef(Bytes, NumBytes);
}

/// Adds string to .debug_str.
/// On first invocation it initializes internal data structures.
uint32_t addString(StringRef Str);
Expand Down
108 changes: 107 additions & 1 deletion bolt/lib/Rewrite/DWARFRewriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1725,6 +1725,65 @@ StringRef getSectionName(const SectionRef &Section) {
return Name;
}

// Extracts the slices of .debug_str.dwo that one CU of a DWP references and
// rewrites that CU's .debug_str_offsets.dwo so that it indexes the extracted
// data.
//
// StrDWOContent     - the .debug_str.dwo bytes the offsets refer to.
// StrOffsetsContent - the CU's .debug_str_offsets.dwo contribution.
// StrDWOOutData     - receives, in offset-table order, views into
//                     StrDWOContent; emitted back to back they form the new
//                     .debug_str.dwo. Strings that are adjacent in the input
//                     are merged into a single view.
// StrOffsetsOutData - expected to be empty on entry. Receives a patched copy
//                     of StrOffsetsContent, but only if at least one offset
//                     had to change; otherwise it stays empty and the caller
//                     should emit StrOffsetsContent unchanged.
// DwarfVersion      - for version >= 5 the first 8 bytes of the table are
//                     treated as a header and skipped; earlier versions are
//                     read from offset 0.
// IsLittleEndian    - byte order used to read and write the 4-byte offsets.
void UpdateStrAndStrOffsets(StringRef StrDWOContent,
                            StringRef StrOffsetsContent,
                            SmallVectorImpl<StringRef> &StrDWOOutData,
                            std::string &StrOffsetsOutData,
                            unsigned DwarfVersion, bool IsLittleEndian) {
  const llvm::endianness Endian =
      IsLittleEndian ? llvm::endianness::little : llvm::endianness::big;
  // DWARF64 is ignored: the header is assumed to be 8 bytes and every entry
  // 4 bytes wide.
  const uint64_t HeaderOffset = (DwarfVersion >= 5) ? 8 : 0;
  // A contribution shorter than its header holds no offsets. Bail out before
  // the unsigned subtraction below can wrap around to a huge entry count,
  // which would make the loop run (and write) far out of bounds.
  if (StrOffsetsContent.size() < HeaderOffset)
    return;
  const uint64_t NumOffsets = (StrOffsetsContent.size() - HeaderOffset) / 4;

  DataExtractor Extractor(StrOffsetsContent, IsLittleEndian, 0);
  uint64_t ExtractionOffset = HeaderOffset;

  using StringFragment = DWARFUnitIndex::Entry::SectionContribution;
  // Length of the string starting at Offset, including its terminating NUL,
  // or 0 when no NUL is found at or after Offset.
  auto getStringLength = [](StringRef Content, uint64_t Offset) -> uint64_t {
    size_t NullPos = Content.find('\0', Offset);
    return (NullPos != StringRef::npos) ? (NullPos - Offset + 1) : 0;
  };
  // True when NextOffset starts exactly where Fragment ends.
  auto isContiguous = [](const StringFragment &Fragment,
                         uint64_t NextOffset) -> bool {
    return NextOffset == Fragment.getOffset() + Fragment.getLength();
  };
  std::optional<StringFragment> CurrentFragment;
  // Number of string bytes scheduled for output so far, i.e. the offset the
  // current string will have in the new .debug_str.dwo.
  uint64_t AccumulatedStrLen = 0;
  for (uint64_t I = 0; I < NumOffsets; ++I) {
    const uint64_t StrOffset = Extractor.getU32(&ExtractionOffset);
    const uint64_t StringLength = getStringLength(StrDWOContent, StrOffset);
    if (!CurrentFragment) {
      // First entry: open the initial fragment.
      CurrentFragment = StringFragment(StrOffset, StringLength);
    } else if (isContiguous(*CurrentFragment, StrOffset)) {
      // The string directly follows the current fragment: grow it.
      CurrentFragment->setLength(CurrentFragment->getLength() + StringLength);
    } else {
      // Gap or backwards reference: flush the current fragment and start a
      // new one at this string.
      StrDWOOutData.push_back(StrDWOContent.substr(
          CurrentFragment->getOffset(), CurrentFragment->getLength()));
      CurrentFragment = StringFragment(StrOffset, StringLength);
    }
    if (AccumulatedStrLen != StrOffset) {
      // The string moves in the output, so its table entry must be patched.
      // The table is copied lazily, on the first entry that changes.
      if (StrOffsetsOutData.empty())
        StrOffsetsOutData = StrOffsetsContent.str();
      llvm::support::endian::write32(&StrOffsetsOutData[HeaderOffset + I * 4],
                                     static_cast<uint32_t>(AccumulatedStrLen),
                                     Endian);
    }
    AccumulatedStrLen += StringLength;
  }
  // Flush the fragment that was still open when the table ended.
  if (CurrentFragment)
    StrDWOOutData.push_back(StrDWOContent.substr(CurrentFragment->getOffset(),
                                                 CurrentFragment->getLength()));
}

// Extracts an appropriate slice if input is DWP.
// Applies patches or overwrites the section.
std::optional<StringRef> updateDebugData(
Expand Down Expand Up @@ -1774,6 +1833,8 @@ std::optional<StringRef> updateDebugData(
errs() << "BOLT-WARNING: unsupported debug section: " << SectionName
<< "\n";
if (StrWriter.isInitialized()) {
if (CUDWOEntry)
return StrWriter.bufferStr();
OutputBuffer = StrWriter.releaseBuffer();
return StringRef(reinterpret_cast<const char *>(OutputBuffer->data()),
OutputBuffer->size());
Expand All @@ -1788,6 +1849,8 @@ std::optional<StringRef> updateDebugData(
}
case DWARFSectionKind::DW_SECT_STR_OFFSETS: {
if (StrOffstsWriter.isFinalized()) {
if (CUDWOEntry)
return StrOffstsWriter.bufferStr();
OutputBuffer = StrOffstsWriter.releaseBuffer();
return StringRef(reinterpret_cast<const char *>(OutputBuffer->data()),
OutputBuffer->size());
Expand Down Expand Up @@ -1890,18 +1953,61 @@ void DWARFRewriter::writeDWOFiles(
}
}

StringRef StrDWOContent;
StringRef StrOffsetsContent;
llvm::SmallVector<StringRef, 3> StrDWOOutData;
std::string StrOffsetsOutData;
for (const SectionRef &Section : File->sections()) {
std::unique_ptr<DebugBufferVector> OutputData;
StringRef SectionName = getSectionName(Section);
if (SectionName == "debug_rnglists.dwo")
continue;
Expected<StringRef> ContentsExp = Section.getContents();
assert(ContentsExp && "Invalid contents.");
if (IsDWP && SectionName == "debug_str.dwo") {
if (StrWriter.isInitialized())
StrDWOContent = StrWriter.bufferStr();
else
StrDWOContent = *ContentsExp;
continue;
}
if (std::optional<StringRef> OutData = updateDebugData(
(*DWOCU)->getContext(), SectionName, *ContentsExp, KnownSections,
*Streamer, *this, CUDWOEntry, DWOId, OutputData, RangeListssWriter,
LocWriter, StrOffstsWriter, StrWriter, OverridenSections))
LocWriter, StrOffstsWriter, StrWriter, OverridenSections)) {
if (IsDWP && SectionName == "debug_str_offsets.dwo") {
StrOffsetsContent = *OutData;
continue;
}
Streamer->emitBytes(*OutData);
}
}

if (IsDWP) {
// Handling both .debug_str.dwo and .debug_str_offsets.dwo concurrently. In
// the original DWP, .debug_str is a deduplicated global table, and the
// .debug_str.dwo slice for a single CU needs to be extracted according to
// .debug_str_offsets.dwo.
UpdateStrAndStrOffsets(StrDWOContent, StrOffsetsContent, StrDWOOutData,
StrOffsetsOutData, CU.getVersion(),
(*DWOCU)->getContext().isLittleEndian());
auto SectionIter = KnownSections.find("debug_str.dwo");
if (SectionIter != KnownSections.end()) {
Streamer->switchSection(SectionIter->second.first);
for (size_t i = 0; i < StrDWOOutData.size(); ++i) {
StringRef OutData = StrDWOOutData[i];
if (!OutData.empty())
Streamer->emitBytes(OutData);
}
}
SectionIter = KnownSections.find("debug_str_offsets.dwo");
if (SectionIter != KnownSections.end()) {
Streamer->switchSection(SectionIter->second.first);
if (!StrOffsetsOutData.empty())
Streamer->emitBytes(StrOffsetsOutData);
else
Streamer->emitBytes(StrOffsetsContent);
}
}
Streamer->finish();
TempOut->keep();
Expand Down
226 changes: 226 additions & 0 deletions bolt/test/X86/Inputs/dwarf5-debug-str-split-dwarf-helper.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
# clang++ -g2 -gdwarf-5 -gsplit-dwarf=split -S helper.cpp
# int getReturn() {
# return 0;
# }
.file "helper.cpp"
.text
# _Z9getReturnv is the mangled name of getReturn() from helper.cpp; the body
# below sets up a frame, puts 0 in %eax and returns.
.globl _Z9getReturnv # -- Begin function _Z9getReturnv
.p2align 4
.type _Z9getReturnv,@function
_Z9getReturnv: # @_Z9getReturnv
# .Lfunc_begin0 is the address stored in .debug_addr and, together with
# .Lfunc_end0, gives the DW_AT_high_pc length used by the debug info below.
.Lfunc_begin0:
.file 0 "." "helper.cpp" md5 0xc7d7879297b54325c71b3e0cfbb65e2d
.loc 0 1 0 # helper.cpp:1:0
.cfi_startproc
# %bb.0: # %entry
# Prologue: save the caller's frame pointer and establish a new frame.
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
.Ltmp0:
.loc 0 2 3 prologue_end # helper.cpp:2:3
# return 0;
xorl %eax, %eax
.loc 0 2 3 epilogue_begin is_stmt 0 # helper.cpp:2:3
# Epilogue: restore the caller's frame pointer and return.
popq %rbp
.cfi_def_cfa %rsp, 8
retq
.Ltmp1:
.Lfunc_end0:
.size _Z9getReturnv, .Lfunc_end0-_Z9getReturnv
.cfi_endproc
# -- End function
.section .debug_abbrev,"",@progbits
.byte 1 # Abbreviation Code
.byte 74 # DW_TAG_skeleton_unit
.byte 0 # DW_CHILDREN_no
.byte 16 # DW_AT_stmt_list
.byte 23 # DW_FORM_sec_offset
.byte 114 # DW_AT_str_offsets_base
.byte 23 # DW_FORM_sec_offset
.byte 27 # DW_AT_comp_dir
.byte 37 # DW_FORM_strx1
.ascii "\264B" # DW_AT_GNU_pubnames
.byte 25 # DW_FORM_flag_present
.byte 118 # DW_AT_dwo_name
.byte 37 # DW_FORM_strx1
.byte 17 # DW_AT_low_pc
.byte 27 # DW_FORM_addrx
.byte 18 # DW_AT_high_pc
.byte 6 # DW_FORM_data4
.byte 115 # DW_AT_addr_base
.byte 23 # DW_FORM_sec_offset
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 0 # EOM(3)
.section .debug_info,"",@progbits
.Lcu_begin0:
.long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
.Ldebug_info_start0:
.short 5 # DWARF version number
.byte 4 # DWARF Unit Type
.byte 8 # Address Size (in bytes)
.long .debug_abbrev # Offset Into Abbrev. Section
.quad 5976014880088676049
.byte 1 # Abbrev [1] 0x14:0x14 DW_TAG_skeleton_unit
.long .Lline_table_start0 # DW_AT_stmt_list
.long .Lstr_offsets_base0 # DW_AT_str_offsets_base
.byte 0 # DW_AT_comp_dir
# DW_AT_GNU_pubnames
.byte 1 # DW_AT_dwo_name
.byte 0 # DW_AT_low_pc
.long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
.long .Laddr_table_base0 # DW_AT_addr_base
.Ldebug_info_end0:
.section .debug_str_offsets,"",@progbits
.long 12 # Length of String Offsets Set
.short 5
.short 0
.Lstr_offsets_base0:
.section .debug_str,"MS",@progbits,1
.Lskel_string0:
.asciz "." # string offset=0
.Lskel_string1:
.asciz "helper.dwo" # string offset=2
.section .debug_str_offsets,"",@progbits
.long .Lskel_string0
.long .Lskel_string1
.section .debug_str_offsets.dwo,"e",@progbits
.long 28 # Length of String Offsets Set
.short 5
.short 0
.section .debug_str.dwo,"eMS",@progbits,1
.Linfo_string0:
.asciz "_Z9getReturnv" # string offset=0
.Linfo_string1:
.asciz "getReturn" # string offset=14
.Linfo_string2:
.asciz "int" # string offset=24
.Linfo_string3:
.asciz "clang version 22.0.0" # string offset=28
.Linfo_string4:
.asciz "helper.cpp" # string offset=49
.Linfo_string5:
.asciz "helper.dwo" # string offset=60
.section .debug_str_offsets.dwo,"e",@progbits
.long 0
.long 14
.long 24
.long 28
.long 49
.long 60
.section .debug_info.dwo,"e",@progbits
.long .Ldebug_info_dwo_end0-.Ldebug_info_dwo_start0 # Length of Unit
.Ldebug_info_dwo_start0:
.short 5 # DWARF version number
.byte 5 # DWARF Unit Type
.byte 8 # Address Size (in bytes)
.long 0 # Offset Into Abbrev. Section
.quad 5976014880088676049
.byte 1 # Abbrev [1] 0x14:0x1b DW_TAG_compile_unit
.byte 3 # DW_AT_producer
.short 33 # DW_AT_language
.byte 4 # DW_AT_name
.byte 5 # DW_AT_dwo_name
.byte 2 # Abbrev [2] 0x1a:0x10 DW_TAG_subprogram
.byte 0 # DW_AT_low_pc
.long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
.byte 1 # DW_AT_frame_base
.byte 86
.byte 0 # DW_AT_linkage_name
.byte 1 # DW_AT_name
.byte 0 # DW_AT_decl_file
.byte 1 # DW_AT_decl_line
.long 42 # DW_AT_type
# DW_AT_external
.byte 3 # Abbrev [3] 0x2a:0x4 DW_TAG_base_type
.byte 2 # DW_AT_name
.byte 5 # DW_AT_encoding
.byte 4 # DW_AT_byte_size
.byte 0 # End Of Children Mark
.Ldebug_info_dwo_end0:
.section .debug_abbrev.dwo,"e",@progbits
.byte 1 # Abbreviation Code
.byte 17 # DW_TAG_compile_unit
.byte 1 # DW_CHILDREN_yes
.byte 37 # DW_AT_producer
.byte 37 # DW_FORM_strx1
.byte 19 # DW_AT_language
.byte 5 # DW_FORM_data2
.byte 3 # DW_AT_name
.byte 37 # DW_FORM_strx1
.byte 118 # DW_AT_dwo_name
.byte 37 # DW_FORM_strx1
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 2 # Abbreviation Code
.byte 46 # DW_TAG_subprogram
.byte 0 # DW_CHILDREN_no
.byte 17 # DW_AT_low_pc
.byte 27 # DW_FORM_addrx
.byte 18 # DW_AT_high_pc
.byte 6 # DW_FORM_data4
.byte 64 # DW_AT_frame_base
.byte 24 # DW_FORM_exprloc
.byte 110 # DW_AT_linkage_name
.byte 37 # DW_FORM_strx1
.byte 3 # DW_AT_name
.byte 37 # DW_FORM_strx1
.byte 58 # DW_AT_decl_file
.byte 11 # DW_FORM_data1
.byte 59 # DW_AT_decl_line
.byte 11 # DW_FORM_data1
.byte 73 # DW_AT_type
.byte 19 # DW_FORM_ref4
.byte 63 # DW_AT_external
.byte 25 # DW_FORM_flag_present
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 3 # Abbreviation Code
.byte 36 # DW_TAG_base_type
.byte 0 # DW_CHILDREN_no
.byte 3 # DW_AT_name
.byte 37 # DW_FORM_strx1
.byte 62 # DW_AT_encoding
.byte 11 # DW_FORM_data1
.byte 11 # DW_AT_byte_size
.byte 11 # DW_FORM_data1
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 0 # EOM(3)
.section .debug_addr,"",@progbits
.long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution
.Ldebug_addr_start0:
.short 5 # DWARF version number
.byte 8 # Address size
.byte 0 # Segment selector size
.Laddr_table_base0:
.quad .Lfunc_begin0
.Ldebug_addr_end0:
.section .debug_gnu_pubnames,"",@progbits
.long .LpubNames_end0-.LpubNames_start0 # Length of Public Names Info
.LpubNames_start0:
.short 2 # DWARF Version
.long .Lcu_begin0 # Offset of Compilation Unit Info
.long 40 # Compilation Unit Length
.long 26 # DIE offset
.byte 48 # Attributes: FUNCTION, EXTERNAL
.asciz "getReturn" # External Name
.long 0 # End Mark
.LpubNames_end0:
.section .debug_gnu_pubtypes,"",@progbits
.long .LpubTypes_end0-.LpubTypes_start0 # Length of Public Types Info
.LpubTypes_start0:
.short 2 # DWARF Version
.long .Lcu_begin0 # Offset of Compilation Unit Info
.long 40 # Compilation Unit Length
.long 42 # DIE offset
.byte 144 # Attributes: TYPE, STATIC
.asciz "int" # External Name
.long 0 # End Mark
.LpubTypes_end0:
.ident "clang version 22.0.0"
.section ".note.GNU-stack","",@progbits
.addrsig
.section .debug_line,"",@progbits
.Lline_table_start0:
Loading
Loading