From fccf37fcddd29a20744796e9f03a580dfa1b0f4a Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Mon, 10 Nov 2025 21:22:39 -0800 Subject: [PATCH 1/9] Modify llvm-dwp to be able to emit string tables over 4GB without losing data. We can change llvm-dwp to emit DWARF64 version of the .debug_str_offsets tables for .dwo files in a .dwp file. This allows the string table to exceed 4GB without truncating string offsets into the .debug_str section and losing data. llvm-dwp will append all strings to the .debug_str section for a .dwo file, and if any of the new string offsets exceed UINT32_MAX, it will upgrade the .debug_str_offsets table to a DWARF64 header and then each string offset in that table can now have a 64 bit offset. --- .../Plugins/SymbolFile/DWARF/DWARFUnit.cpp | 4 + llvm/include/llvm/DWP/DWP.h | 4 +- llvm/include/llvm/DWP/DWPStringPool.h | 6 +- llvm/lib/DWP/DWP.cpp | 78 +++++++++++++++---- 4 files changed, 72 insertions(+), 20 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp index b78e6ce807bca..4a3dad2385c2c 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp @@ -376,6 +376,10 @@ void DWARFUnit::SetDwoStrOffsetsBase() { // Skip padding. baseOffset += 2; + } else { + // Size of offset for .debug_str_offsets is same as DWARF offset byte size + // of the DWARFUnit for DWARF version 4 and earlier. 
+ m_str_offsets_size = m_header.getDwarfOffsetByteSize(); } SetStrOffsetsBase(baseOffset); diff --git a/llvm/include/llvm/DWP/DWP.h b/llvm/include/llvm/DWP/DWP.h index a759bae10d160..cc38369658eaa 100644 --- a/llvm/include/llvm/DWP/DWP.h +++ b/llvm/include/llvm/DWP/DWP.h @@ -70,6 +70,8 @@ struct CompileUnitIdentifiers { LLVM_ABI Error write(MCStreamer &Out, ArrayRef Inputs, OnCuIndexOverflow OverflowOptValue); +typedef std::vector> SectionLengths; + LLVM_ABI Error handleSection( const StringMap> &KnownSections, const MCSection *StrSection, const MCSection *StrOffsetSection, @@ -82,7 +84,7 @@ LLVM_ABI Error handleSection( std::vector &CurTypesSection, std::vector &CurInfoSection, StringRef &AbbrevSection, StringRef &CurCUIndexSection, StringRef &CurTUIndexSection, - std::vector> &SectionLength); + SectionLengths &SectionLength); LLVM_ABI Expected parseInfoSectionUnitHeader(StringRef Info); diff --git a/llvm/include/llvm/DWP/DWPStringPool.h b/llvm/include/llvm/DWP/DWPStringPool.h index 1354b46f156b6..d1486ff7872e1 100644 --- a/llvm/include/llvm/DWP/DWPStringPool.h +++ b/llvm/include/llvm/DWP/DWPStringPool.h @@ -32,13 +32,13 @@ class DWPStringPool { MCStreamer &Out; MCSection *Sec; - DenseMap Pool; - uint32_t Offset = 0; + DenseMap Pool; + uint64_t Offset = 0; public: DWPStringPool(MCStreamer &Out, MCSection *Sec) : Out(Out), Sec(Sec) {} - uint32_t getOffset(const char *Str, unsigned Length) { + uint64_t getOffset(const char *Str, unsigned Length) { assert(strlen(Str) + 1 == Length && "Ensure length hint is correct"); auto Pair = Pool.insert(std::make_pair(Str, Offset)); diff --git a/llvm/lib/DWP/DWP.cpp b/llvm/lib/DWP/DWP.cpp index b565edbfe96db..54edce81208b5 100644 --- a/llvm/lib/DWP/DWP.cpp +++ b/llvm/lib/DWP/DWP.cpp @@ -413,33 +413,43 @@ Expected parseInfoSectionUnitHeader(StringRef Info) { } static void writeNewOffsetsTo(MCStreamer &Out, DataExtractor &Data, - DenseMap &OffsetRemapping, - uint64_t &Offset, uint64_t &Size) { + DenseMap &OffsetRemapping, + 
uint64_t &Offset, const uint64_t Size, + uint32_t OldOffsetSize, uint32_t NewOffsetSize) { while (Offset < Size) { - auto OldOffset = Data.getU32(&Offset); - auto NewOffset = OffsetRemapping[OldOffset]; - Out.emitIntValue(NewOffset, 4); + const uint64_t OldOffset = Data.getUnsigned(&Offset, OldOffsetSize); + const uint64_t NewOffset = OffsetRemapping[OldOffset]; + assert(NewOffsetSize == 8 || NewOffset <= UINT32_MAX); + Out.emitIntValue(NewOffset, NewOffsetSize); } } void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings, MCSection *StrOffsetSection, StringRef CurStrSection, - StringRef CurStrOffsetSection, uint16_t Version) { + StringRef CurStrOffsetSection, uint16_t Version, + SectionLengths &SectionLength) { // Could possibly produce an error or warning if one of these was non-null but // the other was null. if (CurStrSection.empty() || CurStrOffsetSection.empty()) return; - DenseMap OffsetRemapping; + DenseMap OffsetRemapping; DataExtractor Data(CurStrSection, true, 0); uint64_t LocalOffset = 0; uint64_t PrevOffset = 0; + + // Keep track if any new string offsets exceed UINT32_MAX. If any do, we can + // emit a DWARF64 .debug_str_offsets table for this compile unit. 
+ uint32_t OldOffsetSize = 4; + uint32_t NewOffsetSize = 4; while (const char *S = Data.getCStr(&LocalOffset)) { - OffsetRemapping[PrevOffset] = - Strings.getOffset(S, LocalOffset - PrevOffset); + uint64_t NewOffset = Strings.getOffset(S, LocalOffset - PrevOffset); + OffsetRemapping[PrevOffset] = NewOffset; + if (NewOffset > UINT32_MAX) + NewOffsetSize = 8; PrevOffset = LocalOffset; } @@ -451,7 +461,7 @@ void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings, uint64_t Size = CurStrOffsetSection.size(); if (Version > 4) { while (Offset < Size) { - uint64_t HeaderSize = debugStrOffsetsHeaderSize(Data, Version); + const uint64_t HeaderSize = debugStrOffsetsHeaderSize(Data, Version); assert(HeaderSize <= Size - Offset && "StrOffsetSection size is less than its header"); @@ -461,16 +471,52 @@ void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings, if (HeaderSize == 8) { ContributionSize = Data.getU32(&HeaderLengthOffset); } else if (HeaderSize == 16) { + OldOffsetSize = 8; HeaderLengthOffset += 4; // skip the dwarf64 marker ContributionSize = Data.getU64(&HeaderLengthOffset); } ContributionEnd = ContributionSize + HeaderLengthOffset; - Out.emitBytes(Data.getBytes(&Offset, HeaderSize)); - writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, ContributionEnd); + + StringRef HeaderBytes = Data.getBytes(&Offset, HeaderSize); + if (OldOffsetSize == 4 && NewOffsetSize == 8) { + // We had a DWARF32 .debug_str_offsets header, but we need to emit + // some string offsets that require 64 bit offsets on the .debug_str + // section. Emit the .debug_str_offsets header in DWARF64 format so we + // can emit string offsets that exceed UINT32_MAX without truncating + // the string offset. + + // 2 bytes for DWARF version, 2 bytes pad. 
+ const uint64_t VersionPadSize = 4; + const uint64_t NewLength = + (ContributionSize - VersionPadSize) * 2 + VersionPadSize; + // Emit the DWARF64 length that starts with a 4 byte DW_LENGTH_DWARF64 + // value followed by the 8 byte updated length. + Out.emitIntValue(llvm::dwarf::DW_LENGTH_DWARF64, 4); + Out.emitIntValue(NewLength, 8); + // Emit DWARF version as a 2 byte integer. + Out.emitIntValue(Version, 2); + // Emit 2 bytes of padding. + Out.emitIntValue(0, 2); + // Update the .debug_str_offsets section length contribution for + // this .dwo file. + for (auto &Pair : SectionLength) { + if (Pair.first == DW_SECT_STR_OFFSETS) { + Pair.second = NewLength + 12; + break; + } + } + } else { + // Just emit the same .debug_str_offsets header. + Out.emitBytes(HeaderBytes); + } + writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, ContributionEnd, + OldOffsetSize, NewOffsetSize); } } else { - writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, Size); + assert(OldOffsetSize == NewOffsetSize); + writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, Size, OldOffsetSize, + NewOffsetSize); } } @@ -562,7 +608,7 @@ Error handleSection( std::vector &CurTypesSection, std::vector &CurInfoSection, StringRef &AbbrevSection, StringRef &CurCUIndexSection, StringRef &CurTUIndexSection, - std::vector> &SectionLength) { + SectionLengths &SectionLength) { if (Section.isBSS()) return Error::success(); @@ -684,7 +730,7 @@ Error write(MCStreamer &Out, ArrayRef Inputs, // This maps each section contained in this file to its length. // This information is later on used to calculate the contributions, // i.e. offset and length, of each compile/type unit to a section. 
- std::vector> SectionLength; + SectionLengths SectionLength; for (const auto &Section : Obj.sections()) if (auto Err = handleSection( @@ -713,7 +759,7 @@ Error write(MCStreamer &Out, ArrayRef Inputs, } writeStringsAndOffsets(Out, Strings, StrOffsetSection, CurStrSection, - CurStrOffsetSection, Header.Version); + CurStrOffsetSection, Header.Version, SectionLength); for (auto Pair : SectionLength) { auto Index = getContributionIndex(Pair.first, IndexVersion); From 98b0ee5104841e6848fd5861e6a3c234d2f80ba1 Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Fri, 14 Nov 2025 16:41:02 -0800 Subject: [PATCH 2/9] Merge with upstream and remove extra code that isn't needed. --- lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp index 4a3dad2385c2c..b78e6ce807bca 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp @@ -376,10 +376,6 @@ void DWARFUnit::SetDwoStrOffsetsBase() { // Skip padding. baseOffset += 2; - } else { - // Size of offset for .debug_str_offsets is same as DWARF offset byte size - // of the DWARFUnit for DWARF version 4 and earlier. - m_str_offsets_size = m_header.getDwarfOffsetByteSize(); } SetStrOffsetsBase(baseOffset); From 604b5551e4cd5f82fff34cd8e6282b410bca1edd Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Mon, 17 Nov 2025 11:22:39 -0800 Subject: [PATCH 3/9] Add new --force-dwarf64-str-offsets option to llvm-dwp and a test that uses it. This patch adds a new llvm-dwp option that can be used in testing to verify that llvm-dwp can successfully upgrade a .debug_str_offsets tables from DWARF32 to DWARF64. 
--- llvm/include/llvm/DWP/DWP.h | 3 +- llvm/lib/DWP/DWP.cpp | 15 ++-- .../llvm-dwp/X86/dwarf64-str-offsets.test | 72 +++++++++++++++++++ llvm/tools/llvm-dwp/Opts.td | 4 ++ llvm/tools/llvm-dwp/llvm-dwp.cpp | 6 +- 5 files changed, 93 insertions(+), 7 deletions(-) create mode 100644 llvm/test/tools/llvm-dwp/X86/dwarf64-str-offsets.test diff --git a/llvm/include/llvm/DWP/DWP.h b/llvm/include/llvm/DWP/DWP.h index cc38369658eaa..1ad4bbcefa988 100644 --- a/llvm/include/llvm/DWP/DWP.h +++ b/llvm/include/llvm/DWP/DWP.h @@ -68,7 +68,8 @@ struct CompileUnitIdentifiers { }; LLVM_ABI Error write(MCStreamer &Out, ArrayRef Inputs, - OnCuIndexOverflow OverflowOptValue); + OnCuIndexOverflow OverflowOptValue, + bool ForceDwarf64StringOffsets); typedef std::vector> SectionLengths; diff --git a/llvm/lib/DWP/DWP.cpp b/llvm/lib/DWP/DWP.cpp index 54edce81208b5..a92cf2339506f 100644 --- a/llvm/lib/DWP/DWP.cpp +++ b/llvm/lib/DWP/DWP.cpp @@ -429,7 +429,8 @@ void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings, MCSection *StrOffsetSection, StringRef CurStrSection, StringRef CurStrOffsetSection, uint16_t Version, - SectionLengths &SectionLength) { + SectionLengths &SectionLength, + const bool ForceDwarf64StringOffsets) { // Could possibly produce an error or warning if one of these was non-null but // the other was null. if (CurStrSection.empty() || CurStrOffsetSection.empty()) @@ -442,9 +443,11 @@ void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings, uint64_t PrevOffset = 0; // Keep track if any new string offsets exceed UINT32_MAX. If any do, we can - // emit a DWARF64 .debug_str_offsets table for this compile unit. + // emit a DWARF64 .debug_str_offsets table for this compile unit. If the + // \a ForceDwarf64StringOffsets argument is true, then force the emission of + // DWARF64 .debug_str_offsets for testing. uint32_t OldOffsetSize = 4; - uint32_t NewOffsetSize = 4; + uint32_t NewOffsetSize = ForceDwarf64StringOffsets ? 
8 : 4; while (const char *S = Data.getCStr(&LocalOffset)) { uint64_t NewOffset = Strings.getOffset(S, LocalOffset - PrevOffset); OffsetRemapping[PrevOffset] = NewOffset; @@ -666,7 +669,8 @@ Error handleSection( } Error write(MCStreamer &Out, ArrayRef Inputs, - OnCuIndexOverflow OverflowOptValue) { + OnCuIndexOverflow OverflowOptValue, + bool ForceDwarf64StringOffsets) { const auto &MCOFI = *Out.getContext().getObjectFileInfo(); MCSection *const StrSection = MCOFI.getDwarfStrDWOSection(); MCSection *const StrOffsetSection = MCOFI.getDwarfStrOffDWOSection(); @@ -759,7 +763,8 @@ Error write(MCStreamer &Out, ArrayRef Inputs, } writeStringsAndOffsets(Out, Strings, StrOffsetSection, CurStrSection, - CurStrOffsetSection, Header.Version, SectionLength); + CurStrOffsetSection, Header.Version, SectionLength, + ForceDwarf64StringOffsets); for (auto Pair : SectionLength) { auto Index = getContributionIndex(Pair.first, IndexVersion); diff --git a/llvm/test/tools/llvm-dwp/X86/dwarf64-str-offsets.test b/llvm/test/tools/llvm-dwp/X86/dwarf64-str-offsets.test new file mode 100644 index 0000000000000..f73461b349688 --- /dev/null +++ b/llvm/test/tools/llvm-dwp/X86/dwarf64-str-offsets.test @@ -0,0 +1,72 @@ +# This test tests that llvm-dwp can successfully promote .debug_str_offsets to +# DWARF64. We do this by using a hidden option to llvm-dwp which is +# "--force-dwarf64-str-offsets". This allows us to test if llvm-dwp can +# successfully promote a DWARF32 version of .debug_str_offsets to a DWARF64 +# version. This allows us to test the functionality without having to create a +# 4GB .dwo file. 
+ +# RUN: yaml2obj %s -o %t.dwo +# RUN: llvm-dwp %t.dwo -o %t.32.dwp +# RUN: llvm-dwp %t.dwo -o %t.64.dwp --force-dwarf64-str-offsets +# RUN: llvm-dwarfdump --debug-str-offsets %t.32.dwp | FileCheck --check-prefixes=DWARF32 %s +# RUN: llvm-dwarfdump --debug-str-offsets %t.64.dwp | FileCheck --check-prefixes=DWARF64 %s + +# DWARF32: .debug_str_offsets.dwo contents: +# DWARF32-NEXT: 0x00000000: Contribution size = 36, Format = DWARF32, Version = 5 +# DWARF32-NEXT: 0x00000008: 00000000 "main" +# DWARF32-NEXT: 0x0000000c: 00000005 "int" +# DWARF32-NEXT: 0x00000010: 00000009 "argc" +# DWARF32-NEXT: 0x00000014: 0000000e "argv" +# DWARF32-NEXT: 0x00000018: 00000013 "char" +# DWARF32-NEXT: 0x0000001c: 00000018 "Apple clang version 17.0.0 (clang-1700.4.4.1)" +# DWARF32-NEXT: 0x00000020: 00000046 "simple.cpp" +# DWARF32-NEXT: 0x00000024: 00000051 "simple.dwo" + +# DWARF64: .debug_str_offsets.dwo contents: +# DWARF64-NEXT: 0x00000000: Contribution size = 68, Format = DWARF64, Version = 5 +# DWARF64-NEXT: 0x00000010: 0000000000000000 "main" +# DWARF64-NEXT: 0x00000018: 0000000000000005 "int" +# DWARF64-NEXT: 0x00000020: 0000000000000009 "argc" +# DWARF64-NEXT: 0x00000028: 000000000000000e "argv" +# DWARF64-NEXT: 0x00000030: 0000000000000013 "char" +# DWARF64-NEXT: 0x00000038: 0000000000000018 "Apple clang version 17.0.0 (clang-1700.4.4.1)" +# DWARF64-NEXT: 0x00000040: 0000000000000046 "simple.cpp" +# DWARF64-NEXT: 0x00000048: 0000000000000051 "simple.dwo" + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 + SectionHeaderStringTable: .strtab +Sections: + - Name: .debug_str_offsets.dwo + Type: SHT_PROGBITS + Flags: [ SHF_EXCLUDE ] + AddressAlign: 0x1 + Content: '24000000050000000000000005000000090000000E00000013000000180000004600000051000000' + - Name: .debug_str.dwo + Type: SHT_PROGBITS + Flags: [ SHF_EXCLUDE, SHF_MERGE, SHF_STRINGS ] + AddressAlign: 0x1 + EntSize: 0x1 + Content: 
6D61696E00696E74006172676300617267760063686172004170706C6520636C616E672076657273696F6E2031372E302E302028636C616E672D313730302E342E342E31290073696D706C652E6370700073696D706C652E64776F00 + - Name: .debug_info.dwo + Type: SHT_PROGBITS + Flags: [ SHF_EXCLUDE ] + AddressAlign: 0x1 + Content: 540000000500050800000000031DD228762F8E1C0105210006070200190000000156000001400000000302917802000140000000030291700300014400000000040105040549000000054E00000006530000000404060100 + - Name: .debug_abbrev.dwo + Type: SHT_PROGBITS + Flags: [ SHF_EXCLUDE ] + AddressAlign: 0x1 + Content: 01110125251305032576250000022E01111B1206401803253A0B3B0B49133F190000030500021803253A0B3B0B4913000004240003253E0B0B0B0000050F00491300000626004913000000 + - Type: SectionHeaderTable + Sections: + - Name: .strtab + - Name: .debug_str_offsets.dwo + - Name: .debug_str.dwo + - Name: .debug_info.dwo + - Name: .debug_abbrev.dwo +... diff --git a/llvm/tools/llvm-dwp/Opts.td b/llvm/tools/llvm-dwp/Opts.td index 46593bc40ebae..fddeb86fdae3c 100644 --- a/llvm/tools/llvm-dwp/Opts.td +++ b/llvm/tools/llvm-dwp/Opts.td @@ -16,3 +16,7 @@ def continueOnCuIndexOverflow_EQ : Joined<["-", "--"], "continue-on-cu-index-ove "\t\ttruncated but valid DWP file, discarding any DWO files that would not fit within \n" "\t\tthe 32 bit/4GB limits of the format.">, Values<"continue,soft-stop">; +def forceDwarf64StringOffsets : Flag<["-", "--"], "force-dwarf64-str-offsets">, + Flags<[HelpHidden]>, + HelpText<"Force all .debug_str_offsets to be emitted as DWARF64 tables. 
This " + "option is used for testing.">; diff --git a/llvm/tools/llvm-dwp/llvm-dwp.cpp b/llvm/tools/llvm-dwp/llvm-dwp.cpp index 31bad2d68982b..f735ecac50608 100644 --- a/llvm/tools/llvm-dwp/llvm-dwp.cpp +++ b/llvm/tools/llvm-dwp/llvm-dwp.cpp @@ -73,6 +73,7 @@ class DwpOptTable : public opt::GenericOptTable { static std::vector ExecFilenames; static std::string OutputFilename; static std::string ContinueOption; +static bool ForceDwarf64StringOffsets = false; static Expected> getDWOFilenames(StringRef ExecFilename) { @@ -160,6 +161,8 @@ int llvm_dwp_main(int argc, char **argv, const llvm::ToolContext &) { } } } + if (Args.getLastArg(OPT_forceDwarf64StringOffsets)) + ForceDwarf64StringOffsets = true; for (const llvm::opt::Arg *A : Args.filtered(OPT_execFileNames)) ExecFilenames.emplace_back(A->getValue()); @@ -274,7 +277,8 @@ int llvm_dwp_main(int argc, char **argv, const llvm::ToolContext &) { if (!MS) return error("no object streamer for target " + TripleName, Context); - if (auto Err = write(*MS, DWOFilenames, OverflowOptValue)) { + if (auto Err = write(*MS, DWOFilenames, OverflowOptValue, + ForceDwarf64StringOffsets)) { logAllUnhandledErrors(std::move(Err), WithColor::error()); return 1; } From 011c2c187c73d86bcd3c0e5065d6da8954e99ecd Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Mon, 17 Nov 2025 14:51:21 -0800 Subject: [PATCH 4/9] Add a --dwarf64-str-offsets option value. This option controls if llvm-dwp can promote a .debug_str_offsets table from DWARF32 to DWARF64. Setting this option value to "enabled" allows promotion of a DWARF32 .debug_str_offsets table to DWARF64 only if any string in the .debug_str_offsets table exceeds UINT32_MAX. Setting this option value to "disabled" (the default) will keep pre-existing behavior where all .debug_str_offsets tables will be emitted in the same format as are in each .dwo file. Setting this option value to "always" forces all .debug_str_offsets tables to be emitted as DWARF64 tables. This is used for testing. 
Removed the previous --force-dwarf64-str-offsets option. --- llvm/include/llvm/DWP/DWP.h | 8 +++++- llvm/lib/DWP/DWP.cpp | 18 ++++++++----- .../llvm-dwp/X86/dwarf64-str-offsets.test | 17 +++++++++--- llvm/tools/llvm-dwp/Opts.td | 14 +++++++--- llvm/tools/llvm-dwp/llvm-dwp.cpp | 27 ++++++++++++++++--- 5 files changed, 64 insertions(+), 20 deletions(-) diff --git a/llvm/include/llvm/DWP/DWP.h b/llvm/include/llvm/DWP/DWP.h index 1ad4bbcefa988..16c4f9d701072 100644 --- a/llvm/include/llvm/DWP/DWP.h +++ b/llvm/include/llvm/DWP/DWP.h @@ -22,6 +22,12 @@ enum OnCuIndexOverflow { Continue, }; +enum Dwarf64StrOffsets { + Disabled, ///< Don't do any conversion of .debug_str_offsets tables. + Enabled, ///< Convert any .debug_str_offsets tables to DWARD64 if needed. + Always, ///< Always emit .debug_str_offsets talbes as DWARF64 for testing. +}; + struct UnitIndexEntry { DWARFUnitIndex::Entry::SectionContribution Contributions[8]; std::string Name; @@ -69,7 +75,7 @@ struct CompileUnitIdentifiers { LLVM_ABI Error write(MCStreamer &Out, ArrayRef Inputs, OnCuIndexOverflow OverflowOptValue, - bool ForceDwarf64StringOffsets); + Dwarf64StrOffsets StrOffsetsOptValue); typedef std::vector> SectionLengths; diff --git a/llvm/lib/DWP/DWP.cpp b/llvm/lib/DWP/DWP.cpp index a92cf2339506f..758401f33c576 100644 --- a/llvm/lib/DWP/DWP.cpp +++ b/llvm/lib/DWP/DWP.cpp @@ -430,7 +430,7 @@ void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings, StringRef CurStrSection, StringRef CurStrOffsetSection, uint16_t Version, SectionLengths &SectionLength, - const bool ForceDwarf64StringOffsets) { + const Dwarf64StrOffsets StrOffsetsOptValue) { // Could possibly produce an error or warning if one of these was non-null but // the other was null. if (CurStrSection.empty() || CurStrOffsetSection.empty()) @@ -444,15 +444,19 @@ void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings, // Keep track if any new string offsets exceed UINT32_MAX. 
If any do, we can // emit a DWARF64 .debug_str_offsets table for this compile unit. If the - // \a ForceDwarf64StringOffsets argument is true, then force the emission of - // DWARF64 .debug_str_offsets for testing. + // \a StrOffsetsOptValue argument is Dwarf64StrOffsets::Always, then force + // the emission of DWARF64 .debug_str_offsets for testing. uint32_t OldOffsetSize = 4; - uint32_t NewOffsetSize = ForceDwarf64StringOffsets ? 8 : 4; + uint32_t NewOffsetSize = + StrOffsetsOptValue == Dwarf64StrOffsets::Always ? 8 : 4; while (const char *S = Data.getCStr(&LocalOffset)) { uint64_t NewOffset = Strings.getOffset(S, LocalOffset - PrevOffset); OffsetRemapping[PrevOffset] = NewOffset; - if (NewOffset > UINT32_MAX) + // Only promote the .debug_str_offsets to DWARF64 if our setting allows it. + if (StrOffsetsOptValue != Dwarf64StrOffsets::Disabled && + NewOffset > UINT32_MAX) { NewOffsetSize = 8; + } PrevOffset = LocalOffset; } @@ -670,7 +674,7 @@ Error handleSection( Error write(MCStreamer &Out, ArrayRef Inputs, OnCuIndexOverflow OverflowOptValue, - bool ForceDwarf64StringOffsets) { + Dwarf64StrOffsets StrOffsetsOptValue) { const auto &MCOFI = *Out.getContext().getObjectFileInfo(); MCSection *const StrSection = MCOFI.getDwarfStrDWOSection(); MCSection *const StrOffsetSection = MCOFI.getDwarfStrOffDWOSection(); @@ -764,7 +768,7 @@ Error write(MCStreamer &Out, ArrayRef Inputs, writeStringsAndOffsets(Out, Strings, StrOffsetSection, CurStrSection, CurStrOffsetSection, Header.Version, SectionLength, - ForceDwarf64StringOffsets); + StrOffsetsOptValue); for (auto Pair : SectionLength) { auto Index = getContributionIndex(Pair.first, IndexVersion); diff --git a/llvm/test/tools/llvm-dwp/X86/dwarf64-str-offsets.test b/llvm/test/tools/llvm-dwp/X86/dwarf64-str-offsets.test index f73461b349688..0b75fb15d29fe 100644 --- a/llvm/test/tools/llvm-dwp/X86/dwarf64-str-offsets.test +++ b/llvm/test/tools/llvm-dwp/X86/dwarf64-str-offsets.test @@ -6,10 +6,17 @@ # 4GB .dwo file. 
# RUN: yaml2obj %s -o %t.dwo -# RUN: llvm-dwp %t.dwo -o %t.32.dwp -# RUN: llvm-dwp %t.dwo -o %t.64.dwp --force-dwarf64-str-offsets -# RUN: llvm-dwarfdump --debug-str-offsets %t.32.dwp | FileCheck --check-prefixes=DWARF32 %s -# RUN: llvm-dwarfdump --debug-str-offsets %t.64.dwp | FileCheck --check-prefixes=DWARF64 %s +# RUN: llvm-dwp %t.dwo -o %t.dwp +# RUN: llvm-dwp %t.dwo -o %t.default.dwp --dwarf64-str-offsets +# RUN: llvm-dwp %t.dwo -o %t.disabled.dwp --dwarf64-str-offsets=disabled +# RUN: llvm-dwp %t.dwo -o %t.enabled.dwp --dwarf64-str-offsets=enabled +# RUN: llvm-dwp %t.dwo -o %t.always.dwp --dwarf64-str-offsets=always +# RUN: not llvm-dwp %t.dwo -o %t.invalid.dwp --dwarf64-str-offsets=invalid 2>&1 | FileCheck --check-prefixes=ERROR %s +# RUN: llvm-dwarfdump --debug-str-offsets %t.dwp | FileCheck --check-prefixes=DWARF32 %s +# RUN: llvm-dwarfdump --debug-str-offsets %t.default.dwp | FileCheck --check-prefixes=DWARF32 %s +# RUN: llvm-dwarfdump --debug-str-offsets %t.disabled.dwp | FileCheck --check-prefixes=DWARF32 %s +# RUN: llvm-dwarfdump --debug-str-offsets %t.enabled.dwp | FileCheck --check-prefixes=DWARF32 %s +# RUN: llvm-dwarfdump --debug-str-offsets %t.always.dwp | FileCheck --check-prefixes=DWARF64 %s # DWARF32: .debug_str_offsets.dwo contents: # DWARF32-NEXT: 0x00000000: Contribution size = 36, Format = DWARF32, Version = 5 @@ -33,6 +40,8 @@ # DWARF64-NEXT: 0x00000040: 0000000000000046 "simple.cpp" # DWARF64-NEXT: 0x00000048: 0000000000000051 "simple.dwo" +# ERROR: invalid value for --dwarf64-str-offsets. Valid values are one of: "enabled", "disabled" or "always". 
+ --- !ELF FileHeader: Class: ELFCLASS64 diff --git a/llvm/tools/llvm-dwp/Opts.td b/llvm/tools/llvm-dwp/Opts.td index fddeb86fdae3c..2f07227e8adba 100644 --- a/llvm/tools/llvm-dwp/Opts.td +++ b/llvm/tools/llvm-dwp/Opts.td @@ -16,7 +16,13 @@ def continueOnCuIndexOverflow_EQ : Joined<["-", "--"], "continue-on-cu-index-ove "\t\ttruncated but valid DWP file, discarding any DWO files that would not fit within \n" "\t\tthe 32 bit/4GB limits of the format.">, Values<"continue,soft-stop">; -def forceDwarf64StringOffsets : Flag<["-", "--"], "force-dwarf64-str-offsets">, - Flags<[HelpHidden]>, - HelpText<"Force all .debug_str_offsets to be emitted as DWARF64 tables. This " - "option is used for testing.">; + +def dwarf64StringOffsets : Flag<["-", "--"], "dwarf64-str-offsets">; +def dwarf64StringOffsets_EQ : Joined<["-", "--"], "dwarf64-str-offsets=">, + HelpText<"default = disabled, This setting doesn't convert DWARF32 .debug_str_offsets\n" + "tables in .dwo files to DWARF64 in the .dwp file. = enabled, This allows .debug_str\n" + "tables to exceed the 4GB limit and have any DWARF32 .debug_str_offsets tables\n" + "converted to DWARF64 only for tables that require 64 bit string offsets.\n" + "= always, This forces all .debug_str_offsets tables to be emitted as DWARF64.\n" + "This is used for testing.">, + Values<"disabled,enabled,always">; diff --git a/llvm/tools/llvm-dwp/llvm-dwp.cpp b/llvm/tools/llvm-dwp/llvm-dwp.cpp index f735ecac50608..546710f30dfad 100644 --- a/llvm/tools/llvm-dwp/llvm-dwp.cpp +++ b/llvm/tools/llvm-dwp/llvm-dwp.cpp @@ -73,7 +73,6 @@ class DwpOptTable : public opt::GenericOptTable { static std::vector ExecFilenames; static std::string OutputFilename; static std::string ContinueOption; -static bool ForceDwarf64StringOffsets = false; static Expected> getDWOFilenames(StringRef ExecFilename) { @@ -126,6 +125,8 @@ int llvm_dwp_main(int argc, char **argv, const llvm::ToolContext &) { llvm::BumpPtrAllocator A; llvm::StringSaver Saver{A}; OnCuIndexOverflow 
OverflowOptValue = OnCuIndexOverflow::HardStop; + Dwarf64StrOffsets Dwarf64StrOffsetsValue = Dwarf64StrOffsets::Disabled; + opt::InputArgList Args = Tbl.parseArgs(argc, argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) { llvm::errs() << Msg << '\n'; @@ -161,8 +162,26 @@ int llvm_dwp_main(int argc, char **argv, const llvm::ToolContext &) { } } } - if (Args.getLastArg(OPT_forceDwarf64StringOffsets)) - ForceDwarf64StringOffsets = true; + + if (Arg *Arg = Args.getLastArg(OPT_dwarf64StringOffsets, + OPT_dwarf64StringOffsets_EQ)) { + if (Arg->getOption().matches(OPT_dwarf64StringOffsets)) { + Dwarf64StrOffsetsValue = Dwarf64StrOffsets::Enabled; + } else { + std::string OptValue = Arg->getValue(); + if (OptValue == "disabled") { + Dwarf64StrOffsetsValue = Dwarf64StrOffsets::Disabled; + } else if (OptValue == "enabled") { + Dwarf64StrOffsetsValue = Dwarf64StrOffsets::Enabled; + } else if (OptValue == "always") { + Dwarf64StrOffsetsValue = Dwarf64StrOffsets::Always; + } else { + llvm::errs() << "invalid value for --dwarf64-str-offsets. Valid values " + "are one of: \"enabled\", \"disabled\" or \"always\".\n"; + exit(1); + } + } + } for (const llvm::opt::Arg *A : Args.filtered(OPT_execFileNames)) ExecFilenames.emplace_back(A->getValue()); @@ -278,7 +297,7 @@ int llvm_dwp_main(int argc, char **argv, const llvm::ToolContext &) { return error("no object streamer for target " + TripleName, Context); if (auto Err = write(*MS, DWOFilenames, OverflowOptValue, - ForceDwarf64StringOffsets)) { + Dwarf64StrOffsetsValue)) { logAllUnhandledErrors(std::move(Err), WithColor::error()); return 1; } From 18707c5a5e1084bdea6599598aee1482def18a54 Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Mon, 17 Nov 2025 14:56:13 -0800 Subject: [PATCH 5/9] Fix a typo. 
--- llvm/include/llvm/DWP/DWP.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/DWP/DWP.h b/llvm/include/llvm/DWP/DWP.h index 16c4f9d701072..f04efd6bbe672 100644 --- a/llvm/include/llvm/DWP/DWP.h +++ b/llvm/include/llvm/DWP/DWP.h @@ -24,7 +24,7 @@ enum OnCuIndexOverflow { enum Dwarf64StrOffsets { Disabled, ///< Don't do any conversion of .debug_str_offsets tables. - Enabled, ///< Convert any .debug_str_offsets tables to DWARD64 if needed. + Enabled, ///< Convert any .debug_str_offsets tables to DWARF64 if needed. Always, ///< Always emit .debug_str_offsets talbes as DWARF64 for testing. }; From c3b53afe675659cac39dc7637b3f16732b6e43b7 Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Mon, 17 Nov 2025 14:56:53 -0800 Subject: [PATCH 6/9] Clang format. --- llvm/tools/llvm-dwp/llvm-dwp.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/llvm/tools/llvm-dwp/llvm-dwp.cpp b/llvm/tools/llvm-dwp/llvm-dwp.cpp index 546710f30dfad..b5038a1b34204 100644 --- a/llvm/tools/llvm-dwp/llvm-dwp.cpp +++ b/llvm/tools/llvm-dwp/llvm-dwp.cpp @@ -176,8 +176,9 @@ int llvm_dwp_main(int argc, char **argv, const llvm::ToolContext &) { } else if (OptValue == "always") { Dwarf64StrOffsetsValue = Dwarf64StrOffsets::Always; } else { - llvm::errs() << "invalid value for --dwarf64-str-offsets. Valid values " - "are one of: \"enabled\", \"disabled\" or \"always\".\n"; + llvm::errs() + << "invalid value for --dwarf64-str-offsets. 
Valid values " + "are one of: \"enabled\", \"disabled\" or \"always\".\n"; exit(1); } } @@ -296,8 +297,8 @@ int llvm_dwp_main(int argc, char **argv, const llvm::ToolContext &) { if (!MS) return error("no object streamer for target " + TripleName, Context); - if (auto Err = write(*MS, DWOFilenames, OverflowOptValue, - Dwarf64StrOffsetsValue)) { + if (auto Err = + write(*MS, DWOFilenames, OverflowOptValue, Dwarf64StrOffsetsValue)) { logAllUnhandledErrors(std::move(Err), WithColor::error()); return 1; } From 6e65e72e614784c1553edd14056dc525ab0fe3aa Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Mon, 17 Nov 2025 15:01:07 -0800 Subject: [PATCH 7/9] Fix help text to represent the right default value if --dwarf64-str-offsets is specified without a value. --- llvm/tools/llvm-dwp/Opts.td | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/llvm/tools/llvm-dwp/Opts.td b/llvm/tools/llvm-dwp/Opts.td index 2f07227e8adba..33a4c19cb69d5 100644 --- a/llvm/tools/llvm-dwp/Opts.td +++ b/llvm/tools/llvm-dwp/Opts.td @@ -19,10 +19,9 @@ def continueOnCuIndexOverflow_EQ : Joined<["-", "--"], "continue-on-cu-index-ove def dwarf64StringOffsets : Flag<["-", "--"], "dwarf64-str-offsets">; def dwarf64StringOffsets_EQ : Joined<["-", "--"], "dwarf64-str-offsets=">, - HelpText<"default = disabled, This setting doesn't convert DWARF32 .debug_str_offsets\n" - "tables in .dwo files to DWARF64 in the .dwp file. = enabled, This allows .debug_str\n" - "tables to exceed the 4GB limit and have any DWARF32 .debug_str_offsets tables\n" - "converted to DWARF64 only for tables that require 64 bit string offsets.\n" - "= always, This forces all .debug_str_offsets tables to be emitted as DWARF64.\n" - "This is used for testing.">, + HelpText<"default = enabled, This allows .debug_str tables to exceed the 4GB limit\n" + "and have any DWARF32 .debug_str_offsets tables converted to DWARF64 only for tables\n" + "that require 64 bit string offsets. 
= disabled, This setting doesn't convert DWARF32\n" + " .debug_str_offsets tables in .dwo files to DWARF64 in the .dwp file. = always, This\n" + "forces all .debug_str_offsets tables to be emitted as DWARF64. This is used for testing.">, Values<"disabled,enabled,always">; From d4c35f696337dbf33013c88378d6cc3c588ca95e Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Mon, 17 Nov 2025 15:03:26 -0800 Subject: [PATCH 8/9] More cleanup on help text for --dwarf64-str-offsets option. --- llvm/tools/llvm-dwp/Opts.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/tools/llvm-dwp/Opts.td b/llvm/tools/llvm-dwp/Opts.td index 33a4c19cb69d5..c2e653cbe344d 100644 --- a/llvm/tools/llvm-dwp/Opts.td +++ b/llvm/tools/llvm-dwp/Opts.td @@ -22,6 +22,6 @@ def dwarf64StringOffsets_EQ : Joined<["-", "--"], "dwarf64-str-offsets=">, HelpText<"default = enabled, This allows .debug_str tables to exceed the 4GB limit\n" "and have any DWARF32 .debug_str_offsets tables converted to DWARF64 only for tables\n" "that require 64 bit string offsets. = disabled, This setting doesn't convert DWARF32\n" - " .debug_str_offsets tables in .dwo files to DWARF64 in the .dwp file. = always, This\n" + ".debug_str_offsets tables in .dwo files to DWARF64 in the .dwp file. = always, This\n" "forces all .debug_str_offsets tables to be emitted as DWARF64. This is used for testing.">, Values<"disabled,enabled,always">; From 96d894cbac608ae2daf0a4280b0ab4564d38f123 Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Mon, 17 Nov 2025 17:00:34 -0800 Subject: [PATCH 9/9] Fix llvm-dwp assertion errors. I was creating a llvm-dwp file that had a .debug_str section that exceeded 4GB and some asserts were firing. 
--- llvm/lib/DWP/DWP.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/llvm/lib/DWP/DWP.cpp b/llvm/lib/DWP/DWP.cpp index 758401f33c576..d32884858dd30 100644 --- a/llvm/lib/DWP/DWP.cpp +++ b/llvm/lib/DWP/DWP.cpp @@ -416,12 +416,15 @@ static void writeNewOffsetsTo(MCStreamer &Out, DataExtractor &Data, DenseMap &OffsetRemapping, uint64_t &Offset, const uint64_t Size, uint32_t OldOffsetSize, uint32_t NewOffsetSize) { - + // Create a mask so we don't trigger an emitIntValue() assert below if the + // NewOffset is over 4GB. + const uint64_t NewOffsetMask = NewOffsetSize == 8 ? UINT64_MAX : UINT32_MAX; while (Offset < Size) { const uint64_t OldOffset = Data.getUnsigned(&Offset, OldOffsetSize); const uint64_t NewOffset = OffsetRemapping[OldOffset]; - assert(NewOffsetSize == 8 || NewOffset <= UINT32_MAX); - Out.emitIntValue(NewOffset, NewOffsetSize); + // Truncate the string offset like the old llvm-dwp would have if we aren't + // promoting the .debug_str_offsets to DWARF64. + Out.emitIntValue(NewOffset & NewOffsetMask, NewOffsetSize); } }