Skip to content

Conversation

@clayborg
Copy link
Collaborator

This patch is updating the reading capabilities of the LLDB DWARF parser for a llvm-dwp patch #167457 that will emit .dwp files where the compile units are DWARF32 and the .debug_str_offsets tables will be emitted as DWARF64 to allow .debug_str sections that exceed 4GB in size.

@llvmbot
Copy link
Member

llvmbot commented Nov 14, 2025

@llvm/pr-subscribers-lldb

Author: Greg Clayton (clayborg)

Changes

This patch is updating the reading capabilities of the LLDB DWARF parser for a llvm-dwp patch #167457 that will emit .dwp files where the compile units are DWARF32 and the .debug_str_offsets tables will be emitted as DWARF64 to allow .debug_str sections that exceed 4GB in size.


Full diff: https://github.com/llvm/llvm-project/pull/167997.diff

5 Files Affected:

  • (modified) lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp (+10-4)
  • (modified) lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h (+1)
  • (added) lldb/test/Shell/SymbolFile/DWARF/Inputs/dwp-str-offsets-dwarf64-dwp.yaml (+44)
  • (added) lldb/test/Shell/SymbolFile/DWARF/Inputs/dwp-str-offsets-dwarf64-exe.yaml (+100)
  • (added) lldb/test/Shell/SymbolFile/DWARF/dwp-str-offsets-dwarf64.test (+25)
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
index 94fc2e83e899d..14bfa75d772a4 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
@@ -360,8 +360,10 @@ void DWARFUnit::SetDwoStrOffsetsBase() {
     const DWARFDataExtractor &strOffsets =
         GetSymbolFileDWARF().GetDWARFContext().getOrLoadStrOffsetsData();
     uint64_t length = strOffsets.GetU32(&baseOffset);
-    if (length == 0xffffffff)
+    if (length == llvm::dwarf::DW_LENGTH_DWARF64) {
       length = strOffsets.GetU64(&baseOffset);
+      m_str_offsets_size = 8;
+    }
 
     // Check version.
     if (strOffsets.GetU16(&baseOffset) < 5)
@@ -369,8 +371,13 @@ void DWARFUnit::SetDwoStrOffsetsBase() {
 
     // Skip padding.
     baseOffset += 2;
+  } else {
+    // Size of offset for .debug_str_offsets is same as DWARF offset byte size
+    // of the DWARFUnit for DWARF version 4 and earlier.
+    m_str_offsets_size = m_header.getDwarfOffsetByteSize();
   }
 
+
   SetStrOffsetsBase(baseOffset);
 }
 
@@ -1079,10 +1086,9 @@ uint32_t DWARFUnit::GetHeaderByteSize() const { return m_header.getSize(); }
 
 std::optional<uint64_t>
 DWARFUnit::GetStringOffsetSectionItem(uint32_t index) const {
-  lldb::offset_t offset =
-      GetStrOffsetsBase() + index * m_header.getDwarfOffsetByteSize();
+  lldb::offset_t offset = GetStrOffsetsBase() + index * m_str_offsets_size;
   return m_dwarf.GetDWARFContext().getOrLoadStrOffsetsData().GetMaxU64(
-      &offset, m_header.getDwarfOffsetByteSize());
+      &offset, m_str_offsets_size);
 }
 
 llvm::Expected<llvm::DWARFAddressRangesVector>
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
index 91a693860c55a..fa863eb8417ab 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
@@ -364,6 +364,7 @@ class DWARFUnit : public DWARFExpression::Delegate, public UserID {
   dw_offset_t m_line_table_offset = DW_INVALID_OFFSET;
 
   dw_offset_t m_str_offsets_base = 0; // Value of DW_AT_str_offsets_base.
+  dw_offset_t m_str_offsets_size = 4; // Size in bytes of the string offsets.
 
   std::optional<llvm::DWARFDebugRnglistTable> m_rnglist_table;
   bool m_rnglist_table_done = false;
diff --git a/lldb/test/Shell/SymbolFile/DWARF/Inputs/dwp-str-offsets-dwarf64-dwp.yaml b/lldb/test/Shell/SymbolFile/DWARF/Inputs/dwp-str-offsets-dwarf64-dwp.yaml
new file mode 100644
index 0000000000000..d4e2ceea9c4f6
--- /dev/null
+++ b/lldb/test/Shell/SymbolFile/DWARF/Inputs/dwp-str-offsets-dwarf64-dwp.yaml
@@ -0,0 +1,44 @@
+--- !ELF
+FileHeader:
+  Class:           ELFCLASS64
+  Data:            ELFDATA2LSB
+  Type:            ET_REL
+  Machine:         EM_X86_64
+  SectionHeaderStringTable: .strtab
+Sections:
+  - Name:            .debug_abbrev.dwo
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_EXCLUDE ]
+    AddressAlign:    0x1
+    Content:         01110125251305032576250000022E01111B1206401803253A0B3B0B49133F190000030500021803253A0B3B0B4913000004240003253E0B0B0B0000050F00491300000626004913000000
+  - Name:            .debug_str.dwo
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_EXCLUDE, SHF_MERGE, SHF_STRINGS ]
+    AddressAlign:    0x1
+    EntSize:         0x1
+    Content:         6D61696E00696E74006172676300617267760063686172004170706C6520636C616E672076657273696F6E2031372E302E302028636C616E672D313730302E342E342E3129006D61696E2E6D696E696D616C2E637070006D61696E2E6D696E696D616C2E64776F00
+  - Name:            .debug_str_offsets.dwo
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_EXCLUDE ]
+    AddressAlign:    0x1
+    Content:         'FFFFFFFF4400000000000000050000000000000000000000050000000000000009000000000000000E000000000000001300000000000000180000000000000046000000000000005700000000000000'
+  - Name:            .debug_info.dwo
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_EXCLUDE ]
+    AddressAlign:    0x1
+    Content:         54000000050005080000000099E97383BBC6980B0105210006070200160000000156000001400000000302917802000140000000030291700300014400000000040105040549000000054E00000006530000000404060100
+  - Name:            .debug_cu_index
+    Type:            SHT_PROGBITS
+    AddressAlign:    0x1
+    Content:         05000000030000000100000002000000000000000000000099E97383BBC6980B0000000001000000010000000300000006000000000000000000000000000000580000004B00000028000000
+  - Type:            SectionHeaderTable
+    Sections:
+      - Name:            .strtab
+      - Name:            .debug_abbrev.dwo
+      - Name:            .debug_str.dwo
+      - Name:            .debug_str_offsets.dwo
+      - Name:            .debug_info.dwo
+      - Name:            .debug_cu_index
+      - Name:            .symtab
+Symbols:         []
+...
diff --git a/lldb/test/Shell/SymbolFile/DWARF/Inputs/dwp-str-offsets-dwarf64-exe.yaml b/lldb/test/Shell/SymbolFile/DWARF/Inputs/dwp-str-offsets-dwarf64-exe.yaml
new file mode 100644
index 0000000000000..3785bbe6ecbe0
--- /dev/null
+++ b/lldb/test/Shell/SymbolFile/DWARF/Inputs/dwp-str-offsets-dwarf64-exe.yaml
@@ -0,0 +1,100 @@
+--- !ELF
+FileHeader:
+  Class:           ELFCLASS64
+  Data:            ELFDATA2LSB
+  Type:            ET_EXEC
+  Machine:         EM_X86_64
+ProgramHeaders:
+  - Type:            PT_PHDR
+    Flags:           [ PF_R ]
+    VAddr:           0x200040
+    Align:           0x8
+    Offset:          0x40
+  - Type:            PT_LOAD
+    Flags:           [ PF_R ]
+    FirstSec:        .eh_frame
+    LastSec:         .eh_frame
+    VAddr:           0x200000
+    Align:           0x1000
+    Offset:          0x0
+  - Type:            PT_LOAD
+    Flags:           [ PF_X, PF_R ]
+    FirstSec:        .text
+    LastSec:         .text
+    VAddr:           0x201160
+    Align:           0x1000
+    Offset:          0x160
+  - Type:            PT_GNU_STACK
+    Flags:           [ PF_W, PF_R ]
+    Align:           0x0
+    Offset:          0x0
+Sections:
+  - Name:            .eh_frame
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x200120
+    AddressAlign:    0x8
+    Content:         1400000000000000017A5200017810011B0C0708900100001C0000001C000000201000001600000000410E108602430D06510C070800000000000000
+  - Name:            .text
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x201160
+    AddressAlign:    0x10
+    Content:         554889E5C745FC00000000897DF8488975F031C05DC3
+  - Name:            .debug_abbrev
+    Type:            SHT_PROGBITS
+    AddressAlign:    0x1
+    Content:         014A00101772171B25B442197625111B12067317000000
+  - Name:            .debug_info
+    Type:            SHT_PROGBITS
+    AddressAlign:    0x1
+    Content:         24000000050004080000000099E97383BBC6980B0100000000080000000001001600000008000000
+  - Name:            .debug_str_offsets
+    Type:            SHT_PROGBITS
+    AddressAlign:    0x1
+    Content:         0C000000050000000000000002000000
+  - Name:            .debug_gnu_pubnames
+    Type:            SHT_PROGBITS
+    AddressAlign:    0x1
+    Content:         18000000020000000000280000001A000000306D61696E0000000000
+  - Name:            .debug_gnu_pubtypes
+    Type:            SHT_PROGBITS
+    AddressAlign:    0x1
+    Content:         '21000000020000000000280000004000000090696E74005300000090636861720000000000'
+  - Name:            .comment
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_MERGE, SHF_STRINGS ]
+    AddressAlign:    0x1
+    EntSize:         0x1
+    Content:         004170706C6520636C616E672076657273696F6E2031372E302E302028636C616E672D313730302E342E342E3129004C696E6B65723A204C4C442032322E302E30202868747470733A2F2F6769746875622E636F6D2F636C6179626F72672F6C6C766D2D70726F6A6563742E67697420613234333130363863303837656463303938393330303934343864343162356138336361303363392900
+  - Name:            .debug_line
+    Type:            SHT_PROGBITS
+    AddressAlign:    0x1
+    Content:         5A0000000500080037000000010101FB0E0D00010101010000000100000101011F010000000003011F020F051E0102000000004E0649A10A56ED4D381C49A3DB4F6825040000090260112000000000000105030A0821060B2E0202000101
+  - Name:            .debug_line_str
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_MERGE, SHF_STRINGS ]
+    AddressAlign:    0x1
+    EntSize:         0x1
+    Content:         2E006D61696E2E6D696E696D616C2E63707000
+Symbols:
+  - Name:            main.minimal.cpp
+    Type:            STT_FILE
+    Index:           SHN_ABS
+  - Name:            main
+    Type:            STT_FUNC
+    Section:         .text
+    Binding:         STB_GLOBAL
+    Value:           0x201160
+    Size:            0x16
+DWARF:
+  debug_str:
+    - .
+    - main.minimal.dwo
+  debug_addr:
+    - Length:          0xC
+      Version:         0x5
+      AddressSize:     0x8
+      Entries:
+        - Address:         0x201160
+...
diff --git a/lldb/test/Shell/SymbolFile/DWARF/dwp-str-offsets-dwarf64.test b/lldb/test/Shell/SymbolFile/DWARF/dwp-str-offsets-dwarf64.test
new file mode 100644
index 0000000000000..b010aa0995971
--- /dev/null
+++ b/lldb/test/Shell/SymbolFile/DWARF/dwp-str-offsets-dwarf64.test
@@ -0,0 +1,25 @@
+# This test verifies that LLDB can read a .dwp file that has a DWARF32 compile
+# unit that has a DWARF64 .debug_str_offsets table. This will be needed for
+# llvm-dwp changes that will start emitting 64 bit line tables for compile units
+# that have strings in the .debug_str section whose string offsets exceed 4GB.
+# By having a 64 bit .debug_str_offsets table, we can emit .debug_str sections
+# with no size limits.
+
+# RUN: yaml2obj %S/Inputs/dwp-str-offsets-dwarf64-exe.yaml > %t
+# RUN: yaml2obj %S/Inputs/dwp-str-offsets-dwarf64-dwp.yaml > %t.dwp
+# RUN: %lldb %t -b \
+# RUN:      -o 'image lookup --verbose --address 0x0000000000201172' | \
+# RUN:      FileCheck %s
+
+# CHECK:      (lldb) image lookup --verbose --address 0x0000000000201172
+# CHECK-NEXT:      Address: dwp-str-offsets-dwarf64.test.tmp[0x0000000000201172] (dwp-str-offsets-dwarf64.test.tmp.PT_LOAD[1]..text + 18)
+# CHECK-NEXT:      Summary: dwp-str-offsets-dwarf64.test.tmp`main + 18 at main.minimal.cpp:2:3
+# CHECK-NEXT:       Module: file = "{{.*}}/dwp-str-offsets-dwarf64.test.tmp", arch = "x86_64"
+# CHECK-NEXT:  CompileUnit: id = {0x00000000}, file = "main.minimal.cpp", language = "<not loaded>"
+# CHECK-NEXT:     Function: id = {0x7fffff000000001a}, name = "main", range = [0x0000000000201160-0x0000000000201176)
+# CHECK-NEXT:     FuncType: id = {0x7fffff000000001a}, byte-size = 0, decl = main.minimal.cpp:1, compiler_type = "int (int, const char **)"
+# CHECK-NEXT:       Blocks: id = {0x7fffff000000001a}, range = [0x00201160-0x00201176)
+# CHECK-NEXT:    LineEntry: [0x0000000000201172-0x0000000000201174): main.minimal.cpp:2:3
+# CHECK-NEXT:       Symbol: id = {0x00000002}, range = [0x0000000000201160-0x0000000000201176), name="main"
+# CHECK-NEXT:     Variable: id = {0x7fffff0000000029}, name = "argc", type = "int", valid ranges = <block>, location = DW_OP_fbreg -8, decl = main.minimal.cpp:1
+# CHECK-NEXT:     Variable: id = {0x7fffff0000000034}, name = "argv", type = "const char **", valid ranges = <block>, location = DW_OP_fbreg -16, decl = main.minimal.cpp:1

@github-actions
Copy link

github-actions bot commented Nov 14, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

Copy link
Member

@Michael137 Michael137 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems reasonable

Though looks like there's a genuine unittest failure.

@clayborg
Copy link
Collaborator Author

Seems reasonable

Though looks like there's a genuine unittest failure.

I fixed the unit test failure and refactored a bit.

Comment on lines 366 to 370
uint64_t length = strOffsets.GetU32(&baseOffset);
if (length == 0xffffffff)
if (length == llvm::dwarf::DW_LENGTH_DWARF64) {
length = strOffsets.GetU64(&baseOffset);
m_str_offset_size = 8;
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have a utility for reading the 32/64 encoded length we could reuse here?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't. The LLVM one is hidden and not in the DWARFDataExtractor and there is no counterpart in LLDB's DWARFDataExtractor.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was able to use the LLVM DWARFDataExtractor and re-use the getInitialLength() function from there.

Comment on lines 378 to 380
}

SetStrOffsetsBase(baseOffset);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove unrelated reformatting

…DWARF32 DWARF units in .dwp files in LLDB.

This patch is updating the reading capabilities of the LLDB DWARF parser for a llvm-dwp patch llvm#167457 that will emit .dwp files where the compile units are DWARF32 and the .debug_str_offsets tables will be emitted as DWARF64 to allow .debug_str sections that exceed 4GB in size.
@clayborg clayborg force-pushed the dwp-lldb-read-str-offsets-dwarf64 branch from cd18c9c to a85ad14 Compare November 14, 2025 23:24
Comment on lines +369 to +371
uint64_t length;
llvm::dwarf::DwarfFormat format;
std::tie(length, format) = strOffsets.getInitialLength(&baseOffset);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might be simpler to write that as auto [length, format] = strOffsets... (though it is a readability tradeoff due to omitting the types of the two variables)

@clayborg clayborg merged commit 4530047 into llvm:main Nov 15, 2025
8 of 9 checks passed
@clayborg clayborg deleted the dwp-lldb-read-str-offsets-dwarf64 branch November 15, 2025 00:35
@slydiman
Copy link
Contributor

The buildbots lldb-x86_64-win and lldb-remote-linux-win are broken after this patch.
Please take a look.

FAIL: lldb-shell::dwp-str-offsets-dwarf64.test
# | Input file: <stdin>
# | Check file: C:\buildbot\as-builder-10\lldb-x-aarch64\llvm-project\lldb\test\Shell\SymbolFile\DWARF\dwp-str-offsets-dwarf64.test
# | 
# | -dump-input=help explains the following input dump.
# | 
# | Input was:
# | <<<<<<
# |            .
# |            .
# |            .
# |           16: (lldb) command source -C --silent-run true lit-lldb-init 
# |           17: (lldb) target create "C:\\buildbot\\as-builder-10\\lldb-x-aarch64\\build\\tools\\lldb\\test\\Shell\\SymbolFile\\DWARF\\Output\\dwp-str-offsets-dwarf64.test.tmp" 
# |           18: Current executable set to 'C:\buildbot\as-builder-10\lldb-x-aarch64\build\tools\lldb\test\Shell\SymbolFile\DWARF\Output\dwp-str-offsets-dwarf64.test.tmp' (x86_64). 
# |           19: (lldb) image lookup --verbose --address 0x0000000000201172 
# |           20:  Address: dwp-str-offsets-dwarf64.test.tmp[0x0000000000201172] (dwp-str-offsets-dwarf64.test.tmp.PT_LOAD[1]..text + 18) 
# |           21:  Summary: dwp-str-offsets-dwarf64.test.tmp`main + 18 at main.minimal.cpp:2:3 
# | next:17'0                                                                                 X error: no match found
# |           22:  Module: file = "C:\buildbot\as-builder-10\lldb-x-aarch64\build\tools\lldb\test\Shell\SymbolFile\DWARF\Output\dwp-str-offsets-dwarf64.test.tmp", arch = "x86_64" 
# | next:17'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# | next:17'1                                                                                              ?                                                                        possible intended match
# |           23:  CompileUnit: id = {0x00000000}, file = "main.minimal.cpp", language = "<not loaded>" 
# | next:17'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           24:  Function: id = {0x7fffff000000001a}, name = "main", range = [0x0000000000201160-0x0000000000201176) 
# | next:17'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           25:  FuncType: id = {0x7fffff000000001a}, byte-size = 0, decl = main.minimal.cpp:1, compiler_type = "int (int, const char **)" 
# | next:17'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           26:  Blocks: id = {0x7fffff000000001a}, range = [0x00201160-0x00201176) 
# | next:17'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           27:  LineEntry: [0x0000000000201172-0x0000000000201174): main.minimal.cpp:2:3 
# | next:17'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

Projects

None yet

Development

Successfully merging this pull request may close these issues.

5 participants