Skip to content

Commit ac6e48d

Browse files
authored
Modify llvm-dwp to be able to emit string tables over 4GB without losing data (#167457)
We can change llvm-dwp to emit DWARF64 version of the .debug_str_offsets tables for .dwo files in a .dwp file. This allows the string table to exceed 4GB without truncating string offsets into the .debug_str section and losing data. llvm-dwp will append all strings to the .debug_str section for a .dwo file, and if any of the new string offsets exceed UINT32_MAX, it will upgrade the .debug_str_offsets table to a DWARF64 header and then each string offset in that table can now have a 64 bit offset. Fixed LLDB to be able to successfully load the 64 bit string tables in .dwp files. Fixed llvm-dwarfdump and LLVM DWARF parsing code to do the right thing with DWARF64 string table headers.
1 parent 2ad93b4 commit ac6e48d

File tree

6 files changed

+214
-27
lines changed

6 files changed

+214
-27
lines changed

llvm/include/llvm/DWP/DWP.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,12 @@ enum OnCuIndexOverflow {
2222
Continue,
2323
};
2424

25+
enum Dwarf64StrOffsetsPromotion {
26+
Disabled, ///< Don't do any conversion of .debug_str_offsets tables.
27+
Enabled, ///< Convert any .debug_str_offsets tables to DWARF64 if needed.
28+
Always, ///< Always emit .debug_str_offsets tables as DWARF64 for testing.
29+
};
30+
2531
struct UnitIndexEntry {
2632
DWARFUnitIndex::Entry::SectionContribution Contributions[8];
2733
std::string Name;
@@ -68,7 +74,10 @@ struct CompileUnitIdentifiers {
6874
};
6975

7076
LLVM_ABI Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
71-
OnCuIndexOverflow OverflowOptValue);
77+
OnCuIndexOverflow OverflowOptValue,
78+
Dwarf64StrOffsetsPromotion StrOffsetsOptValue);
79+
80+
typedef std::vector<std::pair<DWARFSectionKind, uint32_t>> SectionLengths;
7281

7382
LLVM_ABI Error handleSection(
7483
const StringMap<std::pair<MCSection *, DWARFSectionKind>> &KnownSections,
@@ -82,7 +91,7 @@ LLVM_ABI Error handleSection(
8291
std::vector<StringRef> &CurTypesSection,
8392
std::vector<StringRef> &CurInfoSection, StringRef &AbbrevSection,
8493
StringRef &CurCUIndexSection, StringRef &CurTUIndexSection,
85-
std::vector<std::pair<DWARFSectionKind, uint32_t>> &SectionLength);
94+
SectionLengths &SectionLength);
8695

8796
LLVM_ABI Expected<InfoSectionUnitHeader>
8897
parseInfoSectionUnitHeader(StringRef Info);

llvm/include/llvm/DWP/DWPStringPool.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,13 @@ class DWPStringPool {
3232

3333
MCStreamer &Out;
3434
MCSection *Sec;
35-
DenseMap<const char *, uint32_t, CStrDenseMapInfo> Pool;
36-
uint32_t Offset = 0;
35+
DenseMap<const char *, uint64_t, CStrDenseMapInfo> Pool;
36+
uint64_t Offset = 0;
3737

3838
public:
3939
DWPStringPool(MCStreamer &Out, MCSection *Sec) : Out(Out), Sec(Sec) {}
4040

41-
uint32_t getOffset(const char *Str, unsigned Length) {
41+
uint64_t getOffset(const char *Str, unsigned Length) {
4242
assert(strlen(Str) + 1 == Length && "Ensure length hint is correct");
4343

4444
auto Pair = Pool.insert(std::make_pair(Str, Offset));

llvm/lib/DWP/DWP.cpp

Lines changed: 78 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -413,33 +413,52 @@ Expected<InfoSectionUnitHeader> parseInfoSectionUnitHeader(StringRef Info) {
413413
}
414414

415415
static void writeNewOffsetsTo(MCStreamer &Out, DataExtractor &Data,
416-
DenseMap<uint64_t, uint32_t> &OffsetRemapping,
417-
uint64_t &Offset, uint64_t &Size) {
418-
416+
DenseMap<uint64_t, uint64_t> &OffsetRemapping,
417+
uint64_t &Offset, const uint64_t Size,
418+
uint32_t OldOffsetSize, uint32_t NewOffsetSize) {
419+
// Create a mask so we don't trigger an emitIntValue() assert below if the
420+
// NewOffset is over 4GB.
421+
const uint64_t NewOffsetMask = NewOffsetSize == 8 ? UINT64_MAX : UINT32_MAX;
419422
while (Offset < Size) {
420-
auto OldOffset = Data.getU32(&Offset);
421-
auto NewOffset = OffsetRemapping[OldOffset];
422-
Out.emitIntValue(NewOffset, 4);
423+
const uint64_t OldOffset = Data.getUnsigned(&Offset, OldOffsetSize);
424+
const uint64_t NewOffset = OffsetRemapping[OldOffset];
425+
// Truncate the string offset like the old llvm-dwp would have if we aren't
426+
// promoting the .debug_str_offsets to DWARF64.
427+
Out.emitIntValue(NewOffset & NewOffsetMask, NewOffsetSize);
423428
}
424429
}
425430

426-
void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings,
427-
MCSection *StrOffsetSection,
428-
StringRef CurStrSection,
429-
StringRef CurStrOffsetSection, uint16_t Version) {
431+
void writeStringsAndOffsets(
432+
MCStreamer &Out, DWPStringPool &Strings, MCSection *StrOffsetSection,
433+
StringRef CurStrSection, StringRef CurStrOffsetSection, uint16_t Version,
434+
SectionLengths &SectionLength,
435+
const Dwarf64StrOffsetsPromotion StrOffsetsOptValue) {
430436
// Could possibly produce an error or warning if one of these was non-null but
431437
// the other was null.
432438
if (CurStrSection.empty() || CurStrOffsetSection.empty())
433439
return;
434440

435-
DenseMap<uint64_t, uint32_t> OffsetRemapping;
441+
DenseMap<uint64_t, uint64_t> OffsetRemapping;
436442

437443
DataExtractor Data(CurStrSection, true, 0);
438444
uint64_t LocalOffset = 0;
439445
uint64_t PrevOffset = 0;
446+
447+
// Keep track if any new string offsets exceed UINT32_MAX. If any do, we can
448+
// emit a DWARF64 .debug_str_offsets table for this compile unit. If the
449+
// \a StrOffsetsOptValue argument is Dwarf64StrOffsetsPromotion::Always, then
450+
// force the emission of DWARF64 .debug_str_offsets for testing.
451+
uint32_t OldOffsetSize = 4;
452+
uint32_t NewOffsetSize =
453+
StrOffsetsOptValue == Dwarf64StrOffsetsPromotion::Always ? 8 : 4;
440454
while (const char *S = Data.getCStr(&LocalOffset)) {
441-
OffsetRemapping[PrevOffset] =
442-
Strings.getOffset(S, LocalOffset - PrevOffset);
455+
uint64_t NewOffset = Strings.getOffset(S, LocalOffset - PrevOffset);
456+
OffsetRemapping[PrevOffset] = NewOffset;
457+
// Only promote the .debug_str_offsets to DWARF64 if our setting allows it.
458+
if (StrOffsetsOptValue != Dwarf64StrOffsetsPromotion::Disabled &&
459+
NewOffset > UINT32_MAX) {
460+
NewOffsetSize = 8;
461+
}
443462
PrevOffset = LocalOffset;
444463
}
445464

@@ -451,7 +470,7 @@ void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings,
451470
uint64_t Size = CurStrOffsetSection.size();
452471
if (Version > 4) {
453472
while (Offset < Size) {
454-
uint64_t HeaderSize = debugStrOffsetsHeaderSize(Data, Version);
473+
const uint64_t HeaderSize = debugStrOffsetsHeaderSize(Data, Version);
455474
assert(HeaderSize <= Size - Offset &&
456475
"StrOffsetSection size is less than its header");
457476

@@ -461,16 +480,52 @@ void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings,
461480
if (HeaderSize == 8) {
462481
ContributionSize = Data.getU32(&HeaderLengthOffset);
463482
} else if (HeaderSize == 16) {
483+
OldOffsetSize = 8;
464484
HeaderLengthOffset += 4; // skip the dwarf64 marker
465485
ContributionSize = Data.getU64(&HeaderLengthOffset);
466486
}
467487
ContributionEnd = ContributionSize + HeaderLengthOffset;
468-
Out.emitBytes(Data.getBytes(&Offset, HeaderSize));
469-
writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, ContributionEnd);
488+
489+
StringRef HeaderBytes = Data.getBytes(&Offset, HeaderSize);
490+
if (OldOffsetSize == 4 && NewOffsetSize == 8) {
491+
// We had a DWARF32 .debug_str_offsets header, but we need to emit
492+
// some string offsets that require 64 bit offsets on the .debug_str
493+
// section. Emit the .debug_str_offsets header in DWARF64 format so we
494+
// can emit string offsets that exceed UINT32_MAX without truncating
495+
// the string offset.
496+
497+
// 2 bytes for DWARF version, 2 bytes pad.
498+
const uint64_t VersionPadSize = 4;
499+
const uint64_t NewLength =
500+
(ContributionSize - VersionPadSize) * 2 + VersionPadSize;
501+
// Emit the DWARF64 length that starts with a 4 byte DW_LENGTH_DWARF64
502+
// value followed by the 8 byte updated length.
503+
Out.emitIntValue(llvm::dwarf::DW_LENGTH_DWARF64, 4);
504+
Out.emitIntValue(NewLength, 8);
505+
// Emit DWARF version as a 2 byte integer.
506+
Out.emitIntValue(Version, 2);
507+
// Emit 2 bytes of padding.
508+
Out.emitIntValue(0, 2);
509+
// Update the .debug_str_offsets section length contribution for
510+
// this .dwo file.
511+
for (auto &Pair : SectionLength) {
512+
if (Pair.first == DW_SECT_STR_OFFSETS) {
513+
Pair.second = NewLength + 12;
514+
break;
515+
}
516+
}
517+
} else {
518+
// Just emit the same .debug_str_offsets header.
519+
Out.emitBytes(HeaderBytes);
520+
}
521+
writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, ContributionEnd,
522+
OldOffsetSize, NewOffsetSize);
470523
}
471524

472525
} else {
473-
writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, Size);
526+
assert(OldOffsetSize == NewOffsetSize);
527+
writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, Size, OldOffsetSize,
528+
NewOffsetSize);
474529
}
475530
}
476531

@@ -562,7 +617,7 @@ Error handleSection(
562617
std::vector<StringRef> &CurTypesSection,
563618
std::vector<StringRef> &CurInfoSection, StringRef &AbbrevSection,
564619
StringRef &CurCUIndexSection, StringRef &CurTUIndexSection,
565-
std::vector<std::pair<DWARFSectionKind, uint32_t>> &SectionLength) {
620+
SectionLengths &SectionLength) {
566621
if (Section.isBSS())
567622
return Error::success();
568623

@@ -620,7 +675,8 @@ Error handleSection(
620675
}
621676

622677
Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
623-
OnCuIndexOverflow OverflowOptValue) {
678+
OnCuIndexOverflow OverflowOptValue,
679+
Dwarf64StrOffsetsPromotion StrOffsetsOptValue) {
624680
const auto &MCOFI = *Out.getContext().getObjectFileInfo();
625681
MCSection *const StrSection = MCOFI.getDwarfStrDWOSection();
626682
MCSection *const StrOffsetSection = MCOFI.getDwarfStrOffDWOSection();
@@ -684,7 +740,7 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
684740
// This maps each section contained in this file to its length.
685741
// This information is later on used to calculate the contributions,
686742
// i.e. offset and length, of each compile/type unit to a section.
687-
std::vector<std::pair<DWARFSectionKind, uint32_t>> SectionLength;
743+
SectionLengths SectionLength;
688744

689745
for (const auto &Section : Obj.sections())
690746
if (auto Err = handleSection(
@@ -713,7 +769,8 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
713769
}
714770

715771
writeStringsAndOffsets(Out, Strings, StrOffsetSection, CurStrSection,
716-
CurStrOffsetSection, Header.Version);
772+
CurStrOffsetSection, Header.Version, SectionLength,
773+
StrOffsetsOptValue);
717774

718775
for (auto Pair : SectionLength) {
719776
auto Index = getContributionIndex(Pair.first, IndexVersion);
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
# This test tests that llvm-dwp can successfully promote .debug_str_offsets to
2+
# DWARF64. We do this by using a testing option to llvm-dwp which is
3+
# "--dwarf64-str-offsets-promotion=always". This allows us to test if llvm-dwp can
4+
# successfully promote a DWARF32 version of .debug_str_offsets to a DWARF64
5+
# version. This allows us to test the functionality without having to create a
6+
# 4GB .dwo file.
7+
8+
# RUN: yaml2obj %s -o %t.dwo
9+
# RUN: llvm-dwp %t.dwo -o %t.dwp
10+
# RUN: llvm-dwp %t.dwo -o %t.default.dwp --dwarf64-str-offsets-promotion
11+
# RUN: llvm-dwp %t.dwo -o %t.disabled.dwp --dwarf64-str-offsets-promotion=disabled
12+
# RUN: llvm-dwp %t.dwo -o %t.enabled.dwp --dwarf64-str-offsets-promotion=enabled
13+
# RUN: llvm-dwp %t.dwo -o %t.always.dwp --dwarf64-str-offsets-promotion=always
14+
# RUN: not llvm-dwp %t.dwo -o %t.invalid.dwp --dwarf64-str-offsets-promotion=invalid 2>&1 | FileCheck --check-prefixes=ERROR %s
15+
# RUN: llvm-dwarfdump --debug-str-offsets %t.dwp | FileCheck --check-prefixes=DWARF32 %s
16+
# RUN: llvm-dwarfdump --debug-str-offsets %t.default.dwp | FileCheck --check-prefixes=DWARF32 %s
17+
# RUN: llvm-dwarfdump --debug-str-offsets %t.disabled.dwp | FileCheck --check-prefixes=DWARF32 %s
18+
# RUN: llvm-dwarfdump --debug-str-offsets %t.enabled.dwp | FileCheck --check-prefixes=DWARF32 %s
19+
# RUN: llvm-dwarfdump --debug-str-offsets %t.always.dwp | FileCheck --check-prefixes=DWARF64 %s
20+
21+
# DWARF32: .debug_str_offsets.dwo contents:
22+
# DWARF32-NEXT: 0x00000000: Contribution size = 36, Format = DWARF32, Version = 5
23+
# DWARF32-NEXT: 0x00000008: 00000000 "main"
24+
# DWARF32-NEXT: 0x0000000c: 00000005 "int"
25+
# DWARF32-NEXT: 0x00000010: 00000009 "argc"
26+
# DWARF32-NEXT: 0x00000014: 0000000e "argv"
27+
# DWARF32-NEXT: 0x00000018: 00000013 "char"
28+
# DWARF32-NEXT: 0x0000001c: 00000018 "Apple clang version 17.0.0 (clang-1700.4.4.1)"
29+
# DWARF32-NEXT: 0x00000020: 00000046 "simple.cpp"
30+
# DWARF32-NEXT: 0x00000024: 00000051 "simple.dwo"
31+
32+
# DWARF64: .debug_str_offsets.dwo contents:
33+
# DWARF64-NEXT: 0x00000000: Contribution size = 68, Format = DWARF64, Version = 5
34+
# DWARF64-NEXT: 0x00000010: 0000000000000000 "main"
35+
# DWARF64-NEXT: 0x00000018: 0000000000000005 "int"
36+
# DWARF64-NEXT: 0x00000020: 0000000000000009 "argc"
37+
# DWARF64-NEXT: 0x00000028: 000000000000000e "argv"
38+
# DWARF64-NEXT: 0x00000030: 0000000000000013 "char"
39+
# DWARF64-NEXT: 0x00000038: 0000000000000018 "Apple clang version 17.0.0 (clang-1700.4.4.1)"
40+
# DWARF64-NEXT: 0x00000040: 0000000000000046 "simple.cpp"
41+
# DWARF64-NEXT: 0x00000048: 0000000000000051 "simple.dwo"
42+
43+
# ERROR: invalid value for --dwarf64-str-offsets-promotion. Valid values are one of: "enabled", "disabled" or "always".
44+
45+
--- !ELF
46+
FileHeader:
47+
Class: ELFCLASS64
48+
Data: ELFDATA2LSB
49+
Type: ET_REL
50+
Machine: EM_X86_64
51+
SectionHeaderStringTable: .strtab
52+
Sections:
53+
- Name: .debug_str_offsets.dwo
54+
Type: SHT_PROGBITS
55+
Flags: [ SHF_EXCLUDE ]
56+
AddressAlign: 0x1
57+
Content: '24000000050000000000000005000000090000000E00000013000000180000004600000051000000'
58+
- Name: .debug_str.dwo
59+
Type: SHT_PROGBITS
60+
Flags: [ SHF_EXCLUDE, SHF_MERGE, SHF_STRINGS ]
61+
AddressAlign: 0x1
62+
EntSize: 0x1
63+
Content: 6D61696E00696E74006172676300617267760063686172004170706C6520636C616E672076657273696F6E2031372E302E302028636C616E672D313730302E342E342E31290073696D706C652E6370700073696D706C652E64776F00
64+
- Name: .debug_info.dwo
65+
Type: SHT_PROGBITS
66+
Flags: [ SHF_EXCLUDE ]
67+
AddressAlign: 0x1
68+
Content: 540000000500050800000000031DD228762F8E1C0105210006070200190000000156000001400000000302917802000140000000030291700300014400000000040105040549000000054E00000006530000000404060100
69+
- Name: .debug_abbrev.dwo
70+
Type: SHT_PROGBITS
71+
Flags: [ SHF_EXCLUDE ]
72+
AddressAlign: 0x1
73+
Content: 01110125251305032576250000022E01111B1206401803253A0B3B0B49133F190000030500021803253A0B3B0B4913000004240003253E0B0B0B0000050F00491300000626004913000000
74+
- Type: SectionHeaderTable
75+
Sections:
76+
- Name: .strtab
77+
- Name: .debug_str_offsets.dwo
78+
- Name: .debug_str.dwo
79+
- Name: .debug_info.dwo
80+
- Name: .debug_abbrev.dwo
81+
...

llvm/tools/llvm-dwp/Opts.td

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,18 @@ def continueOnCuIndexOverflow_EQ : Joined<["-", "--"], "continue-on-cu-index-ove
1616
"\t\ttruncated but valid DWP file, discarding any DWO files that would not fit within \n"
1717
"\t\tthe 32 bit/4GB limits of the format.">,
1818
Values<"continue,soft-stop">;
19+
20+
def dwarf64StringOffsets : Flag<["-", "--"], "dwarf64-str-offsets-promotion">;
21+
def dwarf64StringOffsets_EQ
22+
: Joined<["-", "--"], "dwarf64-str-offsets-promotion=">,
23+
HelpText<"default = enabled, This allows .debug_str tables to exceed the "
24+
"4GB limit\n"
25+
"and have any DWARF32 .debug_str_offsets tables converted to "
26+
"DWARF64 only for tables\n"
27+
"that require 64 bit string offsets. = disabled, This setting "
28+
"doesn't convert DWARF32\n"
29+
".debug_str_offsets tables in .dwo files to DWARF64 in the .dwp "
30+
"file. = always, This\n"
31+
"forces all .debug_str_offsets tables to be emitted as DWARF64. "
32+
"This is used for testing.">,
33+
Values<"disabled,enabled,always">;

llvm/tools/llvm-dwp/llvm-dwp.cpp

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,9 @@ int llvm_dwp_main(int argc, char **argv, const llvm::ToolContext &) {
125125
llvm::BumpPtrAllocator A;
126126
llvm::StringSaver Saver{A};
127127
OnCuIndexOverflow OverflowOptValue = OnCuIndexOverflow::HardStop;
128+
Dwarf64StrOffsetsPromotion Dwarf64StrOffsetsValue =
129+
Dwarf64StrOffsetsPromotion::Disabled;
130+
128131
opt::InputArgList Args =
129132
Tbl.parseArgs(argc, argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) {
130133
llvm::errs() << Msg << '\n';
@@ -161,6 +164,27 @@ int llvm_dwp_main(int argc, char **argv, const llvm::ToolContext &) {
161164
}
162165
}
163166

167+
if (Arg *Arg = Args.getLastArg(OPT_dwarf64StringOffsets,
168+
OPT_dwarf64StringOffsets_EQ)) {
169+
if (Arg->getOption().matches(OPT_dwarf64StringOffsets)) {
170+
Dwarf64StrOffsetsValue = Dwarf64StrOffsetsPromotion::Enabled;
171+
} else {
172+
std::string OptValue = Arg->getValue();
173+
if (OptValue == "disabled") {
174+
Dwarf64StrOffsetsValue = Dwarf64StrOffsetsPromotion::Disabled;
175+
} else if (OptValue == "enabled") {
176+
Dwarf64StrOffsetsValue = Dwarf64StrOffsetsPromotion::Enabled;
177+
} else if (OptValue == "always") {
178+
Dwarf64StrOffsetsValue = Dwarf64StrOffsetsPromotion::Always;
179+
} else {
180+
llvm::errs()
181+
<< "invalid value for --dwarf64-str-offsets-promotion. Valid "
182+
"values are one of: \"enabled\", \"disabled\" or \"always\".\n";
183+
exit(1);
184+
}
185+
}
186+
}
187+
164188
for (const llvm::opt::Arg *A : Args.filtered(OPT_execFileNames))
165189
ExecFilenames.emplace_back(A->getValue());
166190

@@ -274,7 +298,8 @@ int llvm_dwp_main(int argc, char **argv, const llvm::ToolContext &) {
274298
if (!MS)
275299
return error("no object streamer for target " + TripleName, Context);
276300

277-
if (auto Err = write(*MS, DWOFilenames, OverflowOptValue)) {
301+
if (auto Err =
302+
write(*MS, DWOFilenames, OverflowOptValue, Dwarf64StrOffsetsValue)) {
278303
logAllUnhandledErrors(std::move(Err), WithColor::error());
279304
return 1;
280305
}

0 commit comments

Comments
 (0)