Skip to content
13 changes: 11 additions & 2 deletions llvm/include/llvm/DWP/DWP.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ enum OnCuIndexOverflow {
Continue,
};

/// Selects how .debug_str_offsets tables are handled with respect to DWARF64
/// when writing the .dwp file.
enum Dwarf64StrOffsets {
Disabled, ///< Don't do any conversion of .debug_str_offsets tables.
Enabled, ///< Convert any .debug_str_offsets tables to DWARF64 if needed.
Always, ///< Always emit .debug_str_offsets tables as DWARF64 for testing.
};

struct UnitIndexEntry {
DWARFUnitIndex::Entry::SectionContribution Contributions[8];
std::string Name;
Expand Down Expand Up @@ -68,7 +74,10 @@ struct CompileUnitIdentifiers {
};

LLVM_ABI Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
OnCuIndexOverflow OverflowOptValue);
OnCuIndexOverflow OverflowOptValue,
Dwarf64StrOffsets StrOffsetsOptValue);

/// List of (section kind, length) pairs describing the sections contributed by
/// a single input file.
using SectionLengths = std::vector<std::pair<DWARFSectionKind, uint32_t>>;

LLVM_ABI Error handleSection(
const StringMap<std::pair<MCSection *, DWARFSectionKind>> &KnownSections,
Expand All @@ -82,7 +91,7 @@ LLVM_ABI Error handleSection(
std::vector<StringRef> &CurTypesSection,
std::vector<StringRef> &CurInfoSection, StringRef &AbbrevSection,
StringRef &CurCUIndexSection, StringRef &CurTUIndexSection,
std::vector<std::pair<DWARFSectionKind, uint32_t>> &SectionLength);
SectionLengths &SectionLength);

LLVM_ABI Expected<InfoSectionUnitHeader>
parseInfoSectionUnitHeader(StringRef Info);
Expand Down
6 changes: 3 additions & 3 deletions llvm/include/llvm/DWP/DWPStringPool.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,13 @@ class DWPStringPool {

MCStreamer &Out;
MCSection *Sec;
DenseMap<const char *, uint32_t, CStrDenseMapInfo> Pool;
uint32_t Offset = 0;
DenseMap<const char *, uint64_t, CStrDenseMapInfo> Pool;
uint64_t Offset = 0;

public:
DWPStringPool(MCStreamer &Out, MCSection *Sec) : Out(Out), Sec(Sec) {}

uint32_t getOffset(const char *Str, unsigned Length) {
uint64_t getOffset(const char *Str, unsigned Length) {
assert(strlen(Str) + 1 == Length && "Ensure length hint is correct");

auto Pair = Pool.insert(std::make_pair(Str, Offset));
Expand Down
94 changes: 76 additions & 18 deletions llvm/lib/DWP/DWP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -413,33 +413,53 @@ Expected<InfoSectionUnitHeader> parseInfoSectionUnitHeader(StringRef Info) {
}

static void writeNewOffsetsTo(MCStreamer &Out, DataExtractor &Data,
DenseMap<uint64_t, uint32_t> &OffsetRemapping,
uint64_t &Offset, uint64_t &Size) {

DenseMap<uint64_t, uint64_t> &OffsetRemapping,
uint64_t &Offset, const uint64_t Size,
uint32_t OldOffsetSize, uint32_t NewOffsetSize) {
// Create a mask so we don't trigger an emitIntValue() assert below if the
// NewOffset is over 4GB.
const uint64_t NewOffsetMask = NewOffsetSize == 8 ? UINT64_MAX : UINT32_MAX;
while (Offset < Size) {
auto OldOffset = Data.getU32(&Offset);
auto NewOffset = OffsetRemapping[OldOffset];
Out.emitIntValue(NewOffset, 4);
const uint64_t OldOffset = Data.getUnsigned(&Offset, OldOffsetSize);
const uint64_t NewOffset = OffsetRemapping[OldOffset];
// Truncate the string offset like the old llvm-dwp would have if we aren't
// promoting the .debug_str_offsets to DWARF64.
Out.emitIntValue(NewOffset & NewOffsetMask, NewOffsetSize);
}
}

void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings,
MCSection *StrOffsetSection,
StringRef CurStrSection,
StringRef CurStrOffsetSection, uint16_t Version) {
StringRef CurStrOffsetSection, uint16_t Version,
SectionLengths &SectionLength,
const Dwarf64StrOffsets StrOffsetsOptValue) {
// Could possibly produce an error or warning if one of these was non-null but
// the other was null.
if (CurStrSection.empty() || CurStrOffsetSection.empty())
return;

DenseMap<uint64_t, uint32_t> OffsetRemapping;
DenseMap<uint64_t, uint64_t> OffsetRemapping;

DataExtractor Data(CurStrSection, true, 0);
uint64_t LocalOffset = 0;
uint64_t PrevOffset = 0;

// Keep track of whether any new string offset exceeds UINT32_MAX. If one
// does, and the setting allows it, we emit a DWARF64 .debug_str_offsets table
// for this compile unit. If the \a StrOffsetsOptValue argument is
// Dwarf64StrOffsets::Always, then force the emission of DWARF64
// .debug_str_offsets for testing.
uint32_t OldOffsetSize = 4;
uint32_t NewOffsetSize =
StrOffsetsOptValue == Dwarf64StrOffsets::Always ? 8 : 4;
while (const char *S = Data.getCStr(&LocalOffset)) {
OffsetRemapping[PrevOffset] =
Strings.getOffset(S, LocalOffset - PrevOffset);
uint64_t NewOffset = Strings.getOffset(S, LocalOffset - PrevOffset);
OffsetRemapping[PrevOffset] = NewOffset;
// Only promote the .debug_str_offsets to DWARF64 if our setting allows it.
if (StrOffsetsOptValue != Dwarf64StrOffsets::Disabled &&
NewOffset > UINT32_MAX) {
NewOffsetSize = 8;
}
PrevOffset = LocalOffset;
}

Expand All @@ -451,7 +471,7 @@ void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings,
uint64_t Size = CurStrOffsetSection.size();
if (Version > 4) {
while (Offset < Size) {
uint64_t HeaderSize = debugStrOffsetsHeaderSize(Data, Version);
const uint64_t HeaderSize = debugStrOffsetsHeaderSize(Data, Version);
assert(HeaderSize <= Size - Offset &&
"StrOffsetSection size is less than its header");

Expand All @@ -461,16 +481,52 @@ void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings,
if (HeaderSize == 8) {
ContributionSize = Data.getU32(&HeaderLengthOffset);
} else if (HeaderSize == 16) {
OldOffsetSize = 8;
HeaderLengthOffset += 4; // skip the dwarf64 marker
ContributionSize = Data.getU64(&HeaderLengthOffset);
}
ContributionEnd = ContributionSize + HeaderLengthOffset;
Out.emitBytes(Data.getBytes(&Offset, HeaderSize));
writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, ContributionEnd);

StringRef HeaderBytes = Data.getBytes(&Offset, HeaderSize);
if (OldOffsetSize == 4 && NewOffsetSize == 8) {
// We had a DWARF32 .debug_str_offsets header, but we need to emit
// some string offsets that require 64 bit offsets on the .debug_str
// section. Emit the .debug_str_offsets header in DWARF64 format so we
// can emit string offsets that exceed UINT32_MAX without truncating
// the string offset.

// 2 bytes for DWARF version, 2 bytes pad.
const uint64_t VersionPadSize = 4;
const uint64_t NewLength =
(ContributionSize - VersionPadSize) * 2 + VersionPadSize;
// Emit the DWARF64 length that starts with a 4 byte DW_LENGTH_DWARF64
// value followed by the 8 byte updated length.
Out.emitIntValue(llvm::dwarf::DW_LENGTH_DWARF64, 4);
Out.emitIntValue(NewLength, 8);
// Emit DWARF version as a 2 byte integer.
Out.emitIntValue(Version, 2);
// Emit 2 bytes of padding.
Out.emitIntValue(0, 2);
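// Update the .debug_str_offsets section length contribution for this
// .dwo file. The extra 12 bytes cover the 4 byte DW_LENGTH_DWARF64 marker
// and the 8 byte length field emitted above.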
for (auto &Pair : SectionLength) {
if (Pair.first == DW_SECT_STR_OFFSETS) {
Pair.second = NewLength + 12;
break;
}
}
} else {
// Just emit the same .debug_str_offsets header.
Out.emitBytes(HeaderBytes);
}
writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, ContributionEnd,
OldOffsetSize, NewOffsetSize);
}

} else {
writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, Size);
assert(OldOffsetSize == NewOffsetSize);
writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, Size, OldOffsetSize,
NewOffsetSize);
}
}

Expand Down Expand Up @@ -562,7 +618,7 @@ Error handleSection(
std::vector<StringRef> &CurTypesSection,
std::vector<StringRef> &CurInfoSection, StringRef &AbbrevSection,
StringRef &CurCUIndexSection, StringRef &CurTUIndexSection,
std::vector<std::pair<DWARFSectionKind, uint32_t>> &SectionLength) {
SectionLengths &SectionLength) {
if (Section.isBSS())
return Error::success();

Expand Down Expand Up @@ -620,7 +676,8 @@ Error handleSection(
}

Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
OnCuIndexOverflow OverflowOptValue) {
OnCuIndexOverflow OverflowOptValue,
Dwarf64StrOffsets StrOffsetsOptValue) {
const auto &MCOFI = *Out.getContext().getObjectFileInfo();
MCSection *const StrSection = MCOFI.getDwarfStrDWOSection();
MCSection *const StrOffsetSection = MCOFI.getDwarfStrOffDWOSection();
Expand Down Expand Up @@ -684,7 +741,7 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
// This maps each section contained in this file to its length.
// This information is later on used to calculate the contributions,
// i.e. offset and length, of each compile/type unit to a section.
std::vector<std::pair<DWARFSectionKind, uint32_t>> SectionLength;
SectionLengths SectionLength;

for (const auto &Section : Obj.sections())
if (auto Err = handleSection(
Expand Down Expand Up @@ -713,7 +770,8 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
}

writeStringsAndOffsets(Out, Strings, StrOffsetSection, CurStrSection,
CurStrOffsetSection, Header.Version);
CurStrOffsetSection, Header.Version, SectionLength,
StrOffsetsOptValue);

for (auto Pair : SectionLength) {
auto Index = getContributionIndex(Pair.first, IndexVersion);
Expand Down
81 changes: 81 additions & 0 deletions llvm/test/tools/llvm-dwp/X86/dwarf64-str-offsets.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# This test checks that llvm-dwp can successfully promote .debug_str_offsets to
# DWARF64. We do this by passing "--dwarf64-str-offsets=always" to llvm-dwp,
# which forces every .debug_str_offsets table to be emitted as DWARF64. This
# allows us to test the promotion of a DWARF32 version of .debug_str_offsets to
# a DWARF64 version without having to create a 4GB .dwo file.

# RUN: yaml2obj %s -o %t.dwo
# RUN: llvm-dwp %t.dwo -o %t.dwp
# RUN: llvm-dwp %t.dwo -o %t.default.dwp --dwarf64-str-offsets
# RUN: llvm-dwp %t.dwo -o %t.disabled.dwp --dwarf64-str-offsets=disabled
# RUN: llvm-dwp %t.dwo -o %t.enabled.dwp --dwarf64-str-offsets=enabled
# RUN: llvm-dwp %t.dwo -o %t.always.dwp --dwarf64-str-offsets=always
# RUN: not llvm-dwp %t.dwo -o %t.invalid.dwp --dwarf64-str-offsets=invalid 2>&1 | FileCheck --check-prefixes=ERROR %s
# RUN: llvm-dwarfdump --debug-str-offsets %t.dwp | FileCheck --check-prefixes=DWARF32 %s
# RUN: llvm-dwarfdump --debug-str-offsets %t.default.dwp | FileCheck --check-prefixes=DWARF32 %s
# RUN: llvm-dwarfdump --debug-str-offsets %t.disabled.dwp | FileCheck --check-prefixes=DWARF32 %s
# RUN: llvm-dwarfdump --debug-str-offsets %t.enabled.dwp | FileCheck --check-prefixes=DWARF32 %s
# RUN: llvm-dwarfdump --debug-str-offsets %t.always.dwp | FileCheck --check-prefixes=DWARF64 %s

# DWARF32: .debug_str_offsets.dwo contents:
# DWARF32-NEXT: 0x00000000: Contribution size = 36, Format = DWARF32, Version = 5
# DWARF32-NEXT: 0x00000008: 00000000 "main"
# DWARF32-NEXT: 0x0000000c: 00000005 "int"
# DWARF32-NEXT: 0x00000010: 00000009 "argc"
# DWARF32-NEXT: 0x00000014: 0000000e "argv"
# DWARF32-NEXT: 0x00000018: 00000013 "char"
# DWARF32-NEXT: 0x0000001c: 00000018 "Apple clang version 17.0.0 (clang-1700.4.4.1)"
# DWARF32-NEXT: 0x00000020: 00000046 "simple.cpp"
# DWARF32-NEXT: 0x00000024: 00000051 "simple.dwo"

# DWARF64: .debug_str_offsets.dwo contents:
# DWARF64-NEXT: 0x00000000: Contribution size = 68, Format = DWARF64, Version = 5
# DWARF64-NEXT: 0x00000010: 0000000000000000 "main"
# DWARF64-NEXT: 0x00000018: 0000000000000005 "int"
# DWARF64-NEXT: 0x00000020: 0000000000000009 "argc"
# DWARF64-NEXT: 0x00000028: 000000000000000e "argv"
# DWARF64-NEXT: 0x00000030: 0000000000000013 "char"
# DWARF64-NEXT: 0x00000038: 0000000000000018 "Apple clang version 17.0.0 (clang-1700.4.4.1)"
# DWARF64-NEXT: 0x00000040: 0000000000000046 "simple.cpp"
# DWARF64-NEXT: 0x00000048: 0000000000000051 "simple.dwo"

# ERROR: invalid value for --dwarf64-str-offsets. Valid values are one of: "enabled", "disabled" or "always".

--- !ELF
FileHeader:
Class: ELFCLASS64
Data: ELFDATA2LSB
Type: ET_REL
Machine: EM_X86_64
SectionHeaderStringTable: .strtab
Sections:
- Name: .debug_str_offsets.dwo
Type: SHT_PROGBITS
Flags: [ SHF_EXCLUDE ]
AddressAlign: 0x1
Content: '24000000050000000000000005000000090000000E00000013000000180000004600000051000000'
- Name: .debug_str.dwo
Type: SHT_PROGBITS
Flags: [ SHF_EXCLUDE, SHF_MERGE, SHF_STRINGS ]
AddressAlign: 0x1
EntSize: 0x1
Content: 6D61696E00696E74006172676300617267760063686172004170706C6520636C616E672076657273696F6E2031372E302E302028636C616E672D313730302E342E342E31290073696D706C652E6370700073696D706C652E64776F00
- Name: .debug_info.dwo
Type: SHT_PROGBITS
Flags: [ SHF_EXCLUDE ]
AddressAlign: 0x1
Content: 540000000500050800000000031DD228762F8E1C0105210006070200190000000156000001400000000302917802000140000000030291700300014400000000040105040549000000054E00000006530000000404060100
- Name: .debug_abbrev.dwo
Type: SHT_PROGBITS
Flags: [ SHF_EXCLUDE ]
AddressAlign: 0x1
Content: 01110125251305032576250000022E01111B1206401803253A0B3B0B49133F190000030500021803253A0B3B0B4913000004240003253E0B0B0B0000050F00491300000626004913000000
- Type: SectionHeaderTable
Sections:
- Name: .strtab
- Name: .debug_str_offsets.dwo
- Name: .debug_str.dwo
- Name: .debug_info.dwo
- Name: .debug_abbrev.dwo
...
9 changes: 9 additions & 0 deletions llvm/tools/llvm-dwp/Opts.td
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,12 @@ def continueOnCuIndexOverflow_EQ : Joined<["-", "--"], "continue-on-cu-index-ove
"\t\ttruncated but valid DWP file, discarding any DWO files that would not fit within \n"
"\t\tthe 32 bit/4GB limits of the format.">,
Values<"continue,soft-stop">;

// Bare form of the option (no "=<value>"). llvm-dwp treats it the same as
// "--dwarf64-str-offsets=enabled".
def dwarf64StringOffsets : Flag<["-", "--"], "dwarf64-str-offsets">;
// Valued form of the option; accepts one of "disabled", "enabled" or "always".
// NOTE(review): the help text says "default = enabled", but llvm-dwp
// initializes the setting to Disabled when the option is absent; "default"
// here appears to describe the bare flag form -- confirm the intended wording.
def dwarf64StringOffsets_EQ : Joined<["-", "--"], "dwarf64-str-offsets=">,
HelpText<"default = enabled, This allows .debug_str tables to exceed the 4GB limit\n"
"and have any DWARF32 .debug_str_offsets tables converted to DWARF64 only for tables\n"
"that require 64 bit string offsets. = disabled, This setting doesn't convert DWARF32\n"
".debug_str_offsets tables in .dwo files to DWARF64 in the .dwp file. = always, This\n"
"forces all .debug_str_offsets tables to be emitted as DWARF64. This is used for testing.">,
Values<"disabled,enabled,always">;
26 changes: 25 additions & 1 deletion llvm/tools/llvm-dwp/llvm-dwp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,8 @@ int llvm_dwp_main(int argc, char **argv, const llvm::ToolContext &) {
llvm::BumpPtrAllocator A;
llvm::StringSaver Saver{A};
OnCuIndexOverflow OverflowOptValue = OnCuIndexOverflow::HardStop;
Dwarf64StrOffsets Dwarf64StrOffsetsValue = Dwarf64StrOffsets::Disabled;

opt::InputArgList Args =
Tbl.parseArgs(argc, argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) {
llvm::errs() << Msg << '\n';
Expand Down Expand Up @@ -161,6 +163,27 @@ int llvm_dwp_main(int argc, char **argv, const llvm::ToolContext &) {
}
}

// Resolve --dwarf64-str-offsets[=<value>] from the last matching argument.
// The bare flag is shorthand for the "enabled" value; any value other than
// "enabled", "disabled" or "always" is reported and terminates the tool.
if (Arg *StrOffsetsArg = Args.getLastArg(OPT_dwarf64StringOffsets,
                                         OPT_dwarf64StringOffsets_EQ)) {
  const bool IsBareFlag =
      StrOffsetsArg->getOption().matches(OPT_dwarf64StringOffsets);
  const StringRef Requested =
      IsBareFlag ? "enabled" : StrOffsetsArg->getValue();

  if (Requested == "enabled") {
    Dwarf64StrOffsetsValue = Dwarf64StrOffsets::Enabled;
  } else if (Requested == "disabled") {
    Dwarf64StrOffsetsValue = Dwarf64StrOffsets::Disabled;
  } else if (Requested == "always") {
    Dwarf64StrOffsetsValue = Dwarf64StrOffsets::Always;
  } else {
    llvm::errs() << "invalid value for --dwarf64-str-offsets. Valid values "
                    "are one of: \"enabled\", \"disabled\" or \"always\".\n";
    exit(1);
  }
}

for (const llvm::opt::Arg *A : Args.filtered(OPT_execFileNames))
ExecFilenames.emplace_back(A->getValue());

Expand Down Expand Up @@ -274,7 +297,8 @@ int llvm_dwp_main(int argc, char **argv, const llvm::ToolContext &) {
if (!MS)
return error("no object streamer for target " + TripleName, Context);

if (auto Err = write(*MS, DWOFilenames, OverflowOptValue)) {
if (auto Err =
write(*MS, DWOFilenames, OverflowOptValue, Dwarf64StrOffsetsValue)) {
logAllUnhandledErrors(std::move(Err), WithColor::error());
return 1;
}
Expand Down