Skip to content

Commit

Permalink
[DWARFLibrary] Add support to re-construct cu-index
Browse files Browse the repository at this point in the history
Summary:

According to the DWARF5 specification and the GNU specification for DWARF4, the
offset entry in the CU/TU Index is 32 bits wide. This presents a problem when
.debug_info.dwo in a DWP file grows beyond 4GB: the CU Index becomes partially
corrupted.

This diff adds manual parsing of .debug_info.dwo/.debug_abbrev.dwo to
reconstruct the CU index in general, and the TU index for DWARF5. This is a
workaround until the DWARF6 spec is finalized.

The next patch will change the internal CU/TU struct to 64 bits and update its
uses as necessary. The plan is to land all the patches in one go after all are approved.

This patch originates from the discussion in: https://discourse.llvm.org/t/dwarf-dwp-4gb-limit/63902

Differential Revision: https://reviews.llvm.org/D137882
  • Loading branch information
ayermolo committed Dec 7, 2022
1 parent 5ebd28f commit a5bd76a
Show file tree
Hide file tree
Showing 8 changed files with 226 additions and 5 deletions.
12 changes: 12 additions & 0 deletions llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
Expand Up @@ -111,6 +111,10 @@ class DWARFContext : public DIContext {
MacroDwoSection
};

// When set, debug_info.dwo/debug_abbrev.dwo are parsed manually to populate
// the CU Index, and the TU Index for DWARF5. Defaults to false so that clients
// which never call setParseCUTUIndexManually() read a well-defined value.
bool ParseCUTUIndexManually = false;

public:
DWARFContext(std::unique_ptr<const DWARFObject> DObj,
std::string DWPName = "",
Expand Down Expand Up @@ -454,6 +458,14 @@ class DWARFContext : public DIContext {
/// into "SectionedAddress Address"
DWARFCompileUnit *getCompileUnitForAddress(uint64_t Address);

/// Reports whether the CU/TU index is to be populated by manually parsing the
/// unit headers. The TU Index is populated manually only for DWARF5.
bool getParseCUTUIndexManually() const {
  return ParseCUTUIndexManually;
}

/// Enables or disables manual population of the CU/TU index. The TU Index is
/// populated manually only for DWARF5.
void setParseCUTUIndexManually(bool PCUTU) {
  ParseCUTUIndexManually = PCUTU;
}

private:
/// Parse a macro[.dwo] or macinfo[.dwo] section.
std::unique_ptr<DWARFDebugMacro>
Expand Down
6 changes: 6 additions & 0 deletions llvm/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h
Expand Up @@ -148,12 +148,14 @@ class DWARFUnitIndex {
public:
const SectionContribution *getContribution(DWARFSectionKind Sec) const;
const SectionContribution *getContribution() const;
SectionContribution &getContribution();

/// Returns the raw pointer held by Contributions (read-only access).
const SectionContribution *getContributions() const {
return Contributions.get();
}

/// Returns the signature stored for this entry.
uint64_t getSignature() const { return Signature; }
bool isValid() { return Index; }
};

private:
Expand Down Expand Up @@ -194,6 +196,10 @@ class DWARFUnitIndex {
/// Read-only view over the Header.NumBuckets entries stored in Rows.
ArrayRef<Entry> getRows() const {
  return ArrayRef<Entry>(Rows.get(), Header.NumBuckets);
}

/// Mutable view over the Header.NumBuckets entries stored in Rows.
MutableArrayRef<Entry> getMutableRows() {
  return MutableArrayRef<Entry>(Rows.get(), Header.NumBuckets);
}
};

} // end namespace llvm
Expand Down
75 changes: 73 additions & 2 deletions llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
Expand Up @@ -782,14 +782,82 @@ bool DWARFContext::verify(raw_ostream &OS, DIDumpOptions DumpOpts) {
return Success;
}

void fixupIndex(const DWARFObject &DObj, DWARFContext &C,
DWARFUnitIndex &Index) {
using EntryType = DWARFUnitIndex::Entry::SectionContribution;
using EntryMap = DenseMap<uint32_t, EntryType>;
EntryMap Map;
if (DObj.getCUIndexSection().empty())
return;

uint64_t Offset = 0;
uint32_t TruncOffset = 0;
DObj.forEachInfoDWOSections([&](const DWARFSection &S) {
if (!(C.getParseCUTUIndexManually() ||
S.Data.size() >= std::numeric_limits<uint32_t>::max()))
return;

DWARFDataExtractor Data(DObj, S, C.isLittleEndian(), 0);
while (Data.isValidOffset(Offset)) {
DWARFUnitHeader Header;
if (!Header.extract(C, Data, &Offset, DWARFSectionKind::DW_SECT_INFO)) {
logAllUnhandledErrors(
createError("Failed to parse CU header in DWP file"), errs());
Map.clear();
break;
}

auto Iter = Map.insert({TruncOffset,
{Header.getOffset(), Header.getNextUnitOffset() -
Header.getOffset()}});
if (!Iter.second) {
logAllUnhandledErrors(
createError("Collision occured between for truncated offset 0x" +
Twine::utohexstr(TruncOffset)),
errs());
Map.clear();
return;
}

Offset = Header.getNextUnitOffset();
TruncOffset = Offset;
}
});

if (Map.empty())
return;

for (DWARFUnitIndex::Entry &E : Index.getMutableRows()) {
if (!E.isValid())
continue;
DWARFUnitIndex::Entry::SectionContribution &CUOff = E.getContribution();
auto Iter = Map.find(CUOff.getOffset());
if (Iter == Map.end()) {
logAllUnhandledErrors(createError("Could not find CU offset 0x" +
Twine::utohexstr(CUOff.getOffset()) +
" in the Map"),
errs());
break;
}
CUOff.setOffset(Iter->second.getOffset());
if (CUOff.getOffset() != Iter->second.getOffset())
logAllUnhandledErrors(createError("Length of CU in CU index doesn't "
"match calculated length at offset 0x" +
Twine::utohexstr(CUOff.getOffset())),
errs());
}

return;
}

/// Returns the CU index, building it on first use: the CU index section is
/// parsed and the result is then passed through fixupIndex(). Subsequent calls
/// return the cached index.
const DWARFUnitIndex &DWARFContext::getCUIndex() {
  if (!CUIndex) {
    DataExtractor CUIndexData(DObj->getCUIndexSection(), isLittleEndian(), 0);

    CUIndex = std::make_unique<DWARFUnitIndex>(DW_SECT_INFO);
    CUIndex->parse(CUIndexData);
    fixupIndex(*DObj, *this, *CUIndex);
  }
  return *CUIndex;
}

Expand All @@ -798,9 +866,12 @@ const DWARFUnitIndex &DWARFContext::getTUIndex() {
return *TUIndex;

DataExtractor TUIndexData(DObj->getTUIndexSection(), isLittleEndian(), 0);

TUIndex = std::make_unique<DWARFUnitIndex>(DW_SECT_EXT_TYPES);
TUIndex->parse(TUIndexData);
// If we are parsing a TU index for the .debug_types section (version 2),
// we don't need to do anything.
if (TUIndex->getVersion() != 2)
fixupIndex(*DObj, *this, *TUIndex.get());
return *TUIndex;
}

Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
Expand Up @@ -269,6 +269,11 @@ DWARFUnitIndex::Entry::getContribution(DWARFSectionKind Sec) const {
return nullptr;
}

/// Mutable access to the contribution in the column selected by
/// Index->InfoColumn.
DWARFUnitIndex::Entry::SectionContribution &
DWARFUnitIndex::Entry::getContribution() {
  auto &InfoContribution = Contributions[Index->InfoColumn];
  return InfoContribution;
}

const DWARFUnitIndex::Entry::SectionContribution *
DWARFUnitIndex::Entry::getContribution() const {
return &Contributions[Index->InfoColumn];
Expand Down
92 changes: 92 additions & 0 deletions llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v5.s
@@ -0,0 +1,92 @@
# This test checks that we can correctly parse the CU and TU index manually for DWARF5.

# RUN: llvm-mc -triple x86_64-unknown-linux %s -filetype=obj -o %t.o \
# RUN: -split-dwarf-file=%t.dwo -dwarf-version=5
# RUN: llvm-dwp %t.dwo -o %t.dwp
# RUN: llvm-dwarfdump -debug-info -debug-cu-index -debug-tu-index %t.dwp | FileCheck -check-prefix=CHECK %s
# RUN: llvm-dwarfdump -debug-info -debug-cu-index -debug-tu-index -manaully-generate-unit-index %t.dwp | FileCheck -check-prefix=CHECK2 %s

## Note: In order to check whether the type unit index is generated
## there is no need to add the missing DIEs for the structure type of the type unit.

# CHECK-DAG: .debug_info.dwo contents:
# CHECK: 0x00000000: Type Unit: length = 0x00000017, format = DWARF32, version = 0x0005, unit_type = DW_UT_split_type, abbr_offset = 0x0000, addr_size = 0x08, name = '', type_signature = [[TUID1:.*]], type_offset = 0x0019 (next unit at 0x0000001b)
# CHECK: 0x0000001b: Type Unit: length = 0x00000017, format = DWARF32, version = 0x0005, unit_type = DW_UT_split_type, abbr_offset = 0x0000, addr_size = 0x08, name = '', type_signature = [[TUID2:.*]], type_offset = 0x0019 (next unit at 0x00000036)
# CHECK: 0x00000036: Compile Unit: length = 0x00000011, format = DWARF32, version = 0x0005, unit_type = DW_UT_split_compile, abbr_offset = 0x0000, addr_size = 0x08, DWO_id = [[CUID1:.*]] (next unit at 0x0000004b)
# CHECK-DAG: .debug_cu_index contents:
# CHECK: version = 5, units = 1, slots = 2
# CHECK: Index Signature INFO ABBREV
# CHECK: 1 [[CUID1]] [0x0000000000000036, 0x000000000000004b) [0x00000000, 0x00000010)
# CHECK-DAG: .debug_tu_index contents:
# CHECK: version = 5, units = 2, slots = 4
# CHECK: Index Signature INFO ABBREV
# CHECK: 1 [[TUID1]] [0x0000000000000000, 0x000000000000001b) [0x00000000, 0x00000010)
# CHECK: 4 [[TUID2]] [0x000000000000001b, 0x0000000000000036) [0x00000000, 0x00000010)

# CHECK2-DAG: .debug_info.dwo contents:
# CHECK2: 0x00000000: Type Unit: length = 0x00000017, format = DWARF32, version = 0x0005, unit_type = DW_UT_split_type, abbr_offset = 0x0000, addr_size = 0x08, name = '', type_signature = [[TUID1:.*]], type_offset = 0x0019 (next unit at 0x0000001b)
# CHECK2: 0x0000001b: Type Unit: length = 0x00000017, format = DWARF32, version = 0x0005, unit_type = DW_UT_split_type, abbr_offset = 0x0000, addr_size = 0x08, name = '', type_signature = [[TUID2:.*]], type_offset = 0x0019 (next unit at 0x00000036)
# CHECK2: 0x00000036: Compile Unit: length = 0x00000011, format = DWARF32, version = 0x0005, unit_type = DW_UT_split_compile, abbr_offset = 0x0000, addr_size = 0x08, DWO_id = [[CUID1:.*]] (next unit at 0x0000004b)
# CHECK2-DAG: .debug_cu_index contents:
# CHECK2: version = 5, units = 1, slots = 2
# CHECK2: Index Signature INFO ABBREV
# CHECK2: 1 [[CUID1]] [0x0000000000000036, 0x000000000000004b) [0x00000000, 0x00000010)
# CHECK2-DAG: .debug_tu_index contents:
# CHECK2: version = 5, units = 2, slots = 4
# CHECK2: Index Signature INFO ABBREV
# CHECK2: 1 [[TUID1]] [0x0000000000000000, 0x000000000000001b) [0x00000000, 0x00000010)
# CHECK2: 4 [[TUID2]] [0x000000000000001b, 0x0000000000000036) [0x00000000, 0x00000010)

.section .debug_info.dwo,"e",@progbits
.long .Ldebug_info_dwo_end0-.Ldebug_info_dwo_start0 # Length of Unit
.Ldebug_info_dwo_start0:
.short 5 # DWARF version number
.byte 6 # DWARF Unit Type (DW_UT_split_type)
.byte 8 # Address Size (in bytes)
.long 0 # Offset Into Abbrev. Section
.quad 5657452045627120676 # Type Signature
.long 25 # Type DIE Offset
.byte 2 # Abbrev [2] DW_TAG_type_unit
.byte 3 # Abbrev [3] DW_TAG_structure_type
.byte 0 # End Of Children Mark
.Ldebug_info_dwo_end0:
.section .debug_info.dwo,"e",@progbits
.long .Ldebug_info_dwo_end1-.Ldebug_info_dwo_start1 # Length of Unit
.Ldebug_info_dwo_start1:
.short 5 # DWARF version number
.byte 6 # DWARF Unit Type (DW_UT_split_type)
.byte 8 # Address Size (in bytes)
.long 0 # Offset Into Abbrev. Section
.quad -8528522068957683993 # Type Signature
.long 25 # Type DIE Offset
.byte 4 # Abbrev [4] DW_TAG_type_unit
.byte 5 # Abbrev [5] DW_TAG_structure_type
.byte 0 # End Of Children Mark
.Ldebug_info_dwo_end1:
.section .debug_info.dwo,"e",@progbits
.long .Ldebug_info_dwo_end2-.Ldebug_info_dwo_start2 # Length of Unit
.Ldebug_info_dwo_start2:
.short 5 # DWARF version number
.byte 5 # DWARF Unit Type (DW_UT_split_compile)
.byte 8 # Address Size (in bytes)
.long 0 # Offset Into Abbrev. Section
.quad 1152943841751211454
.byte 1 # Abbrev [1] DW_TAG_compile_unit
.Ldebug_info_dwo_end2:
.section .debug_abbrev.dwo,"e",@progbits
.byte 1 # Abbreviation Code
.byte 17 # DW_TAG_compile_unit
.byte 0 # DW_CHILDREN_no
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 2 # Abbreviation Code
.byte 65 # DW_TAG_type_unit
.byte 1 # DW_CHILDREN_yes
.byte 0 # EOM
.byte 0 # EOM
.byte 4 # Abbreviation Code
.byte 65 # DW_TAG_type_unit
.byte 1 # DW_CHILDREN_yes
.byte 0 # EOM
.byte 0 # EOM
.byte 0 # EOM
6 changes: 5 additions & 1 deletion llvm/test/tools/llvm-dwp/X86/debug_macro_v5.s
Expand Up @@ -2,7 +2,8 @@

# RUN: llvm-mc -triple x86_64-unknown-linux --filetype=obj --split-dwarf-file=%t.dwo -dwarf-version=5 %s -o %t.o
# RUN: llvm-dwp %t.dwo -o %t.dwp 2>&1
# RUN: llvm-dwarfdump -debug-macro -debug-cu-index %t.dwp | FileCheck %s
# RUN: llvm-dwarfdump -debug-macro -debug-cu-index %t.dwp | FileCheck -check-prefix=CHECK %s
# RUN: llvm-dwarfdump -debug-macro -debug-cu-index -manaully-generate-unit-index %t.dwp | FileCheck -check-prefix=CHECK2 %s

# CHECK-DAG: .debug_macro.dwo contents:
# CHECK: macro header: version = 0x0005, flags = 0x00, format = DWARF32
Expand All @@ -15,6 +16,9 @@
# CHECK: Index Signature INFO ABBREV STR_OFFSETS MACRO
# CHECK: 1 0x0000000000000000 [0x0000000000000000, 0x0000000000000019) [0x00000000, 0x00000008) [0x00000000, 0x0000000c) [0x00000000, 0x0000000b)

# CHECK2: Index Signature INFO ABBREV STR_OFFSETS MACRO
# CHECK2: 1 0x0000000000000000 [0x0000000000000000, 0x0000000000000019) [0x00000000, 0x00000008) [0x00000000, 0x0000000c) [0x00000000, 0x0000000b)

.section .debug_info.dwo,"e",@progbits
.long .Ldebug_info_dwo_end0-.Ldebug_info_dwo_start0 # Length of Unit
.Ldebug_info_dwo_start0:
Expand Down
27 changes: 25 additions & 2 deletions llvm/test/tools/llvm-dwp/X86/type_dedup.test
@@ -1,8 +1,10 @@
RUN: llvm-dwp %p/../Inputs/type_dedup/a.dwo %p/../Inputs/type_dedup/b.dwo -o %t
RUN: llvm-dwarfdump -v %t | FileCheck %s
RUN: llvm-dwarfdump -v %t | FileCheck -check-prefix=CHECK %s
RUN: llvm-dwarfdump -v -manaully-generate-unit-index %t | FileCheck -check-prefix=CHECK2 %s
RUN: llvm-dwp %p/../Inputs/type_dedup/b.dwo -o %tb.dwp
RUN: llvm-dwp %p/../Inputs/type_dedup/a.dwo %tb.dwp -o %t
RUN: llvm-dwarfdump -v %t | FileCheck %s
RUN: llvm-dwarfdump -v %t | FileCheck -check-prefix=CHECK %s
RUN: llvm-dwarfdump -v -manaully-generate-unit-index %t | FileCheck -check-prefix=CHECK2 %s

a.cpp:
struct common { };
Expand Down Expand Up @@ -36,3 +38,24 @@ CHECK: DW_TAG_type_unit
CHECK: 0x00000066: DW_TAG_structure_type
CHECK: DW_AT_name {{.*}} "bdistinct"
CHECK-NOT: Type Unit

CHECK2-LABEL: .debug_types.dwo contents:
CHECK2: [[COMMONUOFF:0x[0-9a-f]*]]:
CHECK2-LABEL: Type Unit: length = 0x00000020, format = DWARF32, version = 0x0004, abbr_offset =
CHECK2: 0x0000, addr_size = 0x08, name = 'common', type_signature = [[COMMONSIG:0x[0-9a-f]*]], type_offset = 0x[[COMMONOFF:.*]] (next unit at [[AUOFF:.*]])
CHECK2: DW_TAG_type_unit
CHECK2: [[COMMONOFF]]: DW_TAG_structure_type
CHECK2: DW_AT_name {{.*}} "common"
CHECK2: [[AUOFF]]:
CHECK2-LABEL: Type Unit: length = 0x00000020, format = DWARF32, version = 0x0004, abbr_offset =
CHECK2: 0x0000, addr_size = 0x08, name = 'adistinct', type_signature = [[ASIG:0x[0-9a-f]*]], type_offset = 0x[[AOFF:.*]] (next unit at [[BUOFF:.*]])
CHECK2: DW_TAG_type_unit
CHECK2: 0x00000042: DW_TAG_structure_type
CHECK2: DW_AT_name {{.*}} "adistinct"
CHECK2: [[BUOFF]]:
CHECK2-LABEL: Type Unit: length = 0x00000020, format = DWARF32, version = 0x0004, abbr_offset =
CHECK2: 0x{{.*}}, addr_size = 0x08, name = 'bdistinct', type_signature = [[BSIG:0x[0-9a-f]*]], type_offset = 0x[[BOFF:.*]] (next unit at [[XUOFF:.*]])
CHECK2: DW_TAG_type_unit
CHECK2: 0x00000066: DW_TAG_structure_type
CHECK2: DW_AT_name {{.*}} "bdistinct"
CHECK2-NOT: Type Unit
8 changes: 8 additions & 0 deletions llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
Expand Up @@ -247,6 +247,13 @@ static cl::opt<bool>
cl::desc("Show the sizes of all debug sections, "
"expressed in bytes."),
cat(DwarfDumpCategory));
// Hidden flag, off by default; its value is forwarded to
// DWARFContext::setParseCUTUIndexManually().
// NOTE: the option name is spelled "manaully" (sic). The lit tests invoke the
// flag with exactly this spelling, so renaming it requires updating them too.
static cl::opt<bool> ManuallyGenerateUnitIndex(
"manaully-generate-unit-index",
cl::desc("if the input is dwp file, parse .debug_info "
"section and use it to populate "
"DW_SECT_INFO contributions in cu-index. "
"For DWARF5 it also populated TU Index."),
cl::init(false), cl::Hidden, cl::cat(DwarfDumpCategory));
static cl::opt<bool>
ShowSources("show-sources",
cl::desc("Show the sources across all compilation units."),
Expand Down Expand Up @@ -635,6 +642,7 @@ static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
std::unique_ptr<DWARFContext> DICtx = DWARFContext::create(
*Obj, DWARFContext::ProcessDebugRelocations::Process, nullptr, "",
RecoverableErrorHandler);
DICtx->setParseCUTUIndexManually(ManuallyGenerateUnitIndex);
if (!HandleObj(*Obj, *DICtx, Filename, OS))
Result = false;
}
Expand Down

0 comments on commit a5bd76a

Please sign in to comment.