Skip to content

Commit

Permalink
DWARFVerifier: Enhance validation of .debug_names hash tables
Browse files Browse the repository at this point in the history
Summary:
This patch adds more checks to the .debug_names validator. Specifically,
they check for:
- buckets claiming to be non-empty but pointing to mismatched hashes
  (most consumers would interpret this as an empty bucket, but it
  questionable whether the generator meant that)
- hashes that are not reachable from any bucket
- names with incorrect hashes

Together, these checks ensure that any name in the index can be reached
through the hash table using the regular lookup algorithm. We also warn
if we encounter a name index without a hash table.

Reviewers: JDevlieghere, aprantl, dblaikie

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D44433

llvm-svn: 327699
  • Loading branch information
labath committed Mar 16, 2018
1 parent 883e96c commit 906b777
Show file tree
Hide file tree
Showing 6 changed files with 432 additions and 27 deletions.
22 changes: 11 additions & 11 deletions llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -348,14 +348,14 @@ class DWARFDebugNames : public DWARFAcceleratorTable {
}
};

public:
/// A single entry in the Name Table (Dwarf 5 sect. 6.1.1.4.6) of the Name
/// Index.
struct NameTableEntry {
uint32_t StringOffset; ///< Offset of the name of the described entities.
uint32_t EntryOffset; ///< Offset of the first Entry in the list.
};

public:
/// Represents a single accelerator table within the Dwarf 5 .debug_names
/// section.
class NameIndex {
Expand All @@ -373,16 +373,6 @@ class DWARFDebugNames : public DWARFAcceleratorTable {
uint32_t EntryOffsetsBase;
uint32_t EntriesBase;

/// Reads an entry in the Hash Array for the given Index. The input Index
/// is 1-based.
uint32_t getHashArrayEntry(uint32_t Index) const;

/// Reads an entry in the Name Table for the given Index. The Name Table
/// consists of two arrays -- String Offsets and Entry Offsets. The returned
/// offsets are relative to the starts of respective sections. Input Index
/// is 1-based.
NameTableEntry getNameTableEntry(uint32_t Index) const;

Expected<Entry> getEntry(uint32_t *Offset) const;

void dumpCUs(ScopedPrinter &W) const;
Expand Down Expand Up @@ -423,6 +413,16 @@ class DWARFDebugNames : public DWARFAcceleratorTable {
uint32_t getBucketArrayEntry(uint32_t Bucket) const;
uint32_t getBucketCount() const { return Hdr.BucketCount; }

/// Reads an entry in the Hash Array for the given Index. The input Index
/// is 1-based.
uint32_t getHashArrayEntry(uint32_t Index) const;

/// Reads an entry in the Name Table for the given Index. The Name Table
/// consists of two arrays -- String Offsets and Entry Offsets. The returned
/// offsets are relative to the starts of respective sections. Input Index
/// is 1-based.
NameTableEntry getNameTableEntry(uint32_t Index) const;

uint32_t getNameCount() const { return Hdr.NameCount; }

llvm::Error extract();
Expand Down
6 changes: 5 additions & 1 deletion llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#define LLVM_DEBUGINFO_DWARF_DWARFVERIFIER_H

#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
#include "llvm/DebugInfo/DWARF/DWARFAddressRange.h"
#include "llvm/DebugInfo/DWARF/DWARFDie.h"

Expand All @@ -26,7 +27,6 @@ class DWARFDie;
class DWARFUnit;
class DWARFDataExtractor;
class DWARFDebugAbbrev;
class DWARFDebugNames;
class DataExtractor;
struct DWARFSection;

Expand Down Expand Up @@ -234,6 +234,8 @@ class DWARFVerifier {
const char *SectionName);

unsigned verifyDebugNamesCULists(const DWARFDebugNames &AccelTable);
unsigned verifyNameIndexBuckets(const DWARFDebugNames::NameIndex &NI,
const DataExtractor &StrData);

/// Verify that the DWARF v5 accelerator table is valid.
///
Expand All @@ -242,6 +244,8 @@ class DWARFVerifier {
/// section and can be parsed.
/// - The CU lists reference existing compile units.
/// - The buckets have a valid index, or they are empty.
/// - All names are reachable via the hash table (they have the correct hash,
/// and the hash is in the correct bucket).
///
/// \param AccelSection section containing the acceleration table
/// \param StrData string section
Expand Down
131 changes: 116 additions & 15 deletions llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@
//===----------------------------------------------------------------------===//

#include "llvm/DebugInfo/DWARF/DWARFVerifier.h"
#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/DebugInfo/DWARF/DWARFSection.h"
#include "llvm/Support/DJB.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
Expand Down Expand Up @@ -826,6 +826,119 @@ DWARFVerifier::verifyDebugNamesCULists(const DWARFDebugNames &AccelTable) {
return NumErrors;
}

unsigned
DWARFVerifier::verifyNameIndexBuckets(const DWARFDebugNames::NameIndex &NI,
const DataExtractor &StrData) {
struct BucketInfo {
uint32_t Bucket;
uint32_t Index;

constexpr BucketInfo(uint32_t Bucket, uint32_t Index)
: Bucket(Bucket), Index(Index) {}
bool operator<(const BucketInfo &RHS) const { return Index < RHS.Index; };
};

uint32_t NumErrors = 0;
if (NI.getBucketCount() == 0) {
warn() << formatv("Name Index @ {0:x} does not contain a hash table.\n",
NI.getUnitOffset());
return NumErrors;
}

// Build up a list of (Bucket, Index) pairs. We use this later to verify that
// each Name is reachable from the appropriate bucket.
std::vector<BucketInfo> BucketStarts;
BucketStarts.reserve(NI.getBucketCount() + 1);
for (uint32_t Bucket = 0, End = NI.getBucketCount(); Bucket < End; ++Bucket) {
uint32_t Index = NI.getBucketArrayEntry(Bucket);
if (Index > NI.getNameCount()) {
error() << formatv("Bucket {0} of Name Index @ {1:x} contains invalid "
"value {2}. Valid range is [0, {3}].\n",
Bucket, NI.getUnitOffset(), Index, NI.getNameCount());
++NumErrors;
continue;
}
if (Index > 0)
BucketStarts.emplace_back(Bucket, Index);
}

// If there were any buckets with invalid values, skip further checks as they
// will likely produce many errors which will only confuse the actual root
// problem.
if (NumErrors > 0)
return NumErrors;

// Sort the list in the order of increasing "Index" entries.
array_pod_sort(BucketStarts.begin(), BucketStarts.end());

// Insert a sentinel entry at the end, so we can check that the end of the
// table is covered in the loop below.
BucketStarts.emplace_back(NI.getBucketCount(), NI.getNameCount() + 1);

// Loop invariant: NextUncovered is the (1-based) index of the first Name
// which is not reachable by any of the buckets we processed so far (and
// hasn't been reported as uncovered).
uint32_t NextUncovered = 1;
for (const BucketInfo &B : BucketStarts) {
// Under normal circumstances B.Index be equal to NextUncovered, but it can
// be less if a bucket points to names which are already known to be in some
// bucket we processed earlier. In that case, we won't trigger this error,
// but report the mismatched hash value error instead. (We know the hash
// will not match because we have already verified that the name's hash
// puts it into the previous bucket.)
if (B.Index > NextUncovered) {
error() << formatv("Name Index @ {0:x}: Name table entries [{1}, {2}] "
"are not covered by the hash table.\n",
NI.getUnitOffset(), NextUncovered, B.Index - 1);
++NumErrors;
}
uint32_t Idx = B.Index;

// The rest of the checks apply only to non-sentinel entries.
if (B.Bucket == NI.getBucketCount())
break;

// This triggers if a non-empty bucket points to a name with a mismatched
// hash. Clients are likely to interpret this as an empty bucket, because a
// mismatched hash signals the end of a bucket, but if this is indeed an
// empty bucket, the producer should have signalled this by marking the
// bucket as empty.
uint32_t FirstHash = NI.getHashArrayEntry(Idx);
if (FirstHash % NI.getBucketCount() != B.Bucket) {
error() << formatv(
"Name Index @ {0:x}: Bucket {1} is not empty but points to a "
"mismatched hash value {2:x} (belonging to bucket {3}).\n",
NI.getUnitOffset(), B.Bucket, FirstHash,
FirstHash % NI.getBucketCount());
++NumErrors;
}

// This find the end of this bucket and also verifies that all the hashes in
// this bucket are correct by comparing the stored hashes to the ones we
// compute ourselves.
while (Idx <= NI.getNameCount()) {
uint32_t Hash = NI.getHashArrayEntry(Idx);
if (Hash % NI.getBucketCount() != B.Bucket)
break;

auto NTE = NI.getNameTableEntry(Idx);
const char *Str = StrData.getCStr(&NTE.StringOffset);
if (caseFoldingDjbHash(Str) != Hash) {
error() << formatv("Name Index @ {0:x}: String ({1}) at index {2} "
"hashes to {3:x}, but "
"the Name Index hash is {4:x}\n",
NI.getUnitOffset(), Str, Idx,
caseFoldingDjbHash(Str), Hash);
++NumErrors;
}

++Idx;
}
NextUncovered = std::max(NextUncovered, Idx);
}
return NumErrors;
}

unsigned DWARFVerifier::verifyDebugNames(const DWARFSection &AccelSection,
const DataExtractor &StrData) {
unsigned NumErrors = 0;
Expand All @@ -843,20 +956,8 @@ unsigned DWARFVerifier::verifyDebugNames(const DWARFSection &AccelSection,
}

NumErrors += verifyDebugNamesCULists(AccelTable);

for (const DWARFDebugNames::NameIndex &NI : AccelTable) {
for (uint32_t Bucket = 0, End = NI.getBucketCount(); Bucket < End;
++Bucket) {
uint32_t Index = NI.getBucketArrayEntry(Bucket);
if (Index > NI.getNameCount()) {
error() << formatv("Bucket {0} of Name Index @ {1:x} contains invalid "
"value {2}. Valid range is [0, {3}].\n",
Bucket, NI.getUnitOffset(), Index,
NI.getNameCount());
++NumErrors;
}
}
}
for (const auto &NI : AccelTable)
NumErrors += verifyNameIndexBuckets(NI, StrData);

return NumErrors;
}
Expand Down
80 changes: 80 additions & 0 deletions llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify-no-buckets.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj | \
# RUN: llvm-dwarfdump -verify - | FileCheck %s

# CHECK: warning: Name Index @ 0x0 does not contain a hash table.
.section .debug_str,"MS",@progbits,1
.Lstring_foo:
.asciz "foo"
.Lstring_producer:
.asciz "Hand-written dwarf"

.section .debug_abbrev,"",@progbits
.Lsection_abbrev:
.byte 1 # Abbreviation Code
.byte 17 # DW_TAG_compile_unit
.byte 1 # DW_CHILDREN_yes
.byte 37 # DW_AT_producer
.byte 14 # DW_FORM_strp
.byte 19 # DW_AT_language
.byte 5 # DW_FORM_data2
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 2 # Abbreviation Code
.byte 46 # DW_TAG_subprogram
.byte 0 # DW_CHILDREN_no
.byte 3 # DW_AT_name
.byte 14 # DW_FORM_strp
.byte 63 # DW_AT_external
.byte 25 # DW_FORM_flag_present
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 0 # EOM(3)

.section .debug_info,"",@progbits
.Lcu_begin0:
.long .Lcu_end0-.Lcu_start0 # Length of Unit
.Lcu_start0:
.short 4 # DWARF version number
.long .Lsection_abbrev # Offset Into Abbrev. Section
.byte 8 # Address Size (in bytes)
.byte 1 # Abbrev [1] DW_TAG_compile_unit
.long .Lstring_producer # DW_AT_producer
.short 12 # DW_AT_language
.Ldie_foo:
.byte 2 # Abbrev [2] DW_TAG_subprogram
.long .Lstring_foo # DW_AT_name
# DW_AT_external
.byte 0 # End Of Children Mark
.Lcu_end0:

.section .debug_names,"",@progbits
.long .Lnames_end0-.Lnames_start0 # Header: contribution length
.Lnames_start0:
.short 5 # Header: version
.short 0 # Header: padding
.long 1 # Header: compilation unit count
.long 0 # Header: local type unit count
.long 0 # Header: foreign type unit count
.long 0 # Header: bucket count
.long 1 # Header: name count
.long .Lnames_abbrev_end0-.Lnames_abbrev_start0 # Header: abbreviation table size
.long 0 # Header: augmentation length
.long .Lcu_begin0 # Compilation unit 0
.long .Lstring_foo # String in Bucket 1: foo
.long .Lnames0-.Lnames_entries0 # Offset in Bucket 1
.Lnames_abbrev_start0:
.byte 46 # Abbrev code
.byte 46 # DW_TAG_subprogram
.byte 3 # DW_IDX_die_offset
.byte 6 # DW_FORM_data4
.byte 0 # End of abbrev
.byte 0 # End of abbrev
.byte 0 # End of abbrev list
.Lnames_abbrev_end0:
.Lnames_entries0:
.Lnames0:
.byte 46 # Abbrev code
.long .Ldie_foo-.Lcu_begin0 # DW_IDX_die_offset
.long 0 # End of list: foo
.p2align 2
.Lnames_end0:

0 comments on commit 906b777

Please sign in to comment.