DWARFVerifier: Enhance validation of .debug_names hash tables

Summary: This patch adds more checks to the .debug_names validator. Specifically, they check for: - buckets claiming to be non-empty but pointing to mismatched hashes (most consumers would interpret this as an empty bucket, but it questionable whether the generator meant that) - hashes that are not reachable from any bucket - names with incorrect hashes Together, these checks ensure that any name in the index can be reached through the hash table using the regular lookup algorithm. We also warn if we encounter a name index without a hash table. Reviewers: JDevlieghere, aprantl, dblaikie Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D44433 llvm-svn: 327699
llvm · Mar 16, 2018 · 906b777 · 906b777
1 parent 883e96c
commit 906b777
Show file tree

Hide file tree

Showing 6 changed files with 432 additions and 27 deletions.
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
@@ -348,14 +348,14 @@ class DWARFDebugNames : public DWARFAcceleratorTable {
     }
   };
 
+public:
   /// A single entry in the Name Table (Dwarf 5 sect. 6.1.1.4.6) of the Name
   /// Index.
   struct NameTableEntry {
     uint32_t StringOffset; ///< Offset of the name of the described entities.
     uint32_t EntryOffset;  ///< Offset of the first Entry in the list.
   };
 
-public:
   /// Represents a single accelerator table within the Dwarf 5 .debug_names
   /// section.
   class NameIndex {
@@ -373,16 +373,6 @@ class DWARFDebugNames : public DWARFAcceleratorTable {
     uint32_t EntryOffsetsBase;
     uint32_t EntriesBase;
 
-    /// Reads an entry in the Hash Array for the given Index. The input Index
-    /// is 1-based.
-    uint32_t getHashArrayEntry(uint32_t Index) const;
-
-    /// Reads an entry in the Name Table for the given Index. The Name Table
-    /// consists of two arrays -- String Offsets and Entry Offsets. The returned
-    /// offsets are relative to the starts of respective sections. Input Index
-    /// is 1-based.
-    NameTableEntry getNameTableEntry(uint32_t Index) const;
-
     Expected<Entry> getEntry(uint32_t *Offset) const;
 
     void dumpCUs(ScopedPrinter &W) const;
@@ -423,6 +413,16 @@ class DWARFDebugNames : public DWARFAcceleratorTable {
     uint32_t getBucketArrayEntry(uint32_t Bucket) const;
     uint32_t getBucketCount() const { return Hdr.BucketCount; }
 
+    /// Reads an entry in the Hash Array for the given Index. The input Index
+    /// is 1-based.
+    uint32_t getHashArrayEntry(uint32_t Index) const;
+
+    /// Reads an entry in the Name Table for the given Index. The Name Table
+    /// consists of two arrays -- String Offsets and Entry Offsets. The returned
+    /// offsets are relative to the starts of respective sections. Input Index
+    /// is 1-based.
+    NameTableEntry getNameTableEntry(uint32_t Index) const;
+
     uint32_t getNameCount() const { return Hdr.NameCount; }
 
     llvm::Error extract();

diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
@@ -11,6 +11,7 @@
 #define LLVM_DEBUGINFO_DWARF_DWARFVERIFIER_H
 
 #include "llvm/DebugInfo/DIContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
 #include "llvm/DebugInfo/DWARF/DWARFAddressRange.h"
 #include "llvm/DebugInfo/DWARF/DWARFDie.h"
 
@@ -26,7 +27,6 @@ class DWARFDie;
 class DWARFUnit;
 class DWARFDataExtractor;
 class DWARFDebugAbbrev;
-class DWARFDebugNames;
 class DataExtractor;
 struct DWARFSection;
 
@@ -234,6 +234,8 @@ class DWARFVerifier {
                                  const char *SectionName);
 
   unsigned verifyDebugNamesCULists(const DWARFDebugNames &AccelTable);
+  unsigned verifyNameIndexBuckets(const DWARFDebugNames::NameIndex &NI,
+                                  const DataExtractor &StrData);
 
   /// Verify that the DWARF v5 accelerator table is valid.
   ///
@@ -242,6 +244,8 @@ class DWARFVerifier {
   ///   section and can be parsed.
   /// - The CU lists reference existing compile units.
   /// - The buckets have a valid index, or they are empty.
+  /// - All names are reachable via the hash table (they have the correct hash,
+  ///   and the hash is in the correct bucket).
   ///
   /// \param AccelSection section containing the acceleration table
   /// \param StrData string section

diff --git a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
@@ -8,14 +8,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/DebugInfo/DWARF/DWARFVerifier.h"
-#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
 #include "llvm/DebugInfo/DWARF/DWARFDie.h"
 #include "llvm/DebugInfo/DWARF/DWARFExpression.h"
 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
 #include "llvm/DebugInfo/DWARF/DWARFSection.h"
+#include "llvm/Support/DJB.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/WithColor.h"
 #include "llvm/Support/raw_ostream.h"
@@ -826,6 +826,119 @@ DWARFVerifier::verifyDebugNamesCULists(const DWARFDebugNames &AccelTable) {
   return NumErrors;
 }
 
+unsigned
+DWARFVerifier::verifyNameIndexBuckets(const DWARFDebugNames::NameIndex &NI,
+                                      const DataExtractor &StrData) {
+  struct BucketInfo {
+    uint32_t Bucket;
+    uint32_t Index;
+
+    constexpr BucketInfo(uint32_t Bucket, uint32_t Index)
+        : Bucket(Bucket), Index(Index) {}
+    bool operator<(const BucketInfo &RHS) const { return Index < RHS.Index; };
+  };
+
+  uint32_t NumErrors = 0;
+  if (NI.getBucketCount() == 0) {
+    warn() << formatv("Name Index @ {0:x} does not contain a hash table.\n",
+                      NI.getUnitOffset());
+    return NumErrors;
+  }
+
+  // Build up a list of (Bucket, Index) pairs. We use this later to verify that
+  // each Name is reachable from the appropriate bucket.
+  std::vector<BucketInfo> BucketStarts;
+  BucketStarts.reserve(NI.getBucketCount() + 1);
+  for (uint32_t Bucket = 0, End = NI.getBucketCount(); Bucket < End; ++Bucket) {
+    uint32_t Index = NI.getBucketArrayEntry(Bucket);
+    if (Index > NI.getNameCount()) {
+      error() << formatv("Bucket {0} of Name Index @ {1:x} contains invalid "
+                         "value {2}. Valid range is [0, {3}].\n",
+                         Bucket, NI.getUnitOffset(), Index, NI.getNameCount());
+      ++NumErrors;
+      continue;
+    }
+    if (Index > 0)
+      BucketStarts.emplace_back(Bucket, Index);
+  }
+
+  // If there were any buckets with invalid values, skip further checks as they
+  // will likely produce many errors which will only confuse the actual root
+  // problem.
+  if (NumErrors > 0)
+    return NumErrors;
+
+  // Sort the list in the order of increasing "Index" entries.
+  array_pod_sort(BucketStarts.begin(), BucketStarts.end());
+
+  // Insert a sentinel entry at the end, so we can check that the end of the
+  // table is covered in the loop below.
+  BucketStarts.emplace_back(NI.getBucketCount(), NI.getNameCount() + 1);
+
+  // Loop invariant: NextUncovered is the (1-based) index of the first Name
+  // which is not reachable by any of the buckets we processed so far (and
+  // hasn't been reported as uncovered).
+  uint32_t NextUncovered = 1;
+  for (const BucketInfo &B : BucketStarts) {
+    // Under normal circumstances B.Index be equal to NextUncovered, but it can
+    // be less if a bucket points to names which are already known to be in some
+    // bucket we processed earlier. In that case, we won't trigger this error,
+    // but report the mismatched hash value error instead. (We know the hash
+    // will not match because we have already verified that the name's hash
+    // puts it into the previous bucket.)
+    if (B.Index > NextUncovered) {
+      error() << formatv("Name Index @ {0:x}: Name table entries [{1}, {2}] "
+                         "are not covered by the hash table.\n",
+                         NI.getUnitOffset(), NextUncovered, B.Index - 1);
+      ++NumErrors;
+    }
+    uint32_t Idx = B.Index;
+
+    // The rest of the checks apply only to non-sentinel entries.
+    if (B.Bucket == NI.getBucketCount())
+      break;
+
+    // This triggers if a non-empty bucket points to a name with a mismatched
+    // hash. Clients are likely to interpret this as an empty bucket, because a
+    // mismatched hash signals the end of a bucket, but if this is indeed an
+    // empty bucket, the producer should have signalled this by marking the
+    // bucket as empty.
+    uint32_t FirstHash = NI.getHashArrayEntry(Idx);
+    if (FirstHash % NI.getBucketCount() != B.Bucket) {
+      error() << formatv(
+          "Name Index @ {0:x}: Bucket {1} is not empty but points to a "
+          "mismatched hash value {2:x} (belonging to bucket {3}).\n",
+          NI.getUnitOffset(), B.Bucket, FirstHash,
+          FirstHash % NI.getBucketCount());
+      ++NumErrors;
+    }
+
+    // This find the end of this bucket and also verifies that all the hashes in
+    // this bucket are correct by comparing the stored hashes to the ones we
+    // compute ourselves.
+    while (Idx <= NI.getNameCount()) {
+      uint32_t Hash = NI.getHashArrayEntry(Idx);
+      if (Hash % NI.getBucketCount() != B.Bucket)
+        break;
+
+      auto NTE = NI.getNameTableEntry(Idx);
+      const char *Str = StrData.getCStr(&NTE.StringOffset);
+      if (caseFoldingDjbHash(Str) != Hash) {
+        error() << formatv("Name Index @ {0:x}: String ({1}) at index {2} "
+                           "hashes to {3:x}, but "
+                           "the Name Index hash is {4:x}\n",
+                           NI.getUnitOffset(), Str, Idx,
+                           caseFoldingDjbHash(Str), Hash);
+        ++NumErrors;
+      }
+
+      ++Idx;
+    }
+    NextUncovered = std::max(NextUncovered, Idx);
+  }
+  return NumErrors;
+}
+
 unsigned DWARFVerifier::verifyDebugNames(const DWARFSection &AccelSection,
                                          const DataExtractor &StrData) {
   unsigned NumErrors = 0;
@@ -843,20 +956,8 @@ unsigned DWARFVerifier::verifyDebugNames(const DWARFSection &AccelSection,
   }
 
   NumErrors += verifyDebugNamesCULists(AccelTable);
-
-  for (const DWARFDebugNames::NameIndex &NI : AccelTable) {
-    for (uint32_t Bucket = 0, End = NI.getBucketCount(); Bucket < End;
-         ++Bucket) {
-      uint32_t Index = NI.getBucketArrayEntry(Bucket);
-      if (Index > NI.getNameCount()) {
-        error() << formatv("Bucket {0} of Name Index @ {1:x} contains invalid "
-                           "value {2}. Valid range is [0, {3}].\n",
-                           Bucket, NI.getUnitOffset(), Index,
-                           NI.getNameCount());
-        ++NumErrors;
-      }
-    }
-  }
+  for (const auto &NI : AccelTable)
+    NumErrors += verifyNameIndexBuckets(NI, StrData);
 
   return NumErrors;
 }

diff --git a/llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify-no-buckets.s b/llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify-no-buckets.s
@@ -0,0 +1,80 @@
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj | \
+# RUN:   llvm-dwarfdump -verify - | FileCheck %s
+
+# CHECK: warning: Name Index @ 0x0 does not contain a hash table.
+	.section	.debug_str,"MS",@progbits,1
+.Lstring_foo:
+	.asciz	"foo"
+.Lstring_producer:
+	.asciz	"Hand-written dwarf"
+
+	.section	.debug_abbrev,"",@progbits
+.Lsection_abbrev:
+	.byte	1                       # Abbreviation Code
+	.byte	17                      # DW_TAG_compile_unit
+	.byte	1                       # DW_CHILDREN_yes
+	.byte	37                      # DW_AT_producer
+	.byte	14                      # DW_FORM_strp
+	.byte	19                      # DW_AT_language
+	.byte	5                       # DW_FORM_data2
+	.byte	0                       # EOM(1)
+	.byte	0                       # EOM(2)
+	.byte	2                       # Abbreviation Code
+	.byte	46                      # DW_TAG_subprogram
+	.byte	0                       # DW_CHILDREN_no
+	.byte	3                       # DW_AT_name
+	.byte	14                      # DW_FORM_strp
+	.byte	63                      # DW_AT_external
+	.byte	25                      # DW_FORM_flag_present
+	.byte	0                       # EOM(1)
+	.byte	0                       # EOM(2)
+	.byte	0                       # EOM(3)
+
+	.section	.debug_info,"",@progbits
+.Lcu_begin0:
+	.long	.Lcu_end0-.Lcu_start0   # Length of Unit
+.Lcu_start0:
+	.short	4                       # DWARF version number
+	.long	.Lsection_abbrev        # Offset Into Abbrev. Section
+	.byte	8                       # Address Size (in bytes)
+	.byte	1                       # Abbrev [1] DW_TAG_compile_unit
+	.long	.Lstring_producer       # DW_AT_producer
+	.short	12                      # DW_AT_language
+.Ldie_foo:
+	.byte	2                       # Abbrev [2] DW_TAG_subprogram
+	.long	.Lstring_foo            # DW_AT_name
+                                        # DW_AT_external
+	.byte	0                       # End Of Children Mark
+.Lcu_end0:
+
+	.section	.debug_names,"",@progbits
+	.long	.Lnames_end0-.Lnames_start0 # Header: contribution length
+.Lnames_start0:
+	.short	5                       # Header: version
+	.short	0                       # Header: padding
+	.long	1                       # Header: compilation unit count
+	.long	0                       # Header: local type unit count
+	.long	0                       # Header: foreign type unit count
+	.long	0                       # Header: bucket count
+	.long	1                       # Header: name count
+	.long	.Lnames_abbrev_end0-.Lnames_abbrev_start0 # Header: abbreviation table size
+	.long	0                       # Header: augmentation length
+	.long	.Lcu_begin0             # Compilation unit 0
+	.long	.Lstring_foo            # String in Bucket 1: foo
+	.long	.Lnames0-.Lnames_entries0 # Offset in Bucket 1
+.Lnames_abbrev_start0:
+	.byte	46                      # Abbrev code
+	.byte	46                      # DW_TAG_subprogram
+	.byte	3                       # DW_IDX_die_offset
+	.byte	6                       # DW_FORM_data4
+	.byte	0                       # End of abbrev
+	.byte	0                       # End of abbrev
+	.byte	0                       # End of abbrev list
+.Lnames_abbrev_end0:
+.Lnames_entries0:
+.Lnames0:
+	.byte	46                      # Abbrev code
+	.long	.Ldie_foo-.Lcu_begin0   # DW_IDX_die_offset
+	.long	0                       # End of list: foo
+	.p2align	2
+.Lnames_end0: