From 3280e0467f166d8277c99ead408c524205f0765f Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Wed, 9 May 2018 12:06:17 +0000 Subject: [PATCH] DWARFVerifier: Check "completeness" of .debug_names section Summary: This patch implements a check which makes sure all entries required by the DWARF v5 specification are present in the Name Index. The algorithm tries to follow the wording of Section 6.1.1.1 of the spec as closely as possible. The main deviation from it is that instead of a whitelist-based approach in the spec "The name index must contain an entry for each debugging information entry that defines a named subprogram, label, variable, type, or namespace" I chose a blacklist-based one, where I consider everything to be "in" and then remove the entries that don't make sense. I did this because it has more potential for catching interesting cases and the above is a bit vague (it uses plain words like "variable" and "subprogram", but the rest of the section speaks about specific TAGs). This approach has raised some interesting questions, the main one being whether enumerator values should be indexed. The consensus seems to be that they should, although it does not follow from section 6.1.1.1. For the time being I made the verifier ignore these, as LLVM does not do this yet, and I wanted to get a clean run when verifying generated debug info. Another interesting case was the DW_TAG_imported_declaration. It was not immediately clear to me whether this should go in or not, but currently it is not indexed, and (unlike the enumerators) in does not seem to cause problems for LLDB, so I've also ignored it. Reviewers: JDevlieghere, aprantl, dblaikie Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D46583 llvm-svn: 331868 --- .../DebugInfo/DWARF/DWARFAcceleratorTable.h | 16 ++ .../llvm/DebugInfo/DWARF/DWARFVerifier.h | 7 +- .../DebugInfo/DWARF/DWARFAcceleratorTable.cpp | 33 ++- llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp | 146 +++++++++++++ .../X86/debug-names-verify-completeness.s | 195 ++++++++++++++++++ 5 files changed, 393 insertions(+), 4 deletions(-) create mode 100644 llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify-completeness.s diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h index 7cc2825d997ba..c98275ad09f54 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h @@ -429,6 +429,9 @@ class DWARFDebugNames : public DWARFAcceleratorTable { Expected getEntry(uint32_t *Offset) const; + /// Look up all entries in this Name Index matching \c Key. + iterator_range equal_range(StringRef Key) const; + llvm::Error extract(); uint32_t getUnitOffset() const { return Base; } uint32_t getNextUnitOffset() const { return Base + 4 + Hdr.UnitLength; } @@ -444,6 +447,10 @@ class DWARFDebugNames : public DWARFAcceleratorTable { /// "NameIndices" vector in the Accelerator section. const NameIndex *CurrentIndex = nullptr; + /// Whether this is a local iterator (searches in CurrentIndex only) or not + /// (searches all name indices). + bool IsLocal; + Optional CurrentEntry; unsigned DataOffset = 0; ///< Offset into the section. std::string Key; ///< The Key we are searching for. @@ -464,6 +471,10 @@ class DWARFDebugNames : public DWARFAcceleratorTable { /// Indexes in the section in sequence. ValueIterator(const DWARFDebugNames &AccelTable, StringRef Key); + /// Create a "begin" iterator for looping over all entries in a specific + /// Name Index. Other indices in the section will not be visited. + ValueIterator(const NameIndex &NI, StringRef Key); + /// End marker. ValueIterator() = default; @@ -488,6 +499,7 @@ class DWARFDebugNames : public DWARFAcceleratorTable { private: SmallVector NameIndices; + DenseMap CUToNameIndex; public: DWARFDebugNames(const DWARFDataExtractor &AccelSection, @@ -503,6 +515,10 @@ class DWARFDebugNames : public DWARFAcceleratorTable { using const_iterator = SmallVector::const_iterator; const_iterator begin() const { return NameIndices.begin(); } const_iterator end() const { return NameIndices.end(); } + + /// Return the Name Index covering the compile unit at CUOffset, or nullptr if + /// there is no Name Index covering that unit. + const NameIndex *getCUNameIndex(uint32_t CUOffset); }; } // end namespace llvm diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h index 85164ec92a9bc..4164a8b4528aa 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h @@ -25,6 +25,7 @@ struct DWARFAttribute; class DWARFContext; class DWARFDie; class DWARFUnit; +class DWARFCompileUnit; class DWARFDataExtractor; class DWARFDebugAbbrev; class DataExtractor; @@ -242,6 +243,8 @@ class DWARFVerifier { DWARFDebugNames::AttributeEncoding AttrEnc); unsigned verifyNameIndexEntries(const DWARFDebugNames::NameIndex &NI, uint32_t Name, const DataExtractor &StrData); + unsigned verifyNameIndexCompleteness(const DWARFDie &Die, + const DWARFDebugNames::NameIndex &NI); /// Verify that the DWARF v5 accelerator table is valid. /// @@ -253,8 +256,8 @@ class DWARFVerifier { /// - The buckets have a valid index, or they are empty. /// - All names are reachable via the hash table (they have the correct hash, /// and the hash is in the correct bucket). - /// - Information in the index entries is consistent with the debug_info - /// section DIEs. + /// - Information in the index entries is complete (all required entries are + /// present) and consistent with the debug_info section DIEs. /// /// \param AccelSection section containing the acceleration table /// \param StrData string section diff --git a/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp index 9d1d0708e08d9..1fa3bc09f4b7f 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp @@ -770,6 +770,11 @@ llvm::Error DWARFDebugNames::extract() { return Error::success(); } +iterator_range +DWARFDebugNames::NameIndex::equal_range(StringRef Key) const { + return make_range(ValueIterator(*this, Key), ValueIterator()); +} + LLVM_DUMP_METHOD void DWARFDebugNames::dump(raw_ostream &OS) const { ScopedPrinter W(OS); for (const NameIndex &NI : NameIndices) @@ -844,20 +849,44 @@ void DWARFDebugNames::ValueIterator::next() { if (getEntryAtCurrentOffset()) return; - // Try the next Name Index. + // If we're a local iterator, we're done. + if (IsLocal) { + setEnd(); + return; + } + + // Otherwise, try the next index. ++CurrentIndex; searchFromStartOfCurrentIndex(); } DWARFDebugNames::ValueIterator::ValueIterator(const DWARFDebugNames &AccelTable, StringRef Key) - : CurrentIndex(AccelTable.NameIndices.begin()), Key(Key) { + : CurrentIndex(AccelTable.NameIndices.begin()), IsLocal(false), Key(Key) { searchFromStartOfCurrentIndex(); } +DWARFDebugNames::ValueIterator::ValueIterator( + const DWARFDebugNames::NameIndex &NI, StringRef Key) + : CurrentIndex(&NI), IsLocal(true), Key(Key) { + if (!findInCurrentIndex()) + setEnd(); +} + iterator_range DWARFDebugNames::equal_range(StringRef Key) const { if (NameIndices.empty()) return make_range(ValueIterator(), ValueIterator()); return make_range(ValueIterator(*this, Key), ValueIterator()); } + +const DWARFDebugNames::NameIndex * +DWARFDebugNames::getCUNameIndex(uint32_t CUOffset) { + if (CUToNameIndex.size() == 0 && NameIndices.size() > 0) { + for (const auto &NI : *this) { + for (uint32_t CU = 0; CU < NI.getCUCount(); ++CU) + CUToNameIndex.try_emplace(NI.getCUOffset(CU), &NI); + } + } + return CUToNameIndex.lookup(CUOffset); +} diff --git a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp index ba7aef37eac58..91e5cfede2947 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp @@ -1122,6 +1122,7 @@ DWARFVerifier::verifyNameIndexEntries(const DWARFDebugNames::NameIndex &NI, "of DIE @ {2:x}: index - {3}; debug_info - {4}.\n", NI.getUnitOffset(), EntryID, DIEOffset, Str, make_range(EntryNames.begin(), EntryNames.end())); + ++NumErrors; } } handleAllErrors(EntryOr.takeError(), @@ -1142,6 +1143,141 @@ DWARFVerifier::verifyNameIndexEntries(const DWARFDebugNames::NameIndex &NI, return NumErrors; } +static bool isVariableIndexable(const DWARFDie &Die, DWARFContext &DCtx) { + Optional Location = Die.findRecursively(DW_AT_location); + if (!Location) + return false; + + auto ContainsInterestingOperators = [&](StringRef D) { + DWARFUnit *U = Die.getDwarfUnit(); + DataExtractor Data(D, DCtx.isLittleEndian(), U->getAddressByteSize()); + DWARFExpression Expression(Data, U->getVersion(), U->getAddressByteSize()); + return any_of(Expression, [](DWARFExpression::Operation &Op) { + return !Op.isError() && (Op.getCode() == DW_OP_addr || + Op.getCode() == DW_OP_form_tls_address || + Op.getCode() == DW_OP_GNU_push_tls_address); + }); + }; + + if (Optional> Expr = Location->getAsBlock()) { + // Inlined location. + if (ContainsInterestingOperators(toStringRef(*Expr))) + return true; + } else if (Optional Offset = Location->getAsSectionOffset()) { + // Location list. + if (const DWARFDebugLoc *DebugLoc = DCtx.getDebugLoc()) { + if (const DWARFDebugLoc::LocationList *LocList = + DebugLoc->getLocationListAtOffset(*Offset)) { + if (any_of(LocList->Entries, [&](const DWARFDebugLoc::Entry &E) { + return ContainsInterestingOperators({E.Loc.data(), E.Loc.size()}); + })) + return true; + } + } + } + return false; +} + +unsigned DWARFVerifier::verifyNameIndexCompleteness( + const DWARFDie &Die, const DWARFDebugNames::NameIndex &NI) { + + // First check, if the Die should be indexed. The code follows the DWARF v5 + // wording as closely as possible. + + // "All non-defining declarations (that is, debugging information entries + // with a DW_AT_declaration attribute) are excluded." + if (Die.find(DW_AT_declaration)) + return 0; + + // "DW_TAG_namespace debugging information entries without a DW_AT_name + // attribute are included with the name “(anonymous namespace)”. + // All other debugging information entries without a DW_AT_name attribute + // are excluded." + // "If a subprogram or inlined subroutine is included, and has a + // DW_AT_linkage_name attribute, there will be an additional index entry for + // the linkage name." + auto EntryNames = getNames(Die); + if (EntryNames.empty()) + return 0; + + // We deviate from the specification here, which says: + // "The name index must contain an entry for each debugging information entry + // that defines a named subprogram, label, variable, type, or namespace, + // subject to ..." + // Instead whitelisting all TAGs representing a "type" or a "subprogram", to + // make sure we catch any missing items, we instead blacklist all TAGs that we + // know shouldn't be indexed. + switch (Die.getTag()) { + // Compile unit has a name but it shouldn't be indexed. + case DW_TAG_compile_unit: + return 0; + + // Function and template parameters are not globally visible, so we shouldn't + // index them. + case DW_TAG_formal_parameter: + case DW_TAG_template_value_parameter: + case DW_TAG_template_type_parameter: + case DW_TAG_GNU_template_parameter_pack: + case DW_TAG_GNU_template_template_param: + return 0; + + // Object members aren't globally visible. + case DW_TAG_member: + return 0; + + // According to a strict reading of the specification, enumerators should not + // be indexed (and LLVM currently does not do that). However, this causes + // problems for the debuggers, so we may need to reconsider this. + case DW_TAG_enumerator: + return 0; + + // Imported declarations should not be indexed according to the specification + // and LLVM currently does not do that. + case DW_TAG_imported_declaration: + return 0; + + // "DW_TAG_subprogram, DW_TAG_inlined_subroutine, and DW_TAG_label debugging + // information entries without an address attribute (DW_AT_low_pc, + // DW_AT_high_pc, DW_AT_ranges, or DW_AT_entry_pc) are excluded." + case DW_TAG_subprogram: + case DW_TAG_inlined_subroutine: + case DW_TAG_label: + if (Die.findRecursively( + {DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_entry_pc})) + break; + return 0; + + // "DW_TAG_variable debugging information entries with a DW_AT_location + // attribute that includes a DW_OP_addr or DW_OP_form_tls_address operator are + // included; otherwise, they are excluded." + // + // LLVM extension: We also add DW_OP_GNU_push_tls_address to this list. + case DW_TAG_variable: + if (isVariableIndexable(Die, DCtx)) + break; + return 0; + + default: + break; + } + + // Now we know that our Die should be present in the Index. Let's check if + // that's the case. + unsigned NumErrors = 0; + for (StringRef Name : EntryNames) { + if (none_of(NI.equal_range(Name), [&Die](const DWARFDebugNames::Entry &E) { + return E.getDIESectionOffset() == uint64_t(Die.getOffset()); + })) { + error() << formatv("Name Index @ {0:x}: Entry for DIE @ {1:x} ({2}) with " + "name {3} missing.\n", + NI.getUnitOffset(), Die.getOffset(), Die.getTag(), + Name); + ++NumErrors; + } + } + return NumErrors; +} + unsigned DWARFVerifier::verifyDebugNames(const DWARFSection &AccelSection, const DataExtractor &StrData) { unsigned NumErrors = 0; @@ -1171,6 +1307,16 @@ unsigned DWARFVerifier::verifyDebugNames(const DWARFSection &AccelSection, for (uint64_t Name = 1; Name <= NI.getNameCount(); ++Name) NumErrors += verifyNameIndexEntries(NI, Name, StrData); + if (NumErrors > 0) + return NumErrors; + + for (const std::unique_ptr &CU : DCtx.compile_units()) { + if (const DWARFDebugNames::NameIndex *NI = + AccelTable.getCUNameIndex(CU->getOffset())) { + for (const DWARFDebugInfoEntry &Die : CU->dies()) + NumErrors += verifyNameIndexCompleteness(DWARFDie(CU.get(), &Die), *NI); + } + } return NumErrors; } diff --git a/llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify-completeness.s b/llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify-completeness.s new file mode 100644 index 0000000000000..359dce9b0b929 --- /dev/null +++ b/llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify-completeness.s @@ -0,0 +1,195 @@ +# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj -o - | not llvm-dwarfdump -verify - | FileCheck %s + +# CHECK: error: Name Index @ 0x0: Entry for DIE @ 0x10 (DW_TAG_namespace) with name namesp missing. +# CHECK: error: Name Index @ 0x0: Entry for DIE @ 0x15 (DW_TAG_variable) with name var_block_addr missing. +# CHECK: error: Name Index @ 0x0: Entry for DIE @ 0x25 (DW_TAG_namespace) with name (anonymous namespace) missing. +# CHECK: error: Name Index @ 0x0: Entry for DIE @ 0x26 (DW_TAG_variable) with name var_loc_addr missing. +# CHECK: error: Name Index @ 0x0: Entry for DIE @ 0x30 (DW_TAG_variable) with name var_loc_tls missing. +# CHECK: error: Name Index @ 0x0: Entry for DIE @ 0x37 (DW_TAG_variable) with name var_loc_gnu_tls missing. +# CHECK: error: Name Index @ 0x0: Entry for DIE @ 0x3e (DW_TAG_subprogram) with name fun_name missing. +# CHECK: error: Name Index @ 0x0: Entry for DIE @ 0x3e (DW_TAG_subprogram) with name _Z8fun_name missing. +# CHECK: error: Name Index @ 0x0: Entry for DIE @ 0x4f (DW_TAG_inlined_subroutine) with name fun_inline missing. +# CHECK: error: Name Index @ 0x0: Entry for DIE @ 0x64 (DW_TAG_label) with name label missing. + + .section .debug_str,"MS",@progbits,1 +.Linfo_producer: + .asciz "hand-written DWARF" +.Lname_var_block_addr: + .asciz "var_block_addr" +.Lname_var_loc_addr: + .asciz "var_loc_addr" +.Lname_var_loc_tls: + .asciz "var_loc_tls" +.Lname_var_loc_gnu_tls: + .asciz "var_loc_gnu_tls" +.Lname_fun_name: + .asciz "fun_name" +.Lname_fun_link_name: + .asciz "_Z8fun_name" +.Lname_fun_inline: + .asciz "fun_inline" +.Lnamespace: + .asciz "namesp" +.Lname_label: + .asciz "label" + + .section .debug_loc,"",@progbits +.Ldebug_loc0: + .quad 0 + .quad 1 + .short .Lloc0_end-.Lloc0_start # Loc expr size +.Lloc0_start: + .byte 3 # DW_OP_addr + .quad 0x47 +.Lloc0_end: + .quad 0 + .quad 0 + + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 14 # DW_FORM_strp + .byte 0 # EOM(1) + .byte 0 # EOM(2) + + .byte 2 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 0 # EOM(1) + .byte 0 # EOM(2) + + .byte 3 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 110 # DW_AT_linkage_name + .byte 14 # DW_FORM_strp + .byte 82 # DW_AT_entry_pc + .byte 1 # DW_FORM_addr + .byte 0 # EOM(1) + .byte 0 # EOM(2) + + .byte 4 # Abbreviation Code + .byte 57 # DW_TAG_namespace + .byte 1 # DW_CHILDREN_yes + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 0 # EOM(1) + .byte 0 # EOM(2) + + .byte 5 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 2 # DW_AT_location + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + + .byte 6 # Abbreviation Code + .byte 57 # DW_TAG_namespace + .byte 1 # DW_CHILDREN_yes + .byte 0 # EOM(1) + .byte 0 # EOM(2) + + .byte 7 # Abbreviation Code + .byte 29 # DW_TAG_inlined_subroutine + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 1 # DW_FORM_addr + .byte 0 # EOM(1) + .byte 0 # EOM(2) + + .byte 8 # Abbreviation Code + .byte 10 # DW_TAG_label + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 82 # DW_AT_entry_pc + .byte 1 # DW_FORM_addr + .byte 0 # EOM(1) + .byte 0 # EOM(2) + + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Lcu_end0-.Lcu_start0 # Length of Unit +.Lcu_start0: + .short 4 # DWARF version number + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] DW_TAG_compile_unit + .long .Linfo_producer # DW_AT_producer + + .byte 4 # Abbrev [4] DW_TAG_namespace + .long .Lnamespace # DW_AT_name + .byte 2 # Abbrev [2] DW_TAG_variable + .long .Lname_var_block_addr # DW_AT_name + .byte 9 # DW_AT_location + .byte 3 # DW_OP_addr + .quad 0x47 + .byte 0 # End Of Children Mark + + .byte 6 # Abbrev [6] DW_TAG_namespace + .byte 5 # Abbrev [5] DW_TAG_variable + .long .Lname_var_loc_addr # DW_AT_name + .long .Ldebug_loc0 # DW_AT_location + .byte 0 # End Of Children Mark + + .byte 2 # Abbrev [2] DW_TAG_variable + .long .Lname_var_loc_tls # DW_AT_name + .byte 1 # DW_AT_location + .byte 0x9b # DW_OP_form_tls_address + + .byte 2 # Abbrev [2] DW_TAG_variable + .long .Lname_var_loc_gnu_tls # DW_AT_name + .byte 1 # DW_AT_location + .byte 0xe0 # DW_OP_GNU_push_tls_address + + .byte 3 # Abbrev [3] DW_TAG_subprogram + .long .Lname_fun_name # DW_AT_name + .long .Lname_fun_link_name # DW_AT_linkage_name + .quad 0x47 # DW_AT_entry_pc + .byte 7 # Abbrev [7] DW_TAG_inlined_subroutine + .long .Lname_fun_inline # DW_AT_name + .quad 0x48 # DW_AT_low_pc + .quad 0x49 # DW_AT_high_pc + .byte 8 # Abbrev [8] DW_TAG_label + .long .Lname_label # DW_AT_name + .quad 0x4a # DW_AT_entry_pc + .byte 0 # End Of Children Mark + + .byte 0 # End Of Children Mark +.Lcu_end0: + + .section .debug_names,"",@progbits + .long .Lnames_end0-.Lnames_start0 # Header: contribution length +.Lnames_start0: + .short 5 # Header: version + .short 0 # Header: padding + .long 1 # Header: compilation unit count + .long 0 # Header: local type unit count + .long 0 # Header: foreign type unit count + .long 0 # Header: bucket count + .long 0 # Header: name count + .long .Lnames_abbrev_end0-.Lnames_abbrev_start0 # Header: abbreviation table size + .long 0 # Header: augmentation length + .long .Lcu_begin0 # Compilation unit 0 +.Lnames_abbrev_start0: + .byte 0 # End of abbrev list +.Lnames_abbrev_end0: +.Lnames_entries0: +.Lnames_end0: +