-
Notifications
You must be signed in to change notification settings - Fork 12k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Modify llvm-gsymutil to ignore invalid file indexes #71431
Conversation
@llvm/pr-subscribers-debuginfo Author: Greg Clayton (clayborg)
Changes: DWARF produced by LTO and BOLT can sometimes be broken where file indexes are beyond the end of the line table's file list in the prologue. This patch allows llvm-gsymutil to convert this DWARF without crashing, and emits errors when:
- the line table contains entries with an invalid file index (the line entry will be removed);
- inline functions have invalid DW_AT_call_file file indexes (the inline entry and all of its children will be removed);
- there are no line table entries for a function and we fall back to making a single line table entry from the function's DW_AT_decl_file/DW_AT_decl_line attributes; we make sure the DW_AT_decl_file attribute is valid before emitting it.
Full diff: https://github.com/llvm/llvm-project/pull/71431.diff
2 Files Affected:
diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
index 7809fd65bfc8f9d..0b225376349ece6 100644
--- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
@@ -65,10 +65,10 @@ struct llvm::gsym::CUInfo {
/// the first client that asks for a compile unit file index will end up
/// doing the conversion, and subsequent clients will get the cached GSYM
/// index.
- uint32_t DWARFToGSYMFileIndex(GsymCreator &Gsym, uint32_t DwarfFileIdx) {
- if (!LineTable)
- return 0;
- assert(DwarfFileIdx < FileCache.size());
+ std::optional<uint32_t> DWARFToGSYMFileIndex(GsymCreator &Gsym,
+ uint32_t DwarfFileIdx) {
+ if (!LineTable || DwarfFileIdx >= FileCache.size())
+ return std::nullopt;
uint32_t &GsymFileIdx = FileCache[DwarfFileIdx];
if (GsymFileIdx != UINT32_MAX)
return GsymFileIdx;
@@ -272,14 +272,24 @@ static void parseInlineInfo(GsymCreator &Gsym, raw_ostream *Log, CUInfo &CUI,
if (auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym))
II.Name = *NameIndex;
- II.CallFile = CUI.DWARFToGSYMFileIndex(
- Gsym, dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_file), 0));
- II.CallLine = dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_line), 0);
- // parse all children and append to parent
- for (DWARFDie ChildDie : Die.children())
- parseInlineInfo(Gsym, Log, CUI, ChildDie, Depth + 1, FI, II,
- AllInlineRanges, WarnIfEmpty);
- Parent.Children.emplace_back(std::move(II));
+ const uint64_t DwarfFileIdx = dwarf::toUnsigned(
+ Die.findRecursively(dwarf::DW_AT_call_file), UINT32_MAX);
+ std::optional<uint32_t> OptGSymFileIdx =
+ CUI.DWARFToGSYMFileIndex(Gsym, DwarfFileIdx);
+ if (OptGSymFileIdx) {
+ II.CallFile = OptGSymFileIdx.value();
+ II.CallLine = dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_line), 0);
+ // parse all children and append to parent
+ for (DWARFDie ChildDie : Die.children())
+ parseInlineInfo(Gsym, Log, CUI, ChildDie, Depth + 1, FI, II,
+ AllInlineRanges, WarnIfEmpty);
+ Parent.Children.emplace_back(std::move(II));
+ } else if (Log) {
+ *Log << "error: inlined function DIE at " << HEX32(Die.getOffset())
+ << " has an invalid file index " << DwarfFileIdx
+ << " in its DW_AT_call_file attribute, this inline entry and all "
+ << "children will be removed.\n";
+ }
return;
}
if (Tag == dwarf::DW_TAG_subprogram || Tag == dwarf::DW_TAG_lexical_block) {
@@ -306,8 +316,20 @@ static void convertFunctionLineTable(raw_ostream *Log, CUInfo &CUI,
// the DW_AT_decl_file an d DW_AT_decl_line if we have both attributes.
std::string FilePath = Die.getDeclFile(
DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath);
- if (FilePath.empty())
+ if (FilePath.empty()) {
+ // If we had a DW_AT_decl_file, but got no file then we need to emit an
+ // error.
+ if (Log) {
+ const uint64_t DwarfFileIdx = dwarf::toUnsigned(
+ Die.findRecursively(dwarf::DW_AT_decl_file), UINT32_MAX);
+ *Log << "error: function DIE at " << HEX32(Die.getOffset())
+ << " has an invalid file index " << DwarfFileIdx
+ << " in its DW_AT_decl_file attribute, unable to create a single "
+ << "line entry from the DW_AT_decl_file/DW_AT_decl_line "
+ << "attributes.\n";
+ }
return;
+ }
if (auto Line =
dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_line}))) {
LineEntry LE(StartAddress, Gsym.insertFile(FilePath), *Line);
@@ -322,7 +344,20 @@ static void convertFunctionLineTable(raw_ostream *Log, CUInfo &CUI,
for (uint32_t RowIndex : RowVector) {
// Take file number and line/column from the row.
const DWARFDebugLine::Row &Row = CUI.LineTable->Rows[RowIndex];
- const uint32_t FileIdx = CUI.DWARFToGSYMFileIndex(Gsym, Row.File);
+ std::optional<uint32_t> OptFileIdx =
+ CUI.DWARFToGSYMFileIndex(Gsym, Row.File);
+ if (!OptFileIdx) {
+ if (Log) {
+ *Log << "error: function DIE at " << HEX32(Die.getOffset()) << " has "
+ << "a line entry with invalid DWARF file index, this entry will "
+ << "be removed:\n";
+ Row.dumpTableHeader(*Log, /*Indent=*/0);
+ Row.dump(*Log);
+ *Log << "\n";
+ }
+ continue;
+ }
+ const uint32_t FileIdx = OptFileIdx.value();
uint64_t RowAddress = Row.Address.Address;
// Watch out for a RowAddress that is in the middle of a line table entry
// in the DWARF. If we pass an address in between two line table entries
diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
index bfc6efc1bbb44e4..53de96cc6953c2d 100644
--- a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
+++ b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
@@ -4324,4 +4324,360 @@ TEST(GSYMTest, TestLineTablesWithEmptyRanges) {
ASSERT_THAT_EXPECTED(LR, Succeeded());
SourceLocation src_loc = {"foo", "/tmp", "main.cpp", 12, 16};
EXPECT_THAT(LR->Locations, testing::ElementsAre(src_loc));
-git}
+}
+
+TEST(GSYMTest, TestHandlingOfInvalidFileIndexes) {
+ // Test that llvm-gsymutil can handle invalid file indexes in the following
+ // cases:
+ // - In line entries in the line table
+ // - When parsing inline entries that have a DW_AT_call_file
+ // - When parsing function dies with no line table entries and it tries to
+ // use the DW_AT_decl_file
+ //
+ //
+ // 0x0000000b: DW_TAG_compile_unit
+ // DW_AT_name ("/tmp/main.cpp")
+ // DW_AT_language (DW_LANG_C)
+ // DW_AT_stmt_list (0x00000000)
+ //
+ // 0x00000015: DW_TAG_subprogram
+ // DW_AT_name ("foo")
+ // DW_AT_low_pc (0x0000000000001000)
+ // DW_AT_high_pc (0x0000000000001050)
+ //
+ // 0x0000002a: DW_TAG_inlined_subroutine
+ // DW_AT_name ("inline_with_invalid_call_file")
+ // DW_AT_low_pc (0x0000000000001010)
+ // DW_AT_high_pc (0x0000000000001020)
+ // DW_AT_call_file (0x0000000a)
+ // DW_AT_call_line (11)
+ //
+ // 0x00000047: DW_TAG_inlined_subroutine
+ // DW_AT_name
+ // ("inline_inside_parent_with_invalid_call_file")
+ // DW_AT_low_pc (0x0000000000001010)
+ // DW_AT_high_pc (0x0000000000001015)
+ // DW_AT_call_file ("/tmp/main.cpp")
+ // DW_AT_call_line (12)
+ //
+ // 0x00000064: NULL
+ //
+ // 0x00000065: DW_TAG_inlined_subroutine
+ // DW_AT_name ("inline_with_valid_call_file")
+ // DW_AT_low_pc (0x0000000000001020)
+ // DW_AT_high_pc (0x0000000000001030)
+ // DW_AT_call_file ("/tmp/main.cpp")
+ // DW_AT_call_line (13)
+ //
+ // 0x00000082: DW_TAG_inlined_subroutine
+ // DW_AT_name
+ // ("inline_inside_parent_with_valid_call_file")
+ // DW_AT_low_pc (0x0000000000001020)
+ // DW_AT_high_pc (0x0000000000001025)
+ // DW_AT_call_file ("/tmp/main.cpp")
+ // DW_AT_call_line (14)
+ //
+ // 0x0000009f: NULL
+ //
+ // 0x000000a0: NULL
+ //
+ // 0x000000a1: DW_TAG_subprogram
+ // DW_AT_name ("func_with_valid_decl_file")
+ // DW_AT_decl_file ("/tmp/main.cpp")
+ // DW_AT_decl_line (20)
+ // DW_AT_low_pc (0x0000000000002000)
+ // DW_AT_high_pc (0x0000000000002050)
+ //
+ // 0x000000b8: DW_TAG_subprogram
+ // DW_AT_name ("func_with_invalid_decl_file")
+ // DW_AT_decl_file (0x0a)
+ // DW_AT_decl_line (20)
+ // DW_AT_low_pc (0x0000000000003000)
+ // DW_AT_high_pc (0x0000000000003050)
+ //
+ // 0x000000cf: NULL
+ //
+ // The line table has an entry at address 0x0000000000001010 that has an
+ // invalid file index that needs to be removed.
+ //
+ // Address Line Column File ISA Discriminator Flags
+ // ---------- ------ ------ ------ --- ------------- -------------
+ // 0x00001000 10 0 1 0 0 is_stmt
+ // 0x00001010 11 0 10 0 0 is_stmt
+ // 0x00001020 11 0 1 0 0 is_stmt
+ // 0x00001030 12 0 1 0 0 is_stmt
+ // 0x00001050 12 0 1 0 0 is_stmt end_sequence
+
+ StringRef yamldata = R"(
+ debug_str:
+ - ''
+ - '/tmp/main.cpp'
+ - foo
+ - inline_with_invalid_call_file
+ - inline_inside_parent_with_invalid_call_file
+ - inline_with_valid_call_file
+ - inline_inside_parent_with_valid_call_file
+ - func_with_valid_decl_file
+ - func_with_invalid_decl_file
+ debug_abbrev:
+ - ID: 0
+ Table:
+ - Code: 0x1
+ Tag: DW_TAG_compile_unit
+ Children: DW_CHILDREN_yes
+ Attributes:
+ - Attribute: DW_AT_name
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_language
+ Form: DW_FORM_udata
+ - Attribute: DW_AT_stmt_list
+ Form: DW_FORM_sec_offset
+ - Code: 0x2
+ Tag: DW_TAG_subprogram
+ Children: DW_CHILDREN_yes
+ Attributes:
+ - Attribute: DW_AT_name
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_low_pc
+ Form: DW_FORM_addr
+ - Attribute: DW_AT_high_pc
+ Form: DW_FORM_addr
+ - Code: 0x3
+ Tag: DW_TAG_inlined_subroutine
+ Children: DW_CHILDREN_yes
+ Attributes:
+ - Attribute: DW_AT_name
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_low_pc
+ Form: DW_FORM_addr
+ - Attribute: DW_AT_high_pc
+ Form: DW_FORM_addr
+ - Attribute: DW_AT_call_file
+ Form: DW_FORM_data4
+ - Attribute: DW_AT_call_line
+ Form: DW_FORM_data4
+ - Code: 0x4
+ Tag: DW_TAG_inlined_subroutine
+ Children: DW_CHILDREN_no
+ Attributes:
+ - Attribute: DW_AT_name
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_low_pc
+ Form: DW_FORM_addr
+ - Attribute: DW_AT_high_pc
+ Form: DW_FORM_addr
+ - Attribute: DW_AT_call_file
+ Form: DW_FORM_data4
+ - Attribute: DW_AT_call_line
+ Form: DW_FORM_data4
+ - Code: 0x5
+ Tag: DW_TAG_subprogram
+ Children: DW_CHILDREN_no
+ Attributes:
+ - Attribute: DW_AT_name
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_decl_file
+ Form: DW_FORM_data1
+ - Attribute: DW_AT_decl_line
+ Form: DW_FORM_data1
+ - Attribute: DW_AT_low_pc
+ Form: DW_FORM_addr
+ - Attribute: DW_AT_high_pc
+ Form: DW_FORM_addr
+ debug_info:
+ - Length: 0xCC
+ Version: 4
+ AbbrevTableID: 0
+ AbbrOffset: 0x0
+ AddrSize: 8
+ Entries:
+ - AbbrCode: 0x1
+ Values:
+ - Value: 0x1
+ - Value: 0x2
+ - Value: 0x0
+ - AbbrCode: 0x2
+ Values:
+ - Value: 0xF
+ - Value: 0x1000
+ - Value: 0x1050
+ - AbbrCode: 0x3
+ Values:
+ - Value: 0x13
+ - Value: 0x1010
+ - Value: 0x1020
+ - Value: 0xA
+ - Value: 0xB
+ - AbbrCode: 0x4
+ Values:
+ - Value: 0x31
+ - Value: 0x1010
+ - Value: 0x1015
+ - Value: 0x1
+ - Value: 0xC
+ - AbbrCode: 0x0
+ - AbbrCode: 0x3
+ Values:
+ - Value: 0x5D
+ - Value: 0x1020
+ - Value: 0x1030
+ - Value: 0x1
+ - Value: 0xD
+ - AbbrCode: 0x4
+ Values:
+ - Value: 0x79
+ - Value: 0x1020
+ - Value: 0x1025
+ - Value: 0x1
+ - Value: 0xE
+ - AbbrCode: 0x0
+ - AbbrCode: 0x0
+ - AbbrCode: 0x5
+ Values:
+ - Value: 0xA3
+ - Value: 0x1
+ - Value: 0x14
+ - Value: 0x2000
+ - Value: 0x2050
+ - AbbrCode: 0x5
+ Values:
+ - Value: 0xBD
+ - Value: 0xA
+ - Value: 0x14
+ - Value: 0x3000
+ - Value: 0x3050
+ - AbbrCode: 0x0
+ debug_line:
+ - Length: 78
+ Version: 2
+ PrologueLength: 36
+ MinInstLength: 1
+ DefaultIsStmt: 1
+ LineBase: 251
+ LineRange: 14
+ OpcodeBase: 13
+ StandardOpcodeLengths: [ 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 ]
+ IncludeDirs:
+ - '/tmp'
+ Files:
+ - Name: main.cpp
+ DirIdx: 1
+ ModTime: 0
+ Length: 0
+ Opcodes:
+ - Opcode: DW_LNS_extended_op
+ ExtLen: 9
+ SubOpcode: DW_LNE_set_address
+ Data: 4096
+ - Opcode: DW_LNS_advance_line
+ SData: 9
+ Data: 0
+ - Opcode: DW_LNS_copy
+ Data: 0
+ - Opcode: DW_LNS_advance_pc
+ Data: 16
+ - Opcode: DW_LNS_set_file
+ Data: 10
+ - Opcode: DW_LNS_advance_line
+ SData: 1
+ Data: 0
+ - Opcode: DW_LNS_copy
+ Data: 0
+ - Opcode: DW_LNS_advance_pc
+ Data: 16
+ - Opcode: DW_LNS_set_file
+ Data: 1
+ - Opcode: DW_LNS_copy
+ Data: 0
+ - Opcode: DW_LNS_advance_pc
+ Data: 16
+ - Opcode: DW_LNS_advance_line
+ SData: 1
+ Data: 0
+ - Opcode: DW_LNS_copy
+ Data: 0
+ - Opcode: DW_LNS_advance_pc
+ Data: 32
+ - Opcode: DW_LNS_extended_op
+ ExtLen: 1
+ SubOpcode: DW_LNE_end_sequence
+ Data: 0
+ )";
+ auto ErrOrSections = DWARFYAML::emitDebugSections(yamldata);
+ ASSERT_THAT_EXPECTED(ErrOrSections, Succeeded());
+ std::unique_ptr<DWARFContext> DwarfContext =
+ DWARFContext::create(*ErrOrSections, 8);
+ ASSERT_TRUE(DwarfContext.get() != nullptr);
+ std::string errors;
+ raw_string_ostream OS(errors);
+ GsymCreator GC;
+ DwarfTransformer DT(*DwarfContext, GC);
+ const uint32_t ThreadCount = 1;
+ ASSERT_THAT_ERROR(DT.convert(ThreadCount, &OS), Succeeded());
+ ASSERT_THAT_ERROR(GC.finalize(OS), Succeeded());
+ OS.flush();
+ SmallString<512> Str;
+ raw_svector_ostream OutStrm(Str);
+ const auto ByteOrder = llvm::endianness::native;
+ FileWriter FW(OutStrm, ByteOrder);
+ ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
+ Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ ASSERT_THAT_EXPECTED(GR, Succeeded());
+ // There should be three functions in our GSYM.
+ EXPECT_EQ(GR->getNumAddresses(), 3u);
+ // Verify "foo" is present and has a line table and inline info.
+ auto ExpFI = GR->getFunctionInfo(0x1000);
+ ASSERT_THAT_EXPECTED(ExpFI, Succeeded());
+ ASSERT_EQ(ExpFI->Range, AddressRange(0x1000, 0x1050));
+ StringRef FuncName = GR->getString(ExpFI->Name);
+ EXPECT_EQ(FuncName, "foo");
+
+ EXPECT_TRUE(ExpFI->OptLineTable.has_value());
+ // Make sure we only have 3 entries to show we removed the line entry with
+ // the invalid file index whose address is 0x0000000000001010.
+ ASSERT_EQ(ExpFI->OptLineTable->size(), 3u);
+ EXPECT_TRUE(ExpFI->Inline.has_value());
+
+ // Make sure that we only have one inline function, not two. We remove one of
+ // the inline functions because it has an invalid DW_AT_call_file attribute.
+ ASSERT_EQ(ExpFI->Inline->Children.size(), 1u);
+ StringRef InlineName = GR->getString(ExpFI->Inline->Children[0].Name);
+ EXPECT_EQ(InlineName, "inline_with_valid_call_file");
+
+ ExpFI = GR->getFunctionInfo(0x0000000000002000);
+ ASSERT_THAT_EXPECTED(ExpFI, Succeeded());
+ ASSERT_EQ(ExpFI->Range, AddressRange(0x2000, 0x2050));
+ FuncName = GR->getString(ExpFI->Name);
+ EXPECT_EQ(FuncName, "func_with_valid_decl_file");
+ EXPECT_FALSE(ExpFI->Inline.has_value());
+ // Make sure we only have 1 entry in the line table which indicates we were
+ // able to parse the DW_AT_decl_file/DW_AT_decl_line correctly.
+ EXPECT_TRUE(ExpFI->OptLineTable.has_value());
+ ASSERT_EQ(ExpFI->OptLineTable->size(), 1u);
+
+ ExpFI = GR->getFunctionInfo(0x0000000000003000);
+ ASSERT_THAT_EXPECTED(ExpFI, Succeeded());
+ ASSERT_EQ(ExpFI->Range, AddressRange(0x3000, 0x3050));
+ FuncName = GR->getString(ExpFI->Name);
+ EXPECT_EQ(FuncName, "func_with_invalid_decl_file");
+ EXPECT_FALSE(ExpFI->Inline.has_value());
+ // Make sure we have no line table because there are no line entries in the
+ // line table and the DW_AT_decl_file attribute was invalid so we were not
+ // able to parse the DW_AT_decl_file/DW_AT_decl_line correctly.
+ EXPECT_FALSE(ExpFI->OptLineTable.has_value());
+
+ // These are the errors we expect to see in the output:
+ std::vector<std::string> ExpectedLogErrors = {
+ "error: function DIE at 0x00000015 has a line entry with invalid DWARF "
+ "file index, this entry will be removed:",
+ "error: inlined function DIE at 0x0000002a has an invalid file index 10 "
+ "in its DW_AT_call_file attribute, this inline entry and all children "
+ "will be removed.",
+ "error: function DIE at 0x000000b8 has an invalid file index 10 in its "
+ "DW_AT_decl_file attribute, unable to create a single line entry from "
+ "the DW_AT_decl_file/DW_AT_decl_line attributes."};
+ // Make sure all expected errors are in the error stream for the line entry,
+ // inlined function, and function DIE that have invalid file indexes.
+ for (const auto &Error : ExpectedLogErrors)
+ EXPECT_TRUE(errors.find(Error) != std::string::npos);
+}
|
DWARF produced by LTO and BOLT can sometimes be broken where file indexes are beyond the end of the line table's file list in the prologue. This patch allows llvm-gsymutil to convert this DWARF without crashing, and emits errors when:
- the line table contains entries with an invalid file index (the line entry will be removed);
- inline functions have invalid DW_AT_call_file file indexes;
- there are no line table entries for a function and we fall back to making a single line table entry from the function's DW_AT_decl_file/DW_AT_decl_line attributes; we make sure the DW_AT_decl_file attribute is valid before emitting it.
00d47ac
to
62886c9
Compare
This is hard to follow - I guess it's meant to be a bulleted list of the different cases/how they're handled, but ends up as a big run-on sentence? |
Bugs are always possible, but I'm pretty sure LTO would have a hard time producing this sort of bug... - maybe BOLT's the root cause? In any case, as noted previously, some kind of bug report would be appreciated. |
For once it is not. :D So I dug into it internally a bit. Looks like the issue was with llvm-12, and it goes away with llvm-15 (the version we are currently migrating to). |
DWARF produced by LTO and BOLT can sometimes be broken where file indexes are beyond the end of the line table's file list in the prologue. This patch allows llvm-gsymutil to convert this DWARF without crashing, and emits errors when:
- the line table contains entries with an invalid file index (the line entry will be removed);
- inline functions have invalid DW_AT_call_file file indexes;
- there are no line table entries for a function and we fall back to making a single line table entry from the function's DW_AT_decl_file/DW_AT_decl_line attributes; we make sure the DW_AT_decl_file attribute is valid before emitting it.