Skip to content

Commit

Permalink
[DebugInfo] Add error checking around data extraction in DWARFAbbrevi…
Browse files Browse the repository at this point in the history
…ationDeclaration::extract

In trying to hoist errors further up this callstack, I discovered that
if the data in the debug_abbrev section is invalid entirely, the code
that parses the debug_abbrev section may do strange or unpredictable
things. The underlying issue is that DataExtractor will return a value
of 0 when it encounters an error in extracting a LEB128 value. It's thus
difficult to determine if there was an error just by looking at the
return value. This patch aims to bail at the first sight of an error in
the debug_abbrev parsing code.

Differential Revision: https://reviews.llvm.org/D151755
  • Loading branch information
bulbazord committed Jun 6, 2023
1 parent c1401e9 commit 6836a47
Show file tree
Hide file tree
Showing 5 changed files with 203 additions and 5 deletions.
2 changes: 2 additions & 0 deletions lld/test/MachO/stabs-dwarf5.s
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ Lsection_abbrev:
.byte 114 ## DW_AT_str_offsets_base
.byte 23 ## DW_FORM_sec_offset
.byte 0 ## EOM(1)
.byte 0 ## EOM(2)
.byte 0 ## EOM(3)
.section __DWARF,__debug_info,regular,debug
.set Lset0, Ldebug_info_end0-Ldebug_info_start0 ## Length of Unit
.long Lset0
Expand Down
2 changes: 2 additions & 0 deletions lld/test/MachO/stabs-icf.s
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ Lsection_abbrev:
.byte 18 ## DW_AT_high_pc
.byte 6 ## DW_FORM_data4
.byte 0 ## EOM(1)
.byte 0 ## EOM(2)
.byte 0 ## EOM(3)
.section __DWARF,__debug_info,regular,debug
.set Lset0, Ldebug_info_end0-Ldebug_info_start0 ## Length of Unit
.long Lset0
Expand Down
4 changes: 4 additions & 0 deletions lld/test/MachO/stabs.s
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,8 @@ Lsection_abbrev:
.byte 18 ## DW_AT_high_pc
.byte 6 ## DW_FORM_data4
.byte 0 ## EOM(1)
.byte 0 ## EOM(2)
.byte 0 ## EOM(3)
.section __DWARF,__debug_info,regular,debug
.set Lset0, Ldebug_info_end0-Ldebug_info_start0 ## Length of Unit
.long Lset0
Expand Down Expand Up @@ -254,6 +256,8 @@ Lsection_abbrev:
.byte 18 ## DW_AT_high_pc
.byte 6 ## DW_FORM_data4
.byte 0 ## EOM(1)
.byte 0 ## EOM(2)
.byte 0 ## EOM(3)
.section __DWARF,__debug_info,regular,debug
.set Lset0, Ldebug_info_end0-Ldebug_info_start0 ## Length of Unit
.long Lset0
Expand Down
25 changes: 20 additions & 5 deletions llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,18 +38,28 @@ llvm::Expected<DWARFAbbreviationDeclaration::ExtractState>
DWARFAbbreviationDeclaration::extract(DataExtractor Data, uint64_t *OffsetPtr) {
clear();
const uint64_t Offset = *OffsetPtr;
Code = Data.getULEB128(OffsetPtr);
Error Err = Error::success();
Code = Data.getULEB128(OffsetPtr, &Err);
if (Err)
return Err;

if (Code == 0)
return ExtractState::Complete;

CodeByteSize = *OffsetPtr - Offset;
Tag = static_cast<llvm::dwarf::Tag>(Data.getULEB128(OffsetPtr));
Tag = static_cast<llvm::dwarf::Tag>(Data.getULEB128(OffsetPtr, &Err));
if (Err)
return Err;

if (Tag == DW_TAG_null) {
clear();
return make_error<llvm::object::GenericBinaryError>(
"abbreviation declaration requires a non-null tag");
}
uint8_t ChildrenByte = Data.getU8(OffsetPtr);
uint8_t ChildrenByte = Data.getU8(OffsetPtr, &Err);
if (Err)
return Err;

HasChildren = (ChildrenByte == DW_CHILDREN_yes);
// Assign a value to our optional FixedAttributeSize member variable. If
// this member variable still has a value after the while loop below, then
Expand All @@ -58,8 +68,13 @@ DWARFAbbreviationDeclaration::extract(DataExtractor Data, uint64_t *OffsetPtr) {

// Read all of the abbreviation attributes and forms.
while (Data.isValidOffset(*OffsetPtr)) {
auto A = static_cast<Attribute>(Data.getULEB128(OffsetPtr));
auto F = static_cast<Form>(Data.getULEB128(OffsetPtr));
auto A = static_cast<Attribute>(Data.getULEB128(OffsetPtr, &Err));
if (Err)
return Err;

auto F = static_cast<Form>(Data.getULEB128(OffsetPtr, &Err));
if (Err)
return Err;

// We successfully reached the end of this abbreviation declaration
// since both attribute and form are zero. There may be more abbreviation
Expand Down
175 changes: 175 additions & 0 deletions llvm/unittests/DebugInfo/DWARF/DWARFDebugAbbrevTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,16 @@ void writeAbbreviationDeclarations(raw_ostream &OS, uint32_t FirstCode,
encodeULEB128(0, OS);
}

void writeMalformedULEB128Value(raw_ostream &OS) {
OS << static_cast<uint8_t>(0x80);
}

void writeULEB128LargerThan64Bits(raw_ostream &OS) {
static constexpr llvm::StringRef LargeULEB128 =
"\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x01";
OS << LargeULEB128;
}

TEST(DWARFDebugAbbrevTest, DWARFAbbrevDeclSetExtractSuccess) {
SmallString<64> RawData;
raw_svector_ostream OS(RawData);
Expand Down Expand Up @@ -104,3 +114,168 @@ TEST(DWARFDebugAbbrevTest, DWARFAbbrevDeclSetExtractSuccessOutOfOrder) {
EXPECT_FALSE(Abbrev1->hasChildren());
EXPECT_EQ(Abbrev1->getNumAttributes(), 1u);
}

TEST(DWARFDebugAbbrevTest, DWARFAbbreviationDeclSetCodeExtractionError) {
SmallString<64> RawData;

// Check for malformed ULEB128.
{
raw_svector_ostream OS(RawData);
writeMalformedULEB128Value(OS);

uint64_t Offset = 0;
DataExtractor Data(RawData, sys::IsLittleEndianHost, sizeof(uint64_t));
DWARFAbbreviationDeclarationSet AbbrevSet;
EXPECT_FALSE(AbbrevSet.extract(Data, &Offset));
EXPECT_EQ(Offset, 0u);
}

RawData.clear();
// Check for ULEB128 too large to fit into a uin64_t.
{
raw_svector_ostream OS(RawData);
writeULEB128LargerThan64Bits(OS);

uint64_t Offset = 0;
DataExtractor Data(RawData, sys::IsLittleEndianHost, sizeof(uint64_t));
DWARFAbbreviationDeclarationSet AbbrevSet;
EXPECT_FALSE(AbbrevSet.extract(Data, &Offset));
EXPECT_EQ(Offset, 0u);
}
}

TEST(DWARFDebugAbbrevTest, DWARFAbbreviationDeclSetTagExtractionError) {
SmallString<64> RawData;
const uint32_t Code = 1;

// Check for malformed ULEB128.
{
raw_svector_ostream OS(RawData);
encodeULEB128(Code, OS);
writeMalformedULEB128Value(OS);

uint64_t Offset = 0;
DataExtractor Data(RawData, sys::IsLittleEndianHost, sizeof(uint64_t));
DWARFAbbreviationDeclarationSet AbbrevSet;
EXPECT_TRUE(AbbrevSet.extract(Data, &Offset));
// Only the code was extracted correctly.
EXPECT_EQ(Offset, 1u);
}

RawData.clear();
// Check for ULEB128 too large to fit into a uint64_t.
{
raw_svector_ostream OS(RawData);
encodeULEB128(Code, OS);
writeULEB128LargerThan64Bits(OS);

uint64_t Offset = 0;
DataExtractor Data(RawData, sys::IsLittleEndianHost, sizeof(uint64_t));
DWARFAbbreviationDeclarationSet AbbrevSet;
EXPECT_TRUE(AbbrevSet.extract(Data, &Offset));
// Only the code was extracted correctly.
EXPECT_EQ(Offset, 1u);
}
}

TEST(DWARFDebugAbbrevTest, DWARFAbbreviatioDeclSetChildExtractionError) {
SmallString<64> RawData;
const uint32_t Code = 1;
const dwarf::Tag Tag = DW_TAG_compile_unit;

// We want to make sure that we fail if we reach the end of the stream before
// reading the 'children' byte.
raw_svector_ostream OS(RawData);
encodeULEB128(Code, OS);
encodeULEB128(Tag, OS);

uint64_t Offset = 0;
DataExtractor Data(RawData, sys::IsLittleEndianHost, sizeof(uint64_t));
DWARFAbbreviationDeclarationSet AbbrevSet;
EXPECT_TRUE(AbbrevSet.extract(Data, &Offset));
// The code and the tag were extracted correctly.
EXPECT_EQ(Offset, 2u);
}

TEST(DWARFDebugAbbrevTest, DWARFAbbreviationDeclSetAttributeExtractionError) {
SmallString<64> RawData;
const uint32_t Code = 1;
const dwarf::Tag Tag = DW_TAG_compile_unit;
const uint8_t Children = DW_CHILDREN_yes;

// Check for malformed ULEB128.
{
raw_svector_ostream OS(RawData);
encodeULEB128(Code, OS);
encodeULEB128(Tag, OS);
OS << Children;
writeMalformedULEB128Value(OS);

uint64_t Offset = 0;
DataExtractor Data(RawData, sys::IsLittleEndianHost, sizeof(uint64_t));
DWARFAbbreviationDeclarationSet AbbrevSet;
EXPECT_TRUE(AbbrevSet.extract(Data, &Offset));
// The code, tag, and child byte were extracted correctly.
EXPECT_EQ(Offset, 3u);
}

RawData.clear();
// Check for ULEB128 too large to fit into a uint64_t.
{
raw_svector_ostream OS(RawData);
encodeULEB128(Code, OS);
encodeULEB128(Tag, OS);
OS << Children;
writeULEB128LargerThan64Bits(OS);

uint64_t Offset = 0;
DataExtractor Data(RawData, sys::IsLittleEndianHost, sizeof(uint64_t));
DWARFAbbreviationDeclarationSet AbbrevSet;
EXPECT_TRUE(AbbrevSet.extract(Data, &Offset));
// The code, tag, and child byte were extracted correctly.
EXPECT_EQ(Offset, 3u);
}
}

TEST(DWARFDebugAbbrevTest, DWARFAbbreviationDeclSetFormExtractionError) {
SmallString<64> RawData;
const uint32_t Code = 1;
const dwarf::Tag Tag = DW_TAG_compile_unit;
const uint8_t Children = DW_CHILDREN_yes;
const dwarf::Attribute Attr = DW_AT_name;

// Check for malformed ULEB128.
{
raw_svector_ostream OS(RawData);
encodeULEB128(Code, OS);
encodeULEB128(Tag, OS);
OS << Children;
encodeULEB128(Attr, OS);
writeMalformedULEB128Value(OS);

uint64_t Offset = 0;
DataExtractor Data(RawData, sys::IsLittleEndianHost, sizeof(uint64_t));
DWARFAbbreviationDeclarationSet AbbrevSet;
EXPECT_TRUE(AbbrevSet.extract(Data, &Offset));
// The code, tag, child byte, and first attribute were extracted correctly.
EXPECT_EQ(Offset, 4u);
}

RawData.clear();
// Check for ULEB128 too large to fit into a uint64_t.
{
raw_svector_ostream OS(RawData);
encodeULEB128(Code, OS);
encodeULEB128(Tag, OS);
OS << Children;
encodeULEB128(Attr, OS);
writeULEB128LargerThan64Bits(OS);

uint64_t Offset = 0;
DataExtractor Data(RawData, sys::IsLittleEndianHost, sizeof(uint64_t));
DWARFAbbreviationDeclarationSet AbbrevSet;
EXPECT_TRUE(AbbrevSet.extract(Data, &Offset));
// The code, tag, child byte, and first attribute were extracted correctly.
EXPECT_EQ(Offset, 4u);
}
}

0 comments on commit 6836a47

Please sign in to comment.