Skip to content

Commit

Permalink
[ELF] --gdb-index: skip SHF_GROUP .debug_info
Browse files Browse the repository at this point in the history
-gdwarf-5 -fdebug-types-section may produce multiple .debug_info sections.  All
except one are type units (.debug_types before DWARF v5). When constructing
.gdb_index, we should ignore these type units. We use a simple heuristic: the
compile unit does not have the SHF_GROUP flag. (This needs to be revisited if
people place compile unit .debug_info in COMDAT groups.)

This issue manifests as a data race: because an object file may have multiple
.debug_info sections, we may concurrently construct `LLDDwarfObj` for the same
file in multiple threads. The threads may access `InputSectionBase::data()`
concurrently on the same input section. `InputSectionBase::data()` does a lazy
uncompress() and rewrites the member variable `rawData`. A thread running zlib
`inflate()` (transitively called by uncompress()) on a buffer with `rawData`
tampered by another thread may fail with `uncompress failed: zlib error: Z_DATA_ERROR`.

Even if no data race occurred in an optimistic run, if there are N .debug_info,
one CU entry and its address ranges will be replicated N times. The result
.gdb_index can be much larger than a correct one.

The new test gdb-index-dwarf5-type-unit.s actually has two compile units. This
cannot be produced with regular approaches (it can be produced with -r
--unique). This is used to demonstrate that the .gdb_index construction code
only considers the last non-SHF_GROUP .debug_info

Reviewed By: grimar

Differential Revision: https://reviews.llvm.org/D85579
  • Loading branch information
MaskRay committed Aug 13, 2020
1 parent 19d7cc2 commit fb14129
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 26 deletions.
22 changes: 20 additions & 2 deletions lld/ELF/DWARF.cpp
Expand Up @@ -26,7 +26,12 @@ using namespace lld;
using namespace lld::elf;

template <class ELFT> LLDDwarfObj<ELFT>::LLDDwarfObj(ObjFile<ELFT> *obj) {
for (InputSectionBase *sec : obj->getSections()) {
// Get the ELF sections to retrieve sh_flags. See the SHF_GROUP comment below.
ArrayRef<typename ELFT::Shdr> objSections =
CHECK(obj->getObj().sections(), obj);
assert(objSections.size() == obj->getSections().size());
for (auto it : llvm::enumerate(obj->getSections())) {
InputSectionBase *sec = it.value();
if (!sec)
continue;

Expand All @@ -35,7 +40,6 @@ template <class ELFT> LLDDwarfObj<ELFT>::LLDDwarfObj(ObjFile<ELFT> *obj) {
.Case(".debug_addr", &addrSection)
.Case(".debug_gnu_pubnames", &gnuPubnamesSection)
.Case(".debug_gnu_pubtypes", &gnuPubtypesSection)
.Case(".debug_info", &infoSection)
.Case(".debug_loclists", &loclistsSection)
.Case(".debug_ranges", &rangesSection)
.Case(".debug_rnglists", &rnglistsSection)
Expand All @@ -53,6 +57,20 @@ template <class ELFT> LLDDwarfObj<ELFT>::LLDDwarfObj(ObjFile<ELFT> *obj) {
strSection = toStringRef(sec->data());
else if (sec->name == ".debug_line_str")
lineStrSection = toStringRef(sec->data());
else if (sec->name == ".debug_info" &&
!(objSections[it.index()].sh_flags & ELF::SHF_GROUP)) {
// In DWARF v5, -fdebug-types-section places type units in .debug_info
// sections in COMDAT groups. They are not compile units and thus should
// be ignored for .gdb_index/diagnostics purposes.
//
// We use a simple heuristic: the compile unit does not have the SHF_GROUP
// flag. If we place compile units in COMDAT groups in the future, we may
// need to perform a lightweight parsing. We drop the SHF_GROUP flag when
// the InputSection was created, so we need to retrieve sh_flags from the
// associated ELF section header.
infoSection.Data = toStringRef(sec->data());
infoSection.sec = sec;
}
}
}

Expand Down
4 changes: 4 additions & 0 deletions lld/ELF/DWARF.h
Expand Up @@ -32,6 +32,10 @@ template <class ELFT> class LLDDwarfObj final : public llvm::DWARFObject {
f(infoSection);
}

InputSection *getInfoSection() const {
return cast<InputSection>(infoSection.sec);
}

const llvm::DWARFSection &getLoclistsSection() const override {
return loclistsSection;
}
Expand Down
49 changes: 25 additions & 24 deletions lld/ELF/SyntheticSections.cpp
Expand Up @@ -2695,15 +2695,6 @@ void GdbIndexSection::initOutputSize() {
}
}

static std::vector<InputSection *> getDebugInfoSections() {
std::vector<InputSection *> ret;
for (InputSectionBase *s : inputSections)
if (InputSection *isec = dyn_cast<InputSection>(s))
if (isec->name == ".debug_info")
ret.push_back(isec);
return ret;
}

static std::vector<GdbIndexSection::CuEntry> readCuList(DWARFContext &dwarf) {
std::vector<GdbIndexSection::CuEntry> ret;
for (std::unique_ptr<DWARFUnit> &cu : dwarf.compile_units())
Expand Down Expand Up @@ -2857,30 +2848,40 @@ createSymbols(ArrayRef<std::vector<GdbIndexSection::NameAttrEntry>> nameAttrs,

// Returns a newly-created .gdb_index section.
template <class ELFT> GdbIndexSection *GdbIndexSection::create() {
std::vector<InputSection *> sections = getDebugInfoSections();

// .debug_gnu_pub{names,types} are useless in executables.
// They are present in input object files solely for creating
// a .gdb_index. So we can remove them from the output.
for (InputSectionBase *s : inputSections)
// Collect InputFiles with .debug_info. See the comment in
// LLDDwarfObj<ELFT>::LLDDwarfObj. If we do lightweight parsing in the future,
// note that isec->data() may uncompress the full content, which should be
// parallelized.
SetVector<InputFile *> files;
for (InputSectionBase *s : inputSections) {
InputSection *isec = dyn_cast<InputSection>(s);
if (!isec)
continue;
// .debug_gnu_pub{names,types} are useless in executables.
// They are present in input object files solely for creating
// a .gdb_index. So we can remove them from the output.
if (s->name == ".debug_gnu_pubnames" || s->name == ".debug_gnu_pubtypes")
s->markDead();
else if (isec->name == ".debug_info")
files.insert(isec->file);
}

std::vector<GdbChunk> chunks(sections.size());
std::vector<std::vector<NameAttrEntry>> nameAttrs(sections.size());
std::vector<GdbChunk> chunks(files.size());
std::vector<std::vector<NameAttrEntry>> nameAttrs(files.size());

parallelForEachN(0, sections.size(), [&](size_t i) {
parallelForEachN(0, files.size(), [&](size_t i) {
// To keep memory usage low, we don't want to keep cached DWARFContext, so
// avoid getDwarf() here.
ObjFile<ELFT> *file = sections[i]->getFile<ELFT>();
ObjFile<ELFT> *file = cast<ObjFile<ELFT>>(files[i]);
DWARFContext dwarf(std::make_unique<LLDDwarfObj<ELFT>>(file));
auto &dobj = static_cast<const LLDDwarfObj<ELFT> &>(dwarf.getDWARFObj());

chunks[i].sec = sections[i];
// If the are multiple compile units .debug_info (very rare ld -r --unique),
// this only picks the last one. Other address ranges are lost.
chunks[i].sec = dobj.getInfoSection();
chunks[i].compilationUnits = readCuList(dwarf);
chunks[i].addressAreas = readAddressAreas(dwarf, sections[i]);
nameAttrs[i] = readPubNamesAndTypes<ELFT>(
static_cast<const LLDDwarfObj<ELFT> &>(dwarf.getDWARFObj()),
chunks[i].compilationUnits);
chunks[i].addressAreas = readAddressAreas(dwarf, chunks[i].sec);
nameAttrs[i] = readPubNamesAndTypes<ELFT>(dobj, chunks[i].compilationUnits);
});

auto *ret = make<GdbIndexSection>();
Expand Down
93 changes: 93 additions & 0 deletions lld/test/ELF/gdb-index-dwarf5-type-unit.s
@@ -0,0 +1,93 @@
# REQUIRES: x86, zlib
## -gdwarf-5 -fdebug-types-section may produce multiple .debug_info sections.
## All except one are type units. Test we can locate the compile unit, add it to
## the index, and not erroneously duplicate it (which would happen if we
## consider every .debug_info a compile unit).

# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o
# RUN: ld.lld --gdb-index -Ttext=0x1000 %t.o -o %t
# RUN: llvm-dwarfdump --gdb-index %t | FileCheck %s

## Test we don't uncompress a section while another thread is concurrently
## accessing it. This would be detected by tsan as a data race.
# RUN: llvm-objcopy --compress-debug-sections %t.o
# RUN: ld.lld --gdb-index -Ttext=0x1000 %t.o -o %t1
# RUN: llvm-dwarfdump --gdb-index %t1 | FileCheck %s

## In this test, there are actually two compile unit .debug_info (very uncommon;
## -r --unique). Currently we only handle the last compile unit.
# CHECK: CU list offset = 0x18, has 1 entries:
# CHECK-NEXT: 0: Offset = 0x32, Length = 0x19

# CHECK: Address area offset = 0x28, has 1 entries:
# CHECK-NEXT: Low/High address = [0x1001, 0x1002) (Size: 0x1), CU id = 0

.Lfunc_begin0:
ret
.Lfunc_end0:
.Lfunc_begin1:
ret
.Lfunc_end1:

.section .debug_abbrev,"",@progbits
.byte 1 # Abbreviation Code
.byte 65 # DW_TAG_type_unit
.byte 0 # DW_CHILDREN_no
.byte 0 # EOM(1)
.byte 0 # EOM(2)

.byte 2 # Abbreviation Code
.byte 17 # DW_TAG_compile_unit
.byte 0 # DW_CHILDREN_no
.byte 17 # DW_AT_low_pc
.byte 1 # DW_FORM_addr
.byte 18 # DW_AT_high_pc
.byte 6 # DW_FORM_data4
.byte 0 # EOM(1)
.byte 0 # EOM(2)

.byte 0 # EOM(3)

.macro TYPE_UNIT id signature
.section .debug_info,"G",@progbits,\signature
.long .Ldebug_info_end\id-.Ldebug_info_start\id # Length of Unit
.Ldebug_info_start\id:
.short 5 # DWARF version number
.byte 2 # DWARF Unit Type
.byte 8 # Address Size
.long .debug_abbrev # Offset Into Abbrev. Section
.quad \signature # Type Signature
.long .Ldebug_info_end\id # Type DIE Offset
.byte 1 # Abbrev [1] DW_TAG_type_unit
.Ldebug_info_end\id:
.endm

## We place compile units between two type units (rare). A naive approach will
## take either the first or the last .debug_info
TYPE_UNIT 0, 123

.section .debug_info,"",@progbits,unique,0
.Lcu_begin0:
.long .Lcu_end0-.Lcu_begin0-4 # Length of Unit
.short 5 # DWARF version number
.byte 1 # DWARF Unit Type
.byte 8 # Address Size
.long .debug_abbrev # Offset Into Abbrev. Section
.byte 2 # Abbrev [2] DW_TAG_compile_unit
.quad .Lfunc_begin0 # DW_AT_low_pc
.long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
.Lcu_end0:

.section .debug_info,"",@progbits,unique,1
.Lcu_begin1:
.long .Lcu_end1-.Lcu_begin1-4 # Length of Unit
.short 5 # DWARF version number
.byte 1 # DWARF Unit Type
.byte 8 # Address Size
.long .debug_abbrev # Offset Into Abbrev. Section
.byte 2 # Abbrev [2] DW_TAG_compile_unit
.quad .Lfunc_begin1 # DW_AT_low_pc
.long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc
.Lcu_end1:

TYPE_UNIT 1, 456

0 comments on commit fb14129

Please sign in to comment.