Skip to content

Commit

Permalink
Improvements for debug info.
Browse files Browse the repository at this point in the history
Summary:
Assembly functions could have no corresponding DW_TAG_subprogram
entries, yet they are represented in module ranges (and .debug_aranges)
and will have line number information. Make sure we update those.

Eliminated unnecessary data structures and optimized some passes.

For .debug_loc, unused location entries are no longer processed,
resulting in smaller output files.

Overall it's a small improvement in processing time and memory usage.

(cherry picked from FBD3362540)
  • Loading branch information
maksfb committed May 28, 2016
1 parent 65ac8bb commit 4460da0
Show file tree
Hide file tree
Showing 7 changed files with 164 additions and 181 deletions.
76 changes: 52 additions & 24 deletions bolt/BinaryContext.cpp
Expand Up @@ -116,7 +116,7 @@ void findAddressRangesObjects(
/// Recursively finds DWARF DW_TAG_subprogram DIEs and match them with
/// BinaryFunctions. Record DIEs for unknown subprograms (mostly functions that
/// are never called and removed from the binary) in Unknown.
void findSubprograms(const DWARFCompileUnit *Unit,
void findSubprograms(DWARFCompileUnit *Unit,
const DWARFDebugInfoEntryMinimal *DIE,
std::map<uint64_t, BinaryFunction> &BinaryFunctions,
BinaryContext::DIECompileUnitVector &Unknown) {
Expand All @@ -126,7 +126,7 @@ void findSubprograms(const DWARFCompileUnit *Unit,
if (DIE->getLowAndHighPC(Unit, LowPC, HighPC)) {
auto It = BinaryFunctions.find(LowPC);
if (It != BinaryFunctions.end()) {
It->second.addSubprocedureDIE(Unit, DIE);
It->second.addSubprogramDIE(Unit, DIE);
} else {
Unknown.emplace_back(DIE, Unit);
}
Expand All @@ -145,13 +145,8 @@ void findSubprograms(const DWARFCompileUnit *Unit,
namespace llvm {
namespace bolt {

void BinaryContext::preprocessDebugInfo() {
// Iterate over all DWARF compilation units and map their offset in the
// binary to themselves in OffsetDwarfCUMap
for (const auto &CU : DwCtx->compile_units()) {
OffsetToDwarfCU[CU->getOffset()] = CU.get();
}

void BinaryContext::preprocessDebugInfo(
std::map<uint64_t, BinaryFunction> &BinaryFunctions) {
// Populate MCContext with DWARF files.
for (const auto &CU : DwCtx->compile_units()) {
const auto CUID = CU->getOffset();
Expand All @@ -165,23 +160,53 @@ void BinaryContext::preprocessDebugInfo() {
"";
Ctx->getDwarfFile(Dir, FileNames[I].Name, I + 1, CUID);
}

auto LineTableOffset =
DwCtx->getAttrFieldOffsetForUnit(CU.get(), dwarf::DW_AT_stmt_list);
if (LineTableOffset)
LineTableOffsetCUMap[CUID] = LineTableOffset;
}
}

void BinaryContext::preprocessFunctionDebugInfo(
std::map<uint64_t, BinaryFunction> &BinaryFunctions) {
// For each CU, iterate over its children DIEs and match subprogram DIEs to
// BinaryFunctions.
for (const auto &CU : DwCtx->compile_units()) {
for (auto &CU : DwCtx->compile_units()) {
findSubprograms(CU.get(), CU->getUnitDIE(false), BinaryFunctions,
UnknownFunctions);
}

// Some functions may not have a corresponding subprogram DIE
// yet they will be included in some CU and will have line number information.
// Hence we need to associate them with the CU and include in CU ranges.
for (auto &AddrFunctionPair : BinaryFunctions) {
auto FunctionAddress = AddrFunctionPair.first;
auto &Function = AddrFunctionPair.second;
if (!Function.getSubprogramDIEs().empty())
continue;
if (auto DebugAranges = DwCtx->getDebugAranges()) {
auto CUOffset = DebugAranges->findAddress(FunctionAddress);
if (CUOffset != -1U) {
Function.addSubprogramDIE(DwCtx->getCompileUnitForOffset(CUOffset),
nullptr);
continue;
}
}

#ifdef DWARF_LOOKUP_ALL_RANGES
// Last resort - iterate over all compile units. This should not happen
// very often. If it does, we need to create a separate lookup table
// similar to .debug_aranges internally. This slows down processing
// considerably.
for (const auto &CU : DwCtx->compile_units()) {
const auto *CUDie = CU->getUnitDIE();
for (const auto &Range : CUDie->getAddressRanges(CU.get())) {
if (FunctionAddress >= Range.first &&
FunctionAddress < Range.second) {
Function.addSubprogramDIE(CU.get(), nullptr);
break;
}
}
}
#endif
}
}

void BinaryContext::preprocessFunctionDebugInfo(
std::map<uint64_t, BinaryFunction> &BinaryFunctions) {
// Iterate over DIE trees finding objects that contain address ranges.
for (const auto &CU : DwCtx->compile_units()) {
findAddressRangesObjects(CU.get(), CU->getUnitDIE(false), BinaryFunctions,
Expand All @@ -191,15 +216,18 @@ void BinaryContext::preprocessFunctionDebugInfo(
// Iterate over location lists and save them in LocationLists.
auto DebugLoc = DwCtx->getDebugLoc();
for (const auto &DebugLocEntry : DebugLoc->getLocationLists()) {
if (DebugLocEntry.Entries.empty())
continue;
auto StartAddress = DebugLocEntry.Entries.front().Begin;
auto *Function = getBinaryFunctionContainingAddress(StartAddress,
BinaryFunctions);
if (!Function || !Function->isSimple())
continue;
LocationLists.emplace_back(DebugLocEntry.Offset);
auto &LocationList = LocationLists.back();
for (const auto &Location : DebugLocEntry.Entries) {
auto *Function = getBinaryFunctionContainingAddress(Location.Begin,
BinaryFunctions);
if (Function && Function->isSimple()) {
LocationList.addLocation(&Location.Loc, *Function, Location.Begin,
Location.End);
}
LocationList.addLocation(&Location.Loc, *Function, Location.Begin,
Location.End);
}
}
}
Expand Down
16 changes: 3 additions & 13 deletions bolt/BinaryContext.h
Expand Up @@ -63,17 +63,6 @@ class BinaryContext {
// Set of addresses we cannot relocate because we have a direct branch to it.
std::set<uint64_t> InterproceduralBranchTargets;

// Map from an offset in the .debug_info section of the binary to the
// DWARF Compilation Unit that starts at that offset.
std::map<uint32_t, DWARFCompileUnit *> OffsetToDwarfCU;

// Maps each compile unit to the offset of its .debug_line line table in the
// output file.
std::map<const DWARFCompileUnit *, uint32_t> CompileUnitLineTableOffset;

/// Maps DWARF CUID to offset of stmt_list attribute in .debug_info.
std::map<unsigned, uint32_t> LineTableOffsetCUMap;

/// List of DWARF location lists in .debug_loc.
std::vector<LocationList> LocationLists;

Expand All @@ -86,7 +75,7 @@ class BinaryContext {
std::vector<std::pair<const DWARFDebugInfoEntryMinimal *,
const DWARFCompileUnit *>> ;

/// List of subprocedure DIEs that have addresses that don't match any
/// List of subprogram DIEs that have addresses that don't match any
/// function, along with their CU.
DIECompileUnitVector UnknownFunctions;

Expand Down Expand Up @@ -162,7 +151,8 @@ class BinaryContext {
MCSymbol *getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix);

/// Populate some internal data structures with debug info.
void preprocessDebugInfo();
void preprocessDebugInfo(
std::map<uint64_t, BinaryFunction> &BinaryFunctions);

/// Populate internal data structures with debug info that depends on
/// disassembled functions.
Expand Down
112 changes: 42 additions & 70 deletions bolt/BinaryFunction.cpp
Expand Up @@ -53,22 +53,42 @@ PrintDebugInfo("print-debug-info",

namespace {

// Finds which DWARF compile unit owns an address in the executable by
// querying .debug_aranges.
DWARFCompileUnit *FindCompileUnitForAddress(uint64_t Address,
const BinaryContext &BC) {
auto DebugAranges = BC.DwCtx->getDebugAranges();
if (!DebugAranges)
return nullptr;
/// Gets debug line information for the instruction located at the given
/// address in the original binary. The SMLoc's pointer is used
/// to point to this information, which is represented by a
/// DebugLineTableRowRef. The returned pointer is null if no debug line
/// information for this instruction was found.
SMLoc findDebugLineInformationForInstructionAt(
uint64_t Address,
DWARFUnitLineTable &ULT) {
// We use the pointer in SMLoc to store an instance of DebugLineTableRowRef,
// which occupies 64 bits. Thus, we can only proceed if the struct fits into
// the pointer itself.
assert(
sizeof(decltype(SMLoc().getPointer())) >= sizeof(DebugLineTableRowRef) &&
"Cannot fit instruction debug line information into SMLoc's pointer");

uint32_t CompileUnitIndex = DebugAranges->findAddress(Address);
SMLoc NullResult = DebugLineTableRowRef::NULL_ROW.toSMLoc();

auto It = BC.OffsetToDwarfCU.find(CompileUnitIndex);
if (It == BC.OffsetToDwarfCU.end()) {
return nullptr;
} else {
return It->second;
}
auto &LineTable = ULT.second;
if (!LineTable)
return NullResult;

uint32_t RowIndex = LineTable->lookupAddress(Address);
if (RowIndex == LineTable->UnknownRowIndex)
return NullResult;

assert(RowIndex < LineTable->Rows.size() &&
"Line Table lookup returned invalid index.");

decltype(SMLoc().getPointer()) Ptr;
DebugLineTableRowRef *InstructionLocation =
reinterpret_cast<DebugLineTableRowRef *>(&Ptr);

InstructionLocation->DwCompileUnitIndex = ULT.first->getOffset();
InstructionLocation->RowIndex = RowIndex + 1;

return SMLoc::getFromPointer(Ptr);
}

} // namespace
Expand Down Expand Up @@ -179,13 +199,9 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
};

// Used in printInstruction below to print debug line information.
DWARFCompileUnit *Unit = nullptr;
const DWARFDebugLine::LineTable *LineTable = nullptr;

if (opts::PrintDebugInfo) {
Unit = FindCompileUnitForAddress(getAddress(), BC);
LineTable = Unit ? BC.DwCtx->getLineTableForUnit(Unit) : nullptr;
}
const DWARFDebugLine::LineTable *LineTable =
opts::PrintDebugInfo ? getDWARFUnitLineTable().second
: nullptr;

auto printInstruction = [&](const MCInst &Instruction) {
if (BC.MIA->isEHLabel(Instruction)) {
Expand Down Expand Up @@ -386,18 +402,14 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
OS << "End of Function \"" << getName() << "\"\n\n";
}

bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData,
bool ExtractDebugLineData) {
bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
assert(FunctionData.size() == getSize() &&
"function size does not match raw data size");

auto &Ctx = BC.Ctx;
auto &MIA = BC.MIA;
DWARFCompileUnit *CompileUnit = nullptr;

if (ExtractDebugLineData) {
CompileUnit = FindCompileUnitForAddress(getAddress(), BC);
}
DWARFUnitLineTable ULT = getDWARFUnitLineTable();

// Insert a label at the beginning of the function. This will be our first
// basic block.
Expand All @@ -409,7 +421,7 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData,
MCSymbol *TargetSymbol{nullptr};
if (!BC.MIA->evaluateRIPOperand(Instruction, Address, Size,
TargetAddress)) {
DEBUG(dbgs() << "BOLT: rip-relative operand could not be evaluated:\n";
DEBUG(dbgs() << "BOLT: rip-relative operand can't be evaluated:\n";
BC.InstPrinter->printInst(&Instruction, dbgs(), "", *BC.STI);
dbgs() << '\n';
Instruction.dump_pretty(dbgs(), BC.InstPrinter.get());
Expand Down Expand Up @@ -574,10 +586,9 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData,
}
}

if (CompileUnit) {
if (ULT.first && ULT.second) {
Instruction.setLoc(
findDebugLineInformationForInstructionAt(AbsoluteInstrAddr,
CompileUnit));
findDebugLineInformationForInstructionAt(AbsoluteInstrAddr, ULT));
}

addInstruction(Offset, std::move(Instruction));
Expand All @@ -595,45 +606,6 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData,
return true;
}

/// Builds the SMLoc that encodes debug line information for the instruction
/// located at \p Address in the original binary.
///
/// The pointer stored in the returned SMLoc is not dereferenceable: its bits
/// hold a DebugLineTableRowRef made of the compile unit's offset and the
/// matching line-table row index plus one.
///
/// \param Address  address of the instruction to look up in the line table.
/// \param Unit     compile unit whose line table is consulted.
/// \returns the encoded location, or DebugLineTableRowRef::NULL_ROW converted
///          to an SMLoc when \p Unit has no line table or the table has no
///          row for \p Address.
SMLoc
BinaryFunction::findDebugLineInformationForInstructionAt(
    uint64_t Address,
    DWARFCompileUnit *Unit) {
  using PointerTy = decltype(SMLoc().getPointer());

  // The DebugLineTableRowRef is stored directly in the bits of SMLoc's
  // pointer, so the struct must fit into the pointer itself. Both sizes are
  // compile-time constants; checking them statically keeps the guarantee in
  // builds where assert() is compiled out.
  static_assert(
      sizeof(PointerTy) >= sizeof(DebugLineTableRowRef),
      "Cannot fit instruction debug line information into SMLoc's pointer");

  const SMLoc NullResult = DebugLineTableRowRef::NULL_ROW.toSMLoc();

  const DWARFDebugLine::LineTable *LineTable =
      BC.DwCtx->getLineTableForUnit(Unit);
  if (!LineTable)
    return NullResult;

  const uint32_t RowIndex = LineTable->lookupAddress(Address);
  if (RowIndex == LineTable->UnknownRowIndex)
    return NullResult;

  assert(RowIndex < LineTable->Rows.size() &&
         "Line Table lookup returned invalid index.");

  // Value-initialize the storage so that any bits not covered by the
  // DebugLineTableRowRef fields are deterministic rather than stack garbage.
  PointerTy Ptr{};
  DebugLineTableRowRef *InstructionLocation =
      reinterpret_cast<DebugLineTableRowRef *>(&Ptr);

  InstructionLocation->DwCompileUnitIndex = Unit->getOffset();
  // The stored row index is shifted by one relative to the lookup result.
  InstructionLocation->RowIndex = RowIndex + 1;

  return SMLoc::getFromPointer(Ptr);
}

bool BinaryFunction::buildCFG() {

auto &MIA = BC.MIA;
Expand Down

0 comments on commit 4460da0

Please sign in to comment.