Skip to content

Commit

Permalink
[BOLT] Rework debug info processing.
Browse files Browse the repository at this point in the history
Summary:
Multiple improvements to debug info handling:
  * Add support for relocation mode.
  * Speed up processing.
  * Reduce memory consumption.
  * Bug fixes.

The high-level idea behind the new debug info handling is that we no longer
save intermediate state for ranges and location lists. Instead, we rely on
function and basic block address transformations to update the info
in a final post-processing step.

For HHVM in non-relocation mode, peak memory went down from 55 GB to 35 GB, and processing time went from over 6 minutes to under 5 minutes.

(cherry picked from FBD5113431)
  • Loading branch information
maksfb committed May 16, 2017
1 parent 511a1c7 commit 96adec5
Show file tree
Hide file tree
Showing 10 changed files with 534 additions and 730 deletions.
30 changes: 28 additions & 2 deletions bolt/BinaryBasicBlock.h
Expand Up @@ -75,7 +75,7 @@ class BinaryBasicBlock {
/// [Begin, End) address range for this block in the output binary.
std::pair<uint64_t, uint64_t> OutputAddressRange{0, 0};

/// Original range of the basic block in the function.
/// Original offset range of the basic block in the function.
std::pair<uint32_t, uint32_t> InputRange{INVALID_OFFSET, INVALID_OFFSET};

/// Alignment requirements for the block.
Expand Down Expand Up @@ -666,6 +666,11 @@ class BinaryBasicBlock {
OutputAddressRange.second = Address;
}

/// Gets the [begin, end) range of this BB in the input binary, widened to
/// 64-bit values.
/// NOTE(review): InputRange is declared as an offset range within the
/// function, so the returned pair presumably holds offsets rather than
/// absolute memory addresses -- confirm against callers.
std::pair<uint64_t, uint64_t> getInputAddressRange() const {
  return std::pair<uint64_t, uint64_t>(InputRange.first, InputRange.second);
}

/// Gets the memory address range of this BB in the output binary.
std::pair<uint64_t, uint64_t> getOutputAddressRange() const {
return OutputAddressRange;
Expand Down Expand Up @@ -696,6 +701,22 @@ class BinaryBasicBlock {
/// Validate successor invariants for this BB.
bool validateSuccessorInvariants();

/// Return the input offset, measured from the function start, at which this
/// basic block begins.
uint32_t getInputOffset() const {
  const uint32_t BeginOffset = InputRange.first;
  return BeginOffset;
}

/// Return the offset, measured from the function start, of the location
/// immediately following the last byte of this basic block.
uint32_t getEndOffset() const {
  const uint32_t PastEndOffset = InputRange.second;
  return PastEndOffset;
}

/// Return the number of bytes this basic block occupied on input, i.e. the
/// distance between the two ends of InputRange.
uint32_t getOriginalSize() const {
  const uint32_t BeginOffset = InputRange.first;
  const uint32_t PastEndOffset = InputRange.second;
  return PastEndOffset - BeginOffset;
}

private:
void adjustNumPseudos(const MCInst &Inst, int Sign);

Expand All @@ -717,10 +738,15 @@ class BinaryBasicBlock {
void clearLandingPads();

/// Return offset of the basic block from the function start.
uint64_t getOffset() const {
uint32_t getOffset() const {
return InputRange.first;
}

/// Record \p Offset as the end of this basic block's input range.
void setEndOffset(uint32_t Offset) {
  uint32_t &PastEndOffset = InputRange.second;
  PastEndOffset = Offset;
}

/// Get the index of this basic block.
unsigned getIndex() const {
assert(isValid());
Expand Down
109 changes: 14 additions & 95 deletions bolt/BinaryContext.cpp
Expand Up @@ -12,6 +12,7 @@
#include "BinaryContext.h"
#include "BinaryFunction.h"
#include "llvm/ADT/Twine.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
Expand All @@ -38,6 +39,15 @@ PrintDebugInfo("print-debug-info",

BinaryContext::~BinaryContext() { }

/// Create an object writer that emits to \p OS.
/// The assembler backend held in MAB is constructed on the first call and
/// reused by every later call.
MCObjectWriter *BinaryContext::createObjectWriter(raw_pwrite_stream &OS) {
  // Lazily build the backend the first time a writer is requested.
  if (MAB == nullptr)
    MAB.reset(TheTarget->createMCAsmBackend(*MRI, TripleName, ""));

  return MAB->createObjectWriter(OS);
}

MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address,
Twine Prefix) {
MCSymbol *Symbol{nullptr};
Expand Down Expand Up @@ -78,7 +88,6 @@ MCSymbol *BinaryContext::getGlobalSymbolAtAddress(uint64_t Address) const {
void BinaryContext::foldFunction(BinaryFunction &ChildBF,
BinaryFunction &ParentBF,
std::map<uint64_t, BinaryFunction> &BFs) {

// Copy name list.
ParentBF.addNewNames(ChildBF.getNames());

Expand Down Expand Up @@ -120,71 +129,12 @@ void BinaryContext::printGlobalSymbols(raw_ostream& OS) const {

namespace {

/// Looks up the function whose input range covers \p Address.
/// Returns a pointer to the matching entry of \p BinaryFunctions, or nullptr
/// when no function contains the address.
BinaryFunction *getBinaryFunctionContainingAddress(
    uint64_t Address,
    std::map<uint64_t, BinaryFunction> &BinaryFunctions) {
  // upper_bound() yields the first function starting strictly after Address,
  // so the only possible container is the entry right before it.
  auto Candidate = BinaryFunctions.upper_bound(Address);
  if (Candidate == BinaryFunctions.begin())
    return nullptr;

  --Candidate;
  if (Candidate->first + Candidate->second.getSize() > Address)
    return &Candidate->second;

  return nullptr;
}

// Walks the DIE tree rooted at DIE depth-first. Every lexical block, inlined
// subroutine instance, try block and catch block that has a non-empty set of
// address ranges is appended to AddressRangesObjects.
void findAddressRangesObjects(
    const DWARFCompileUnit *Unit,
    const DWARFDebugInfoEntryMinimal *DIE,
    std::map<uint64_t, BinaryFunction> &Functions,
    std::vector<llvm::bolt::AddressRangesDWARFObject> &AddressRangesObjects) {
  const auto DIETag = DIE->getTag();
  const bool IsRangeCarryingTag =
      DIETag == dwarf::DW_TAG_lexical_block ||
      DIETag == dwarf::DW_TAG_inlined_subroutine ||
      DIETag == dwarf::DW_TAG_try_block ||
      DIETag == dwarf::DW_TAG_catch_block;

  if (IsRangeCarryingTag) {
    auto const &DIERanges = DIE->getAddressRanges(Unit);
    if (!DIERanges.empty()) {
      // All ranges are processed, including those of functions that are not
      // being updated. Abbrev entries are shared, so converting one DIE may
      // affect the others; the conservative choice that avoids growing
      // .debug_abbrev is to make every DIE use .debug_ranges, even when it
      // only has a simple [a,b) range. As a side benefit, the original
      // portion of .debug_ranges can be dropped to save space in the binary.
      auto Owner = getBinaryFunctionContainingAddress(DIERanges.front().first,
                                                      Functions);
      AddressRangesObjects.emplace_back(Unit, DIE);
      auto &NewObject = AddressRangesObjects.back();
      for (const auto &R : DIERanges) {
        if (!Owner || !Owner->isSimple()) {
          // No simple owning function: keep the range as absolute addresses.
          NewObject.addAbsoluteRange(R.first, R.second);
          continue;
        }
        NewObject.addAddressRange(*Owner, R.first, R.second);
      }
    }
  }

  // Descend into every child of this DIE.
  auto Child = DIE->getFirstChild();
  while (Child) {
    findAddressRangesObjects(Unit, Child, Functions, AddressRangesObjects);
    Child = Child->getSibling();
  }
}

/// Recursively finds DWARF DW_TAG_subprogram DIEs and matches them with
/// BinaryFunctions. Record DIEs for unknown subprograms (mostly functions that
/// are never called and removed from the binary) in Unknown.
void findSubprograms(DWARFCompileUnit *Unit,
const DWARFDebugInfoEntryMinimal *DIE,
std::map<uint64_t, BinaryFunction> &BinaryFunctions,
BinaryContext::DIECompileUnitVector &Unknown) {
std::map<uint64_t, BinaryFunction> &BinaryFunctions) {
if (DIE->isSubprogramDIE()) {
// TODO: handle DW_AT_ranges.
uint64_t LowPC, HighPC;
Expand All @@ -193,7 +143,7 @@ void findSubprograms(DWARFCompileUnit *Unit,
if (It != BinaryFunctions.end()) {
It->second.addSubprogramDIE(Unit, DIE);
} else {
Unknown.emplace_back(DIE, Unit);
// The function must have been optimized away by GC.
}
} else {
const auto RangesVector = DIE->getAddressRanges(Unit);
Expand All @@ -208,7 +158,7 @@ void findSubprograms(DWARFCompileUnit *Unit,
for (auto ChildDIE = DIE->getFirstChild();
ChildDIE != nullptr && !ChildDIE->isNULL();
ChildDIE = ChildDIE->getSibling()) {
findSubprograms(Unit, ChildDIE, BinaryFunctions, Unknown);
findSubprograms(Unit, ChildDIE, BinaryFunctions);
}
}

Expand Down Expand Up @@ -250,8 +200,7 @@ void BinaryContext::preprocessDebugInfo(
// For each CU, iterate over its children DIEs and match subprogram DIEs to
// BinaryFunctions.
for (auto &CU : DwCtx->compile_units()) {
findSubprograms(CU.get(), CU->getUnitDIE(false), BinaryFunctions,
UnknownFunctions);
findSubprograms(CU.get(), CU->getUnitDIE(false), BinaryFunctions);
}

// Some functions may not have a corresponding subprogram DIE
Expand Down Expand Up @@ -290,36 +239,6 @@ void BinaryContext::preprocessDebugInfo(
}
}

/// Collects the debug info that can only be gathered once functions are
/// known: DIEs carrying address ranges go into AddressRangesObjects, and the
/// .debug_loc lists that start inside a simple function go into
/// LocationLists.
void BinaryContext::preprocessFunctionDebugInfo(
    std::map<uint64_t, BinaryFunction> &BinaryFunctions) {
  // Pass 1: scan every compile unit's DIE tree for range-carrying objects.
  for (const auto &CU : DwCtx->compile_units()) {
    findAddressRangesObjects(CU.get(), CU->getUnitDIE(false), BinaryFunctions,
                             AddressRangesObjects);
  }

  // Pass 2: record location lists, keyed by the function containing the
  // first entry's start address.
  auto DebugLoc = DwCtx->getDebugLoc();
  for (const auto &ListEntry : DebugLoc->getLocationLists()) {
    const auto &Entries = ListEntry.Entries;
    if (Entries.empty())
      continue;

    auto *Owner = getBinaryFunctionContainingAddress(Entries.front().Begin,
                                                     BinaryFunctions);
    // Lists outside any function, or inside a non-simple one, are skipped.
    if (!(Owner && Owner->isSimple()))
      continue;

    LocationLists.emplace_back(ListEntry.Offset);
    auto &NewList = LocationLists.back();
    for (const auto &Piece : Entries) {
      NewList.addLocation(&Piece.Loc, *Owner, Piece.Begin, Piece.End);
    }
  }
}

void BinaryContext::printCFI(raw_ostream &OS, uint32_t Operation) {
switch(Operation) {
case MCCFIInstruction::OpSameValue: OS << "OpSameValue"; break;
Expand Down
25 changes: 4 additions & 21 deletions bolt/BinaryContext.h
Expand Up @@ -106,25 +106,9 @@ class BinaryContext {
/// when a function has more than a single entry point.
std::set<uint64_t> InterproceduralReferences;

/// List of DWARF location lists in .debug_loc.
std::vector<LocationList> LocationLists;

/// Section relocations.
std::map<SectionRef, std::set<Relocation>> SectionRelocations;

/// List of DWARF entries in .debug_info that have address ranges to be
/// updated. These include lexical blocks (DW_TAG_lexical_block) and concrete
/// instances of inlined subroutines (DW_TAG_inlined_subroutine).
std::vector<AddressRangesDWARFObject> AddressRangesObjects;

using DIECompileUnitVector =
std::vector<std::pair<const DWARFDebugInfoEntryMinimal *,
const DWARFCompileUnit *>> ;

/// List of subprogram DIEs that have addresses that don't match any
/// function, along with their CU.
DIECompileUnitVector UnknownFunctions;

std::unique_ptr<MCContext> Ctx;

std::unique_ptr<DWARFContext> DwCtx;
Expand Down Expand Up @@ -153,6 +137,8 @@ class BinaryContext {

std::unique_ptr<MCDisassembler> DisAsm;

std::unique_ptr<MCAsmBackend> MAB;

std::function<void(std::error_code)> ErrorCheck;

const DataReader &DR;
Expand Down Expand Up @@ -190,6 +176,8 @@ class BinaryContext {

~BinaryContext();

MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS);

/// Return a global symbol registered at a given \p Address. If no symbol
/// exists, create one with unique name using \p Prefix.
/// If there are multiple symbols registered at the \p Address, then
Expand Down Expand Up @@ -251,11 +239,6 @@ class BinaryContext {
void preprocessDebugInfo(
std::map<uint64_t, BinaryFunction> &BinaryFunctions);

/// Populate internal data structures with debug info that depends on
/// disassembled functions.
void preprocessFunctionDebugInfo(
std::map<uint64_t, BinaryFunction> &BinaryFunctions);

/// Add a filename entry from SrcCUID to DestCUID.
unsigned addDebugFilenameToUnit(const uint32_t DestCUID,
const uint32_t SrcCUID,
Expand Down

0 comments on commit 96adec5

Please sign in to comment.