Skip to content

Commit

Permalink
Update DWARF lexical blocks address ranges.
Browse files Browse the repository at this point in the history
Summary:
Updates DWARF lexical blocks address ranges in the output binary after optimizations.
This is similar to updating function address ranges except that the ranges representation needs
to be more general, since address ranges can begin or end in the middle of a basic block.

The following changes were made:

- Added a data structure for iterating over the basic blocks that intersect an address range: BasicBlockTable.h
- Added some more bookkeeping in BinaryBasicBlock. Basically, I needed to keep track of the block's size in the input binary as well as its address in the output binary. This information is mostly set by BinaryFunction after disassembly.
- Added a representation for address ranges relative to basic blocks (BasicBlockOffsetRanges.h). Will also serve for location lists.
- Added a representation for Lexical Blocks (LexicalBlock.h)
- Small refactorings in DebugArangesWriter:
-- Renamed to DebugRangesSectionsWriter since it also writes .debug_ranges
-- Refactored it not to depend on BinaryFunction but instead on anything that can be assigned an offset in .debug_ranges (added an interface for that)
- Iterate over the DIE tree during initialization to find lexical blocks in .debug_info (BinaryContext.cpp)
- Added patches to .debug_abbrev and .debug_info in RewriteInstance to update lexical blocks attributes (in fact, this part is very similar to what was done to function address ranges and I just refactored/reused that code)
- Added small test case (lexical_blocks_address_ranges_debug.test)

(cherry picked from FBD3113181)
  • Loading branch information
Gabriel Poesia authored and maksfb committed Mar 29, 2016
1 parent e8ef8a5 commit ffa9641
Show file tree
Hide file tree
Showing 14 changed files with 512 additions and 119 deletions.
70 changes: 70 additions & 0 deletions bolt/BasicBlockOffsetRanges.cpp
@@ -0,0 +1,70 @@
//===- BasicBlockOffsetRanges.cpp - list of address ranges relative to BBs ===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

#include "BasicBlockOffsetRanges.h"
#include "BinaryBasicBlock.h"
#include "BinaryFunction.h"

namespace llvm {
namespace bolt {

/// Records the input-binary address range [BeginAddress, EndAddress) as a list
/// of per-basic-block offset ranges.
///
/// Starting at the basic block of \p Function that contains \p BeginAddress,
/// the blocks are visited in index order. For every block that starts before
/// \p EndAddress, the part of the range that falls inside the block is
/// appended to AddressRanges, expressed as offsets from the block's start
/// address in the input binary.
///
/// Offsets are stored in 16 bits; a range that reaches further than that into
/// a block cannot be represented and trips an assertion instead of silently
/// wrapping around.
void BasicBlockOffsetRanges::addAddressRange(BinaryFunction &Function,
                                             uint64_t BeginAddress,
                                             uint64_t EndAddress) {
  auto FirstBB = Function.getBasicBlockContainingOffset(
      BeginAddress - Function.getAddress());
  assert(FirstBB && "No basic blocks in the function intersect given range.");
  // With assertions compiled out, drop the range rather than dereference a
  // null block below.
  if (!FirstBB)
    return;

  for (auto I = Function.getIndex(FirstBB), S = Function.size(); I != S; ++I) {
    auto BB = Function.getBasicBlockAtIndex(I);
    assert(BB->getFunction() == &Function &&
           "Mismatching functions.\n");

    // Blocks are visited in increasing index order; stop at the first one
    // that starts at or past the end of the range.
    const uint64_t BBAddress = Function.getAddress() + BB->getOffset();
    if (BBAddress >= EndAddress)
      break;

    // Clamp the requested range to the extent of this block in the input
    // binary.
    const uint64_t ClampedBegin = std::max(BBAddress, BeginAddress);
    const uint64_t ClampedEnd =
        std::min(BBAddress + Function.getBasicBlockOriginalSize(BB),
                 EndAddress);

    const uint64_t BeginOffset = ClampedBegin - BBAddress;
    const uint64_t EndOffset = ClampedEnd - BBAddress;
    assert(BeginOffset <= UINT16_MAX && EndOffset <= UINT16_MAX &&
           "Range offset inside basic block does not fit in 16 bits.");

    AddressRanges.push_back(
        BBAddressRange{BB,
                       static_cast<uint16_t>(BeginOffset),
                       static_cast<uint16_t>(EndOffset)});
  }
}

std::vector<std::pair<uint64_t, uint64_t>>
BasicBlockOffsetRanges::getAbsoluteAddressRanges() const {
std::vector<std::pair<uint64_t, uint64_t>> AbsoluteRanges;
for (const auto &BBAddressRange : AddressRanges) {
auto BBOutputAddressRange =
BBAddressRange.BasicBlock->getOutputAddressRange();
uint64_t NewRangeBegin = BBOutputAddressRange.first +
BBAddressRange.RangeBeginOffset;
// If the end offset pointed to the end of the basic block, then we set
// the new end range to cover the whole basic block as the BB's size
// might have increased.
auto BBFunction = BBAddressRange.BasicBlock->getFunction();
uint64_t NewRangeEnd =
(BBAddressRange.RangeEndOffset ==
BBFunction->getBasicBlockOriginalSize(BBAddressRange.BasicBlock))
? BBOutputAddressRange.second
: (BBOutputAddressRange.first + BBAddressRange.RangeEndOffset);
AbsoluteRanges.emplace_back(NewRangeBegin, NewRangeEnd);
}
return AbsoluteRanges;
}

} // namespace bolt
} // namespace llvm
58 changes: 58 additions & 0 deletions bolt/BasicBlockOffsetRanges.h
@@ -0,0 +1,58 @@
//===--- BasicBlockOffsetRanges.h - list of address ranges relative to BBs ===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Represents a list of address ranges where addresses are relative to the
// beginning of basic blocks. Useful for converting address ranges in the input
// binary to equivalent ranges after optimizations take place.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TOOLS_LLVM_BOLT_BASIC_BLOCK_OFFSET_RANGES_H
#define LLVM_TOOLS_LLVM_BOLT_BASIC_BLOCK_OFFSET_RANGES_H

#include <map>
#include <utility>
#include <vector>

namespace llvm {
namespace bolt {

class BinaryFunction;
class BinaryBasicBlock;

/// A list of address ranges in which every range is tied to one basic block
/// and expressed as offsets from that block's start address. Ranges are added
/// using input-binary addresses and can later be read back as absolute
/// addresses computed from each block's output address range.
class BasicBlockOffsetRanges {
private:
/// An address range inside one basic block.
struct BBAddressRange {
/// Basic block the range lies in.
const BinaryBasicBlock *BasicBlock;
/// Beginning of the range counting from BB's start address.
/// Stored in 16 bits.
uint16_t RangeBeginOffset;
/// (Exclusive) end of the range counting from BB's start address.
/// Stored in 16 bits.
uint16_t RangeEndOffset;
};

/// Per-block ranges recorded so far, in the order they were added.
std::vector<BBAddressRange> AddressRanges;

public:
/// Add range [BeginAddress, EndAddress) to the address ranges list.
/// \p Function is the function that contains the given address range.
/// The range is split at basic block boundaries, producing one entry per
/// basic block it intersects.
void addAddressRange(BinaryFunction &Function,
uint64_t BeginAddress,
uint64_t EndAddress);

/// Returns the list of absolute addresses calculated using the output address
/// of the basic blocks, i.e. the input ranges updated after basic block
/// addresses might have changed.
/// Each element is a (begin, end) pair; one pair is produced per stored
/// per-block range.
std::vector<std::pair<uint64_t, uint64_t>> getAbsoluteAddressRanges() const;
};

} // namespace bolt
} // namespace llvm

#endif
1 change: 0 additions & 1 deletion bolt/BinaryBasicBlock.cpp
Expand Up @@ -10,7 +10,6 @@
//===----------------------------------------------------------------------===//

#include "BinaryBasicBlock.h"
#include "BinaryFunction.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
Expand Down
37 changes: 36 additions & 1 deletion bolt/BinaryBasicBlock.h
Expand Up @@ -26,6 +26,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <limits>
#include <utility>

namespace llvm {
namespace bolt {
Expand All @@ -39,6 +40,15 @@ class BinaryBasicBlock {
/// Label associated with the block.
MCSymbol *Label{nullptr};

/// Function that owns this basic block. Defaults to null so that blocks built
/// through a constructor that does not receive an owner never expose an
/// indeterminate pointer via getFunction().
BinaryFunction *Function{nullptr};

/// Label associated with the end of the block in the output binary.
MCSymbol *EndLabel{nullptr};

/// [Begin, End) address range for this block in the output binary.
std::pair<uint64_t, uint64_t> OutputAddressRange{0, 0};

/// Original offset in the function.
uint64_t Offset{std::numeric_limits<uint64_t>::max()};

Expand Down Expand Up @@ -80,8 +90,9 @@ class BinaryBasicBlock {

explicit BinaryBasicBlock(
MCSymbol *Label,
BinaryFunction *Function,
uint64_t Offset = std::numeric_limits<uint64_t>::max())
: Label(Label), Offset(Offset) {}
: Label(Label), Function(Function), Offset(Offset) {}

/// Constructs a block from its original offset only. No label is passed in,
/// so Label keeps its nullptr default.
explicit BinaryBasicBlock(uint64_t Offset)
: Offset(Offset) {}
Expand Down Expand Up @@ -261,6 +272,30 @@ class BinaryBasicBlock {
return false;
}

/// Records \p Symbol as the label that marks the end of this basic block in
/// the output binary.
void setEndLabel(MCSymbol *Symbol) { EndLabel = Symbol; }

/// Returns the label that marks the end of this basic block in the output
/// binary, or nullptr if none has been set.
MCSymbol *getEndLabel() const { return EndLabel; }

/// Stores \p Range as the [begin, end) memory address range this basic block
/// occupies in the output binary.
void setOutputAddressRange(std::pair<uint64_t, uint64_t> Range) {
  OutputAddressRange = Range;
}

/// Returns the [begin, end) memory address range of this basic block in the
/// output binary; {0, 0} until a range has been set.
std::pair<uint64_t, uint64_t> getOutputAddressRange() const {
  return OutputAddressRange;
}

/// Returns the function that owns this basic block.
BinaryFunction *getFunction() const { return Function; }

private:

/// Adds predecessor to the BB. Most likely you don't need to call this.
Expand Down
62 changes: 62 additions & 0 deletions bolt/BinaryContext.cpp
Expand Up @@ -12,12 +12,15 @@
#include "BinaryContext.h"
#include "BinaryFunction.h"
#include "llvm/ADT/Twine.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"

namespace llvm {
namespace bolt {

// Out-of-line definition of the destructor declared in BinaryContext.h; it
// performs no work beyond destroying the members.
BinaryContext::~BinaryContext() = default;

MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address,
Twine Prefix) {
MCSymbol *Symbol{nullptr};
Expand All @@ -44,6 +47,59 @@ MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address,
return Symbol;
}

} // namespace bolt
} // namespace llvm

namespace {

using namespace llvm;
using namespace bolt;

/// Finds the function whose [start, start + size) span in the input binary
/// covers \p Address. \p BinaryFunctions is looked up by key, with each key
/// treated as the start address of the function it maps to.
/// Returns nullptr when no function covers the address.
BinaryFunction *getBinaryFunctionContainingAddress(
    uint64_t Address,
    std::map<uint64_t, BinaryFunction> &BinaryFunctions) {
  // upper_bound yields the first entry starting strictly after Address, so
  // the only possible match is the entry immediately before it.
  auto Candidate = BinaryFunctions.upper_bound(Address);
  if (Candidate == BinaryFunctions.begin())
    return nullptr;
  --Candidate;

  BinaryFunction &Function = Candidate->second;
  const bool CoversAddress = Candidate->first + Function.getSize() > Address;
  return CoversAddress ? &Function : nullptr;
}

// Traverses the DIE tree rooted at \p DIE in a recursive depth-first search
// and appends one entry to \p LexicalBlocks for every DW_TAG_lexical_block
// found.
//
// For each address range of a lexical block, the function containing the
// start of the range is looked up in \p Functions; the range is attached to
// the lexical block only if such a function exists and is simple.
//
// A null \p DIE is accepted and treated as an empty tree, so callers may pass
// the root DIE of a unit without checking it first.
void findLexicalBlocks(const DWARFCompileUnit *Unit,
                       const DWARFDebugInfoEntryMinimal *DIE,
                       std::map<uint64_t, BinaryFunction> &Functions,
                       std::vector<llvm::bolt::LexicalBlock> &LexicalBlocks) {
  if (!DIE)
    return;

  if (DIE->getTag() == dwarf::DW_TAG_lexical_block) {
    LexicalBlocks.emplace_back(Unit, DIE);
    // This reference is only valid until the recursive calls below, which may
    // grow LexicalBlocks; it must not be used past this if-block.
    auto &LB = LexicalBlocks.back();
    for (const auto &Range : DIE->getAddressRanges(Unit)) {
      auto *Function =
          getBinaryFunctionContainingAddress(Range.first, Functions);
      if (Function && Function->isSimple())
        LB.addAddressRange(*Function, Range.first, Range.second);
    }
  }

  // Recursively visit each child.
  for (auto Child = DIE->getFirstChild(); Child; Child = Child->getSibling())
    findLexicalBlocks(Unit, Child, Functions, LexicalBlocks);
}

} // namespace

namespace llvm {
namespace bolt {

void BinaryContext::preprocessDebugInfo() {
// Iterate over all DWARF compilation units and map their offset in the
// binary to themselves in OffsetDwarfCUMap
Expand Down Expand Up @@ -95,6 +151,12 @@ void BinaryContext::preprocessFunctionDebugInfo(
}
}
}

// Iterate over DIE trees finding lexical blocks.
for (const auto &CU : DwCtx->compile_units()) {
findLexicalBlocks(CU.get(), CU->getUnitDIE(false), BinaryFunctions,
LexicalBlocks);
}
}

} // namespace bolt
Expand Down
6 changes: 5 additions & 1 deletion bolt/BinaryContext.h
Expand Up @@ -14,6 +14,7 @@
#ifndef LLVM_TOOLS_LLVM_BOLT_BINARY_CONTEXT_H
#define LLVM_TOOLS_LLVM_BOLT_BINARY_CONTEXT_H

#include "LexicalBlock.h"
#include "llvm/ADT/Triple.h"
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
Expand Down Expand Up @@ -69,6 +70,9 @@ class BinaryContext {
/// Maps DWARF CUID to offset of stmt_list attribute in .debug_info.
std::map<unsigned, uint32_t> LineTableOffsetCUMap;

/// List of DWARF lexical blocks in .debug_info.
std::vector<LexicalBlock> LexicalBlocks;

std::unique_ptr<MCContext> Ctx;

std::unique_ptr<DWARFContext> DwCtx;
Expand Down Expand Up @@ -132,7 +136,7 @@ class BinaryContext {
DisAsm(std::move(DisAsm)),
DR(DR) {}

~BinaryContext() {}
~BinaryContext();

/// Return a global symbol registered at a given \p Address. If no symbol
/// exists, create one with unique name using \p Prefix.
Expand Down
12 changes: 11 additions & 1 deletion bolt/BinaryFunction.cpp
Expand Up @@ -87,6 +87,16 @@ BinaryFunction::getBasicBlockContainingOffset(uint64_t Offset) {
return &(*--I);
}

/// Computes the size \p BB had in the original binary: the distance from its
/// offset to the offset of the block that follows it in BasicBlocks, or to
/// Size when \p BB is the last block.
size_t
BinaryFunction::getBasicBlockOriginalSize(const BinaryBasicBlock *BB) const {
  const auto Index = getIndex(BB);
  const bool IsLastBlock = (Index + 1 == BasicBlocks.size());
  if (IsLastBlock)
    return Size - BB->getOffset();
  return BasicBlocks[Index + 1].getOffset() - BB->getOffset();
}

unsigned BinaryFunction::eraseDeadBBs(
std::map<BinaryBasicBlock *, bool> &ToPreserve) {
BasicBlockOrderType NewLayout;
Expand Down Expand Up @@ -704,7 +714,7 @@ bool BinaryFunction::buildCFG() {
}
}

// Set the basic block layout to the original order
// Set the basic block layout to the original order.
for (auto &BB : BasicBlocks) {
BasicBlocksLayout.emplace_back(&BB);
}
Expand Down
13 changes: 11 additions & 2 deletions bolt/BinaryFunction.h
Expand Up @@ -19,6 +19,7 @@

#include "BinaryBasicBlock.h"
#include "BinaryContext.h"
#include "DebugRangesSectionsWriter.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/ilist.h"
#include "llvm/MC/MCCodeEmitter.h"
Expand Down Expand Up @@ -48,7 +49,7 @@ namespace bolt {
/// BinaryFunction is a representation of machine-level function.
//
/// We use the term "Binary" as "Machine" was already taken.
class BinaryFunction {
class BinaryFunction : public AddressRangesOwner {
public:
enum class State : char {
Empty = 0, /// Function body is empty
Expand Down Expand Up @@ -370,6 +371,11 @@ class BinaryFunction {
return I;
}

/// Returns the n-th basic block in this function in its original layout, or
/// nullptr if n >= size().
const BinaryBasicBlock *getBasicBlockAtIndex(unsigned Index) const {
  // Check the bound explicitly: a checked element access would never yield
  // nullptr for an out-of-range index, contradicting the contract above.
  if (Index >= BasicBlocks.size())
    return nullptr;
  return &BasicBlocks[Index];
}

/// Return the name of the function as extracted from the binary file.
StringRef getName() const {
Expand Down Expand Up @@ -465,7 +471,7 @@ class BinaryFunction {
assert(BC.Ctx && "cannot be called with empty context");
if (!Label)
Label = BC.Ctx->createTempSymbol("BB", true);
BasicBlocks.emplace_back(BinaryBasicBlock(Label, Offset));
BasicBlocks.emplace_back(BinaryBasicBlock(Label, this, Offset));

auto BB = &BasicBlocks.back();

Expand Down Expand Up @@ -758,6 +764,9 @@ class BinaryFunction {
return DIECompileUnit;
}

/// Returns the size of the basic block in the original binary.
size_t getBasicBlockOriginalSize(const BinaryBasicBlock *BB) const;

virtual ~BinaryFunction() {}

/// Info for fragmented functions.
Expand Down

0 comments on commit ffa9641

Please sign in to comment.