Skip to content

Commit

Permalink
[DebugInfo] Drop location ranges for variables which exist entirely o…
Browse files Browse the repository at this point in the history
…utside the variable's scope

Summary:
This patch reduces file size in debug builds by dropping variable locations a
debugger user will not see.

After building the debug entity history map we loop through it. For each
variable we look at each entry. If the entry opens a location range which does
not intersect any of the variable's scope's ranges then we mark it for removal.
After visiting the entries for each variable we also mark any clobbering
entries which will no longer be referenced for removal, and then finally erase
the marked entries. This all requires the ability to query the order of
instructions, so before this runs we number them.

Tests:
Added llvm/test/DebugInfo/X86/trim-var-locs.mir

Modified llvm/test/DebugInfo/COFF/register-variables.ll
  Branch folding merges the tails of if.then and if.else into if.else. Each
  blocks' debug-locations point to different scopes so when they're merged we
  can't use either. Because of this the variable 'c' ends up with a location
  range which doesn't cover any instructions in its scope; with the patch
  applied the location range is dropped and its flag changes to IsOptimizedOut.

Modified llvm/test/DebugInfo/X86/live-debug-variables.ll
Modified llvm/test/DebugInfo/ARM/PR26163.ll
  In both tests an out of scope location is now removed. The remaining location
  covers the entire scope of the variable allowing us to emit it as a single
  location.

Reviewed By: aprantl

Differential Revision: https://reviews.llvm.org/D82129
  • Loading branch information
OCHyams committed Jul 22, 2020
1 parent 3520297 commit ce6de37
Show file tree
Hide file tree
Showing 7 changed files with 340 additions and 22 deletions.
5 changes: 5 additions & 0 deletions llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h
Expand Up @@ -12,6 +12,7 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LexicalScopes.h"
#include <utility>

namespace llvm {
Expand Down Expand Up @@ -52,6 +53,8 @@ class DbgValueHistoryMap {
/// register-described debug values that have their end index
/// set to this entry's position in the entry vector.
class Entry {
friend DbgValueHistoryMap;

public:
enum EntryKind { DbgValue, Clobber };

Expand Down Expand Up @@ -89,6 +92,8 @@ class DbgValueHistoryMap {
return Entries[Index];
}

/// Drop location ranges which exist entirely outside each variable's scope.
void trimLocationRanges(const MachineFunction &MF, LexicalScopes &LScopes);
bool empty() const { return VarEntries.empty(); }
void clear() { VarEntries.clear(); }
EntriesMap::const_iterator begin() const { return VarEntries.begin(); }
Expand Down
188 changes: 188 additions & 0 deletions llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
Expand Up @@ -8,9 +8,11 @@

#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
Expand Down Expand Up @@ -90,6 +92,192 @@ void DbgValueHistoryMap::Entry::endEntry(EntryIndex Index) {
EndIndex = Index;
}

using OrderMap = DenseMap<const MachineInstr *, unsigned>;
/// Number instructions so that we can compare instruction positions within MF.
/// Meta instructions are given the same nubmer as the preceding instruction.
/// Because the block ordering will not change it is possible (and safe) to
/// compare instruction positions between blocks.
static void numberInstructions(const MachineFunction &MF, OrderMap &Ordering) {
// We give meta instructions the same number as the peceding instruction
// because this function is written for the task of comparing positions of
// variable location ranges against scope ranges. To reflect what we'll see
// in the binary, when we look at location ranges we must consider all
// DBG_VALUEs between two real instructions at the same position. And a
// scope range which ends on a meta instruction should be considered to end
// at the last seen real instruction. E.g.
//
// 1 instruction p Both the variable location for x and for y start
// 1 DBG_VALUE for "x" after instruction p so we give them all the same
// 1 DBG_VALUE for "y" number. If a scope range ends at DBG_VALUE for "y",
// 2 instruction q we should treat it as ending after instruction p
// because it will be the last real instruction in the
// range. DBG_VALUEs at or after this position for
// variables declared in the scope will have no effect.
unsigned position = 0;
for (const MachineBasicBlock &MBB : MF)
for (const MachineInstr &MI : MBB)
Ordering[&MI] = MI.isMetaInstruction() ? position : ++position;
}

/// Check if instruction A comes before B. Meta instructions have the same
/// position as the preceding non-meta instruction. See numberInstructions for
/// more info.
static bool isBefore(const MachineInstr *A, const MachineInstr *B,
const OrderMap &Ordering) {
return Ordering.lookup(A) < Ordering.lookup(B);
}

/// Check if the instruction range [StartMI, EndMI] intersects any instruction
/// range in Ranges. EndMI can be nullptr to indicate that the range is
/// unbounded. Assumes Ranges is ordered and disjoint. Returns true and points
/// to the first intersecting scope range if one exists.
static Optional<ArrayRef<InsnRange>::iterator>
intersects(const MachineInstr *StartMI, const MachineInstr *EndMI,
const ArrayRef<InsnRange> &Ranges, const OrderMap &Ordering) {
for (auto RangesI = Ranges.begin(), RangesE = Ranges.end();
RangesI != RangesE; ++RangesI) {
if (EndMI && isBefore(EndMI, RangesI->first, Ordering))
return None;
if (EndMI && !isBefore(RangesI->second, EndMI, Ordering))
return RangesI;
if (isBefore(StartMI, RangesI->second, Ordering))
return RangesI;
}
return None;
}

void DbgValueHistoryMap::trimLocationRanges(const MachineFunction &MF,
LexicalScopes &LScopes) {
OrderMap Ordering;
numberInstructions(MF, Ordering);

// The indices of the entries we're going to remove for each variable.
SmallVector<EntryIndex, 4> ToRemove;
// Entry reference count for each variable. Clobbers left with no references
// will be removed.
SmallVector<int, 4> ReferenceCount;
// Entries reference other entries by index. Offsets is used to remap these
// references if any entries are removed.
SmallVector<size_t, 4> Offsets;

for (auto &Record : VarEntries) {
auto &HistoryMapEntries = Record.second;
if (HistoryMapEntries.empty())
continue;

InlinedEntity Entity = Record.first;
const DILocalVariable *LocalVar = cast<DILocalVariable>(Entity.first);

LexicalScope *Scope = nullptr;
if (const DILocation *InlinedAt = Entity.second) {
Scope = LScopes.findInlinedScope(LocalVar->getScope(), InlinedAt);
} else {
Scope = LScopes.findLexicalScope(LocalVar->getScope());
// Ignore variables for non-inlined function level scopes. The scope
// ranges (from scope->getRanges()) will not include any instructions
// before the first one with a debug-location, which could cause us to
// incorrectly drop a location. We could introduce special casing for
// these variables, but it doesn't seem worth it because no out-of-scope
// locations have been observed for variables declared in function level
// scopes.
if (Scope &&
(Scope->getScopeNode() == Scope->getScopeNode()->getSubprogram()) &&
(Scope->getScopeNode() == LocalVar->getScope()))
continue;
}

// If there is no scope for the variable then something has probably gone
// wrong.
if (!Scope)
continue;

ToRemove.clear();
// Zero the reference counts.
ReferenceCount.assign(HistoryMapEntries.size(), 0);
// Index of the DBG_VALUE which marks the start of the current location
// range.
EntryIndex StartIndex = 0;
ArrayRef<InsnRange> ScopeRanges(Scope->getRanges());
for (auto EI = HistoryMapEntries.begin(), EE = HistoryMapEntries.end();
EI != EE; ++EI, ++StartIndex) {
// Only DBG_VALUEs can open location ranges so skip anything else.
if (!EI->isDbgValue())
continue;

// Index of the entry which closes this range.
EntryIndex EndIndex = EI->getEndIndex();
// If this range is closed bump the reference count of the closing entry.
if (EndIndex != NoEntry)
ReferenceCount[EndIndex] += 1;
// Skip this location range if the opening entry is still referenced. It
// may close a location range which intersects a scope range.
// TODO: We could be 'smarter' and trim these kinds of ranges such that
// they do not leak out of the scope ranges if they partially overlap.
if (ReferenceCount[StartIndex] > 0)
continue;

const MachineInstr *StartMI = EI->getInstr();
const MachineInstr *EndMI = EndIndex != NoEntry
? HistoryMapEntries[EndIndex].getInstr()
: nullptr;
// Check if the location range [StartMI, EndMI] intersects with any scope
// range for the variable.
if (auto R = intersects(StartMI, EndMI, ScopeRanges, Ordering)) {
// Adjust ScopeRanges to exclude ranges which subsequent location ranges
// cannot possibly intersect.
ScopeRanges = ArrayRef<InsnRange>(R.getValue(), ScopeRanges.end());
} else {
// If the location range does not intersect any scope range then the
// DBG_VALUE which opened this location range is usless, mark it for
// removal.
ToRemove.push_back(StartIndex);
// Because we'll be removing this entry we need to update the reference
// count of the closing entry, if one exists.
if (EndIndex != NoEntry)
ReferenceCount[EndIndex] -= 1;
}
}

// If there is nothing to remove then jump to next variable.
if (ToRemove.empty())
continue;

// Mark clobbers that will no longer close any location ranges for removal.
for (size_t i = 0; i < HistoryMapEntries.size(); ++i)
if (ReferenceCount[i] <= 0 && HistoryMapEntries[i].isClobber())
ToRemove.push_back(i);

std::sort(ToRemove.begin(), ToRemove.end());

// Build an offset map so we can update the EndIndex of the remaining
// entries.
// Zero the offsets.
Offsets.assign(HistoryMapEntries.size(), 0);
size_t CurOffset = 0;
auto ToRemoveItr = ToRemove.begin();
for (size_t EntryIdx = *ToRemoveItr; EntryIdx < HistoryMapEntries.size();
++EntryIdx) {
// Check if this is an entry which will be removed.
if (ToRemoveItr != ToRemove.end() && *ToRemoveItr == EntryIdx) {
++ToRemoveItr;
++CurOffset;
}
Offsets[EntryIdx] = CurOffset;
}

// Update the EndIndex of the entries to account for those which will be
// removed.
for (auto &Entry : HistoryMapEntries)
if (Entry.isClosed())
Entry.EndIndex -= Offsets[Entry.EndIndex];

// Now actually remove the entries. Iterate backwards so that our remaining
// ToRemove indices are valid after each erase.
for (auto Itr = ToRemove.rbegin(), End = ToRemove.rend(); Itr != End; ++Itr)
HistoryMapEntries.erase(HistoryMapEntries.begin() + *Itr);
}
}

void DbgLabelInstrMap::addInstr(InlinedEntity Label, const MachineInstr &MI) {
assert(MI.isDebugLabel() && "not a DBG_LABEL");
LabelInstr[Label] = &MI;
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
Expand Up @@ -21,11 +21,16 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;

#define DEBUG_TYPE "dwarfdebug"

/// If true, we drop variable location ranges which exist entirely outside the
/// variable's lexical scope instruction ranges.
static cl::opt<bool> TrimVarLocs("trim-var-locs", cl::Hidden, cl::init(true));

Optional<DbgVariableLocation>
DbgVariableLocation::extractFromMachineInstruction(
const MachineInstr &Instruction) {
Expand Down Expand Up @@ -191,6 +196,8 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
assert(DbgLabels.empty() && "DbgLabels map wasn't cleaned!");
calculateDbgEntityHistory(MF, Asm->MF->getSubtarget().getRegisterInfo(),
DbgValues, DbgLabels);
if (TrimVarLocs)
DbgValues.trimLocationRanges(*MF, LScopes);
LLVM_DEBUG(DbgValues.dump());

// Request labels for the full history.
Expand Down
12 changes: 8 additions & 4 deletions llvm/test/DebugInfo/ARM/PR26163.ll
Expand Up @@ -8,10 +8,14 @@
; but it is what is currently being emitted. Any change here needs to be
; intentional, so the test is very specific.
;
; CHECK: DW_TAG_inlined_subroutine
; CHECK: DW_TAG_variable
; CHECK: DW_AT_location ({{.*}}
; CHECK-NEXT: [0x00000004, 0x00000014): DW_OP_lit0, DW_OP_stack_value, DW_OP_piece 0x4)
; The variable is given a single location instead of a location list entry
; because the function validThroughout has a special code path for single
; locations with a constant value that start in the prologue.
;
; CHECK: DW_TAG_inlined_subroutine
; CHECK: DW_TAG_variable
; CHECK-NEXT: DW_AT_location (DW_OP_lit0, DW_OP_stack_value, DW_OP_piece 0x4)
; CHECK-NEXT DW_AT_name ("i4")

; Created form the following test case (PR26163) with
; clang -cc1 -triple armv4t--freebsd11.0-gnueabi -emit-obj -debug-info-kind=standalone -O2 -x c test.c
Expand Down
13 changes: 2 additions & 11 deletions llvm/test/DebugInfo/COFF/register-variables.ll
Expand Up @@ -60,13 +60,11 @@
; ASM: .cv_def_range [[p_ecx_esi]] [[func_end]], reg, 23
; ASM: .short 4414 # Record kind: S_LOCAL
; ASM: .asciz "c"
; ASM: .cv_def_range [[after_if]] [[func_finished]], reg, 17
; ASM: .short 4414 # Record kind: S_LOCAL
; ASM: .asciz "a"
; ASM: .cv_def_range [[after_je]] [[after_inc_eax]], reg, 17
; ASM: .short 4414 # Record kind: S_LOCAL
; ASM: .asciz "b"
; ASM: .cv_def_range [[after_if]] [[after_if]], reg, 17

; Note: "b" is a victim of tail de-duplication / branch folding.

Expand Down Expand Up @@ -109,18 +107,11 @@
; OBJ: }
; OBJ: LocalSym {
; OBJ: Type: int (0x74)
; OBJ: Flags [ (0x0)
; OBJ: Flags [ (0x100)
; OBJ: IsOptimizedOut (0x100)
; OBJ: ]
; OBJ: VarName: c
; OBJ: }
; OBJ: DefRangeRegisterSym {
; OBJ: Register: EAX (0x11)
; OBJ: LocalVariableAddrRange {
; OBJ: OffsetStart: .text+0x1A
; OBJ: ISectStart: 0x0
; OBJ: Range: 0xC
; OBJ: }
; OBJ: }
; OBJ: LocalSym {
; OBJ: Type: int (0x74)
; OBJ: Flags [ (0x0)
Expand Down
16 changes: 9 additions & 7 deletions llvm/test/DebugInfo/X86/live-debug-variables.ll
@@ -1,4 +1,5 @@
; RUN: llc -mtriple=x86_64-linux-gnu -filetype=obj -o - %s | llvm-dwarfdump -debug-loc - | FileCheck %s
; RUN: llc -mtriple=x86_64-linux-gnu -filetype=obj -o - %s | llvm-dwarfdump -name i4 - \
; RUN: | FileCheck %s

; The test inlines the function F four times, with each inlined variable for
; "i4" sharing the same virtual register. This means the live interval of the
Expand All @@ -22,12 +23,13 @@
; F(a,b,c,d,e);
; }

; CHECK: .debug_loc contents:
; CHECK-NEXT: 0x00000000:
; We currently emit an entry for the function prologue, too, which could be optimized away.
; CHECK: (0x0000000000000018, 0x0000000000000072): DW_OP_reg3 RBX
; We should only have one entry inside the function.
; CHECK-NOT: :
; Ignore the abstract entry.
; CHECK: DW_TAG_formal_parameter
; Check concrete entry has a single location.
; CHECK: DW_TAG_formal_parameter
; CHECK-NEXT: DW_AT_location (DW_OP_reg3 RBX)
; CHECK-NEXT: DW_AT_abstract_origin
; CHECK-NOT: DW_TAG_formal_parameter

declare i32 @foobar(i32, i32, i32, i32, i32)

Expand Down

0 comments on commit ce6de37

Please sign in to comment.