Skip to content

Commit

Permalink
[Assignment Tracking] Coalesce dbg loc definitions with contiguous fr…
Browse files Browse the repository at this point in the history
…agments

MemLocFragmentFill uses an IntervalMap to track which bits of each variable are
stack-homed. Intervals with the same value (same stack location base address)
are automatically coalesced by the map. This patch changes the analysis to take
advantage of that and insert a new dbg loc after each def if any coalescing
took place. This results in some additional redundant defs (we insert a def,
then another that by definition shadows the previous one if any coalescing took
place) but they're all cleaned up thanks to the previous patch in this stack.

This reduces the total number of fragments created by
AssignmentTrackingAnalysis which reduces compile time because LiveDebugValues
computes SSA for every fragment it encounters. There's a geomean reduction in
instructions retired in a CTMark LTO-O3-g build of 0.3% with these two patches.

One small caveat is that this technique can produce partially overlapping
fragments (e.g. slice [0, 32) and slice [16, 64)), which we know
LiveDebugVariables doesn't really handle correctly. Used in combination with
instruction-referencing this isn't a problem, since LiveDebugVariables is
effectively side-stepped in instruction-referencing mode. Given this, the
coalescing is only enabled when instruction-referencing is enabled (but the
behaviour can be overridden using -debug-ata-coalesce-frags=<bool>).

Reviewed By: jmorse

Differential Revision: https://reviews.llvm.org/D146980
  • Loading branch information
OCHyams committed Mar 29, 2023
1 parent 8e56a19 commit d4879d7
Show file tree
Hide file tree
Showing 9 changed files with 391 additions and 19 deletions.
71 changes: 66 additions & 5 deletions llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
@@ -1,4 +1,5 @@
#include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
#include "LiveDebugValues/LiveDebugValues.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/IntervalMap.h"
Expand Down Expand Up @@ -48,6 +49,12 @@ static cl::opt<bool> EnableMemLocFragFill("mem-loc-frag-fill", cl::init(true),
static cl::opt<bool> PrintResults("print-debug-ata", cl::init(false),
cl::Hidden);

/// Coalesce adjacent dbg locs describing memory locations that have contiguous
/// fragments. This reduces the cost of LiveDebugValues which does SSA
/// construction for each explicitly stated variable fragment.
///
/// This is a tri-state option. Left unset, shouldCoalesceFragments() decides
/// by asking whether instruction-referencing should be used for the function's
/// target triple; passing -debug-ata-coalesce-frags=<bool> forces coalescing
/// on or off regardless.
static cl::opt<cl::boolOrDefault>
CoalesceAdjacentFragmentsOpt("debug-ata-coalesce-frags", cl::Hidden);

// Implicit conversions are disabled for enum class types, so unfortunately we
// need to create a DenseMapInfo wrapper around the specified underlying type.
template <> struct llvm::DenseMapInfo<VariableID> {
Expand Down Expand Up @@ -287,6 +294,24 @@ static DebugAggregate getAggregate(const DebugVariable &Var) {
return DebugAggregate(Var.getVariable(), Var.getInlinedAt());
}

static bool shouldCoalesceFragments(Function &F) {
  // Coalescing fragments cuts compiler run time when instruction referencing
  // is in use, but it may lead LiveDebugVariables to create incorrect
  // locations. Instruction-referencing mode effectively bypasses
  // LiveDebugVariables, so when the cl::opt flag has not been given the
  // decision follows whether instruction referencing should be used for the
  // module's target triple. An explicit flag value always takes precedence.
  const cl::boolOrDefault Setting = CoalesceAdjacentFragmentsOpt;

  // Explicit user choice: honour it unconditionally.
  if (Setting == cl::boolOrDefault::BOU_TRUE)
    return true;
  if (Setting == cl::boolOrDefault::BOU_FALSE)
    return false;

  // No explicit choice: defer to the instruction-referencing query.
  if (Setting == cl::boolOrDefault::BOU_UNSET) {
    const Triple TargetTriple(F.getParent()->getTargetTriple());
    return debuginfoShouldUseDebugInstrRef(TargetTriple);
  }

  llvm_unreachable("Unknown boolOrDefault value");
}

namespace {
/// In dwarf emission, the following sequence
/// 1. dbg.value ... Fragment(0, 64)
Expand All @@ -310,6 +335,7 @@ class MemLocFragmentFill {
Function &Fn;
FunctionVarLocsBuilder *FnVarLocs;
const DenseSet<DebugAggregate> *VarsWithStackSlot;
bool CoalesceAdjacentFragments;

// 0 = no memory location.
using BaseAddress = unsigned;
Expand Down Expand Up @@ -574,6 +600,31 @@ class MemLocFragmentFill {
<< " bits [" << StartBit << ", " << EndBit << ")\n");
}

/// Emit one extra memory-location def covering the coalesced interval that
/// contains \p StartBit in \p FragMap, provided that interval is not exactly
/// [\p StartBit, \p EndBit). A differing start or stop indicates - assuming
/// StartBit->EndBit has just been inserted - that the map merged the new
/// slice with a neighbouring one. Does nothing when CoalesceAdjacentFragments
/// is false.
void coalesceFragments(BasicBlock &BB, Instruction &Before, unsigned Var,
                       unsigned StartBit, unsigned EndBit, unsigned Base,
                       DebugLoc DL, const FragsInMemMap &FragMap) {
  if (CoalesceAdjacentFragments) {
    // The location is already in the map, which will have merged adjacent
    // intervals (variable fragments) that describe the same memory location.
    // Look up the interval that now holds StartBit to see the merged extent.
    auto Merged = FragMap.find(StartBit);
    const auto MergedStart = Merged.start();
    const auto MergedStop = Merged.stop();

    // Only act if the interval differs from the slice that was inserted,
    // i.e. coalescing has taken place.
    const bool WasCoalesced = MergedStart != StartBit || MergedStop != EndBit;
    if (WasCoalesced) {
      // The new loc may eclipse other locs that were just inserted. This is
      // okay as redundant locs will be cleaned up later.
      LLVM_DEBUG(dbgs() << "- Insert loc for bits " << MergedStart
                        << " to " << MergedStop << "\n");
      insertMemLoc(BB, Before, Var, MergedStart, MergedStop, Base, DL);
    }
  }
}

void addDef(const VarLocInfo &VarLoc, Instruction &Before, BasicBlock &BB,
VarFragMap &LiveSet) {
DebugVariable DbgVar = FnVarLocs->getVariable(VarLoc.VariableID);
Expand Down Expand Up @@ -639,6 +690,8 @@ class MemLocFragmentFill {
if (!FragMap.overlaps(StartBit, EndBit)) {
LLVM_DEBUG(dbgs() << "- No overlaps\n");
FragMap.insert(StartBit, EndBit, Base);
coalesceFragments(BB, Before, Var, StartBit, EndBit, Base, VarLoc.DL,
FragMap);
return;
}
// There is at least one overlap.
Expand Down Expand Up @@ -729,6 +782,9 @@ class MemLocFragmentFill {
LLVM_DEBUG(dbgs() << "- Insert DEF into now-empty space\n");
FragMap.insert(StartBit, EndBit, Base);
}

coalesceFragments(BB, Before, Var, StartBit, EndBit, Base, VarLoc.DL,
FragMap);
}

bool skipVariable(const DILocalVariable *V) { return !V->getSizeInBits(); }
Expand All @@ -746,8 +802,10 @@ class MemLocFragmentFill {

public:
MemLocFragmentFill(Function &Fn,
const DenseSet<DebugAggregate> *VarsWithStackSlot)
: Fn(Fn), VarsWithStackSlot(VarsWithStackSlot) {}
const DenseSet<DebugAggregate> *VarsWithStackSlot,
bool CoalesceAdjacentFragments)
: Fn(Fn), VarsWithStackSlot(VarsWithStackSlot),
CoalesceAdjacentFragments(CoalesceAdjacentFragments) {}

/// Add variable locations to \p FnVarLocs so that any bits of a variable
/// with a memory location have that location explicitly reinstated at each
Expand Down Expand Up @@ -864,8 +922,10 @@ class MemLocFragmentFill {

for (auto &FragMemLoc : FragMemLocs) {
DIExpression *Expr = DIExpression::get(Ctx, std::nullopt);
Expr = *DIExpression::createFragmentExpression(
Expr, FragMemLoc.OffsetInBits, FragMemLoc.SizeInBits);
if (FragMemLoc.SizeInBits !=
*Aggregates[FragMemLoc.Var].first->getSizeInBits())
Expr = *DIExpression::createFragmentExpression(
Expr, FragMemLoc.OffsetInBits, FragMemLoc.SizeInBits);
Expr = DIExpression::prepend(Expr, DIExpression::DerefAfter,
FragMemLoc.OffsetInBits / 8);
DebugVariable Var(Aggregates[FragMemLoc.Var].first, Expr,
Expand Down Expand Up @@ -2455,7 +2515,8 @@ static void analyzeFunction(Function &Fn, const DataLayout &Layout,
}

if (Changed) {
MemLocFragmentFill Pass(Fn, &VarsWithStackSlot);
MemLocFragmentFill Pass(Fn, &VarsWithStackSlot,
shouldCoalesceFragments(Fn));
Pass.run(FnVarLocs);

// Remove redundant entries. As well as reducing memory consumption and
Expand Down
104 changes: 104 additions & 0 deletions llvm/test/DebugInfo/assignment-tracking/X86/coalesce-cfg.ll
@@ -0,0 +1,104 @@
; RUN: llc %s -o - -stop-after=finalize-isel \
; RUN: | FileCheck %s --implicit-check-not=DBG_

;; Test coalescing of contiguous fragments in adjacent location definitions.
;; Further details and check directives inline.

target triple = "x86_64-unknown-linux-gnu"

@cond = dso_local global i8 0, align 1

;; The final store and linked dbg.assign indicate the whole variable is located
;; on the stack. Coalesce the two fragment defs that are generated (0-32,
;; 32-64) at the final dbg.assign into one (0-64, which covers the whole
;; variable meaning we don't need a fragment expression). And check the two
;; DBG_VALUEs in if.then are not coalesced, since they specify different
;; locations. This is the same as the first test in coalesce-simple.ll except
;; the dbg intrinsics are split up over a simple diamond CFG to check the info
;; is propagated between blocks correctly.

; CHECK-LABEL: bb.0.entry:
; CHECK-NEXT: successors:
; CHECK-NEXT: {{^ *$}}
; CHECK-NEXT: DBG_VALUE %stack.0.a, $noreg, ![[#]], !DIExpression(DW_OP_deref)
; CHECK-NEXT: TEST8mi
; CHECK-NEXT: JCC_1 %bb.2
; CHECK-NEXT: JMP_1 %bb.1

; CHECK-LABEL: bb.1.if.then:
; CHECK-NEXT: successors:
; CHECK-NEXT: {{^ *$}}
; CHECK-NEXT: MOV8mi $rip, 1, $noreg, @cond, $noreg, 0 :: (store (s8) into @cond)
; CHECK-NEXT: DBG_VALUE 1, $noreg, ![[#]], !DIExpression(DW_OP_LLVM_fragment, 0, 32)
; CHECK-NEXT: DBG_VALUE %stack.0.a, $noreg, ![[#]], !DIExpression(DW_OP_plus_uconst, 4, DW_OP_deref, DW_OP_LLVM_fragment, 32, 32)
; CHECK-NEXT: JMP_1 %bb.3

; CHECK-LABEL: bb.2.if.else:
; CHECK-NEXT: successors:
; CHECK-NEXT: {{^ *$}}
; CHECK-NEXT: MOV8mi $rip, 1, $noreg, @cond, $noreg, 1 :: (store (s8) into @cond)

; CHECK-LABEL: bb.3.if.end:
; CHECK-NEXT: MOV32mi %stack.0.a, 1, $noreg, 0, $noreg, 5 :: (store (s32) into %ir.a, align 8)
; CHECK-NEXT: DBG_VALUE %stack.0.a, $noreg, ![[#]], !DIExpression(DW_OP_deref)
; CHECK-NEXT: RET 0

;; Diamond CFG: entry branches on @cond to if.then / if.else, which both fall
;; through to if.end.
define dso_local void @_Z3funv() local_unnamed_addr !dbg !16 {
entry:
;; The dbg.assign below names %a as the address for the whole of variable !20
;; (empty fragment expression), linked to the alloca through !37.
%a = alloca i64, !DIAssignID !37
call void @llvm.dbg.assign(metadata i64 poison, metadata !20, metadata !DIExpression(), metadata !37, metadata ptr %a, metadata !DIExpression()), !dbg !25
%0 = load i8, ptr @cond, align 1
%tobool = trunc i8 %0 to i1
br i1 %tobool, label %if.then, label %if.else

if.then:
store i1 false, ptr @cond
;; Bits [0, 32) of the variable are given the constant value 1 on this path
;; only.
call void @llvm.dbg.value(metadata i32 1, metadata !20, metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32)), !dbg !25
br label %if.end

if.else:
;; No debug intrinsics on this path.
store i1 true, ptr @cond
br label %if.end

if.end:
;; The store is linked through !38 to a dbg.assign for bits [0, 32) with
;; address %a.
store i32 5, ptr %a, !DIAssignID !38
call void @llvm.dbg.assign(metadata i32 5, metadata !20, metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32), metadata !38, metadata ptr %a, metadata !DIExpression()), !dbg !25
ret void
}

declare void @llvm.dbg.value(metadata, metadata, metadata)
declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata)

!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!8, !9, !10, !11, !12, !13, !14}
!llvm.ident = !{!15}

!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
!1 = distinct !DIGlobalVariable(name: "G", scope: !2, file: !3, line: 1, type: !7, isLocal: false, isDefinition: true)
!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 17.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None)
!3 = !DIFile(filename: "test.cpp", directory: "/")
!4 = !{!0, !5}
!5 = !DIGlobalVariableExpression(var: !6, expr: !DIExpression())
!6 = distinct !DIGlobalVariable(name: "F", scope: !2, file: !3, line: 1, type: !7, isLocal: false, isDefinition: true)
!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!8 = !{i32 7, !"Dwarf Version", i32 5}
!9 = !{i32 2, !"Debug Info Version", i32 3}
!10 = !{i32 1, !"wchar_size", i32 4}
!11 = !{i32 8, !"PIC Level", i32 2}
!12 = !{i32 7, !"PIE Level", i32 2}
!13 = !{i32 7, !"uwtable", i32 2}
!14 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
!15 = !{!"clang version 17.0.0"}
!16 = distinct !DISubprogram(name: "fun", linkageName: "_Z3funv", scope: !3, file: !3, line: 3, type: !17, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !19)
!17 = !DISubroutineType(types: !18)
!18 = !{null}
!19 = !{!20}
!20 = !DILocalVariable(name: "X", scope: !16, file: !3, line: 4, type: !21)
!21 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Pair", file: !3, line: 2, size: 64, flags: DIFlagTypePassByValue, elements: !22, identifier: "_ZTS4Pair")
!22 = !{}
!25 = !DILocation(line: 0, scope: !16)
!26 = !DILocation(line: 7, column: 7, scope: !27)
!27 = distinct !DILexicalBlock(scope: !16, file: !3, line: 7, column: 7)
!28 = distinct !DIAssignID()
!37 = distinct !DIAssignID()
!38 = distinct !DIAssignID()
82 changes: 82 additions & 0 deletions llvm/test/DebugInfo/assignment-tracking/X86/coalesce-options.ll
@@ -0,0 +1,82 @@
;; Test coalescing of contiguous fragments in adjacent location definitions.
;; This test contains the first function from coalesce-simple.ll. Just use it
;; to check whether coalescing happens or not with different flag settings.
;;
;; +=================+==============================+======================+
;; | Coalescing flag | Instruction-Referencing flag | Coalescing behaviour |
;; +=================+==============================+======================+
;; | default | enabled | enabled |
;; | default | disabled | disabled |
;; | enabled | * | enabled |
;; | disabled | * | disabled |
;; +-----------------+------------------------------+----------------------+

;; Coalescing default + instruction-referencing enabled = enable.
; RUN: llc %s -o - -stop-after=finalize-isel -experimental-debug-variable-locations=true \
; RUN: | FileCheck %s --check-prefixes=CHECK,ENABLE

;; Coalescing default + instruction-referencing disabled = disable.
; RUN: llc %s -o - -stop-after=finalize-isel -experimental-debug-variable-locations=false \
; RUN: | FileCheck %s --check-prefixes=CHECK,DISABLE

;; Coalescing enabled + instruction-referencing disabled = enable.
; RUN: llc %s -o - -stop-after=finalize-isel -experimental-debug-variable-locations=false \
; RUN: -debug-ata-coalesce-frags=true \
; RUN: | FileCheck %s --check-prefixes=CHECK,ENABLE

;; Coalescing disabled + instruction-referencing enabled = disable.
; RUN: llc %s -o - -stop-after=finalize-isel -experimental-debug-variable-locations=true \
; RUN: -debug-ata-coalesce-frags=false \
; RUN: | FileCheck %s --check-prefixes=CHECK,DISABLE

; CHECK: MOV32mi %stack.0.a, 1, $noreg, 0, $noreg, 5
; ENABLE-NEXT: DBG_VALUE %stack.0.a, $noreg, ![[#]], !DIExpression(DW_OP_deref)
; DISABLE-NEXT: DBG_VALUE %stack.0.a, $noreg, ![[#]], !DIExpression(DW_OP_deref, DW_OP_LLVM_fragment, 0, 32)

target triple = "x86_64-unknown-linux-gnu"

;; Single-block version of the scenario: all debug intrinsics are in entry.
define dso_local void @_Z3funv() local_unnamed_addr !dbg !16 {
entry:
;; The dbg.assign below names %a as the address for the whole of variable !20
;; (empty fragment expression), linked to the alloca through !37.
%a = alloca i64, !DIAssignID !37
call void @llvm.dbg.assign(metadata i64 poison, metadata !20, metadata !DIExpression(), metadata !37, metadata ptr %a, metadata !DIExpression()), !dbg !25
;; Bits [0, 32) are given the constant value 1.
call void @llvm.dbg.value(metadata i32 1, metadata !20, metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32)), !dbg !25
;; The store is linked through !38 to a dbg.assign for bits [0, 32) with
;; address %a. The RUN lines above check whether the resulting location is
;; emitted with or without a fragment expression.
store i32 5, ptr %a, !DIAssignID !38
call void @llvm.dbg.assign(metadata i32 5, metadata !20, metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32), metadata !38, metadata ptr %a, metadata !DIExpression()), !dbg !25
ret void
}

declare void @llvm.dbg.value(metadata, metadata, metadata)
declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata)

!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!8, !9, !10, !11, !12, !13, !14}
!llvm.ident = !{!15}

!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
!1 = distinct !DIGlobalVariable(name: "G", scope: !2, file: !3, line: 1, type: !7, isLocal: false, isDefinition: true)
!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 17.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None)
!3 = !DIFile(filename: "test.cpp", directory: "/")
!4 = !{!0, !5}
!5 = !DIGlobalVariableExpression(var: !6, expr: !DIExpression())
!6 = distinct !DIGlobalVariable(name: "F", scope: !2, file: !3, line: 1, type: !7, isLocal: false, isDefinition: true)
!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!8 = !{i32 7, !"Dwarf Version", i32 5}
!9 = !{i32 2, !"Debug Info Version", i32 3}
!10 = !{i32 1, !"wchar_size", i32 4}
!11 = !{i32 8, !"PIC Level", i32 2}
!12 = !{i32 7, !"PIE Level", i32 2}
!13 = !{i32 7, !"uwtable", i32 2}
!14 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
!15 = !{!"clang version 17.0.0"}
!16 = distinct !DISubprogram(name: "fun", linkageName: "_Z3funv", scope: !3, file: !3, line: 3, type: !17, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !19)
!17 = !DISubroutineType(types: !18)
!18 = !{null}
!19 = !{!20}
!20 = !DILocalVariable(name: "X", scope: !16, file: !3, line: 4, type: !21)
!21 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Pair", file: !3, line: 2, size: 64, flags: DIFlagTypePassByValue, elements: !22, identifier: "_ZTS4Pair")
!22 = !{}
!25 = !DILocation(line: 0, scope: !16)
!26 = !DILocation(line: 7, column: 7, scope: !27)
!27 = distinct !DILexicalBlock(scope: !16, file: !3, line: 7, column: 7)
!37 = distinct !DIAssignID()
!38 = distinct !DIAssignID()

0 comments on commit d4879d7

Please sign in to comment.