Skip to content

Commit

Permalink
Salvage debug info from instructions about to be deleted
Browse files Browse the repository at this point in the history
[Reapplies r297971 and punting on finding a better API for findDbgValues()]

This patch improves debug info quality in InstCombine by looking at
values that are about to be deleted, checking whether there are any
dbg.value intrinsics referring to them, and potentially encoding the
semantics of the deleted instruction into the dbg.value's
DIExpression.

In the example in the testcase (which was extracted from XNU) there is a sequence of

 %4 = load %struct.entry*, %struct.entry** %next2, align 8, !dbg !41
 %5 = bitcast %struct.entry* %4 to i8*, !dbg !42
 %add.ptr4 = getelementptr inbounds i8, i8* %5, i64 -8, !dbg !43
 %6 = bitcast i8* %add.ptr4 to %struct.entry*, !dbg !44
 call void @llvm.dbg.value(metadata %struct.entry* %6, i64 0, metadata !20, metadata !21), !dbg !34

When these instructions are eliminated by instcombine one after
another, we can still salvage the otherwise dead debug info:

- Bitcasts have no effect, so have the dbg.value point to operand(0)
- Loads can be expressed via a DW_OP_deref
- Constant gep instructions can be replaced by DWARF expression arithmetic

The API introduced by this patch is not specific to instcombine and
can be useful in other places, too.

rdar://problem/30725338

Differential Revision: https://reviews.llvm.org/D30919

llvm-svn: 297994
  • Loading branch information
adrian-prantl committed Mar 16, 2017
1 parent 2da2bfa commit 47ea647
Show file tree
Hide file tree
Showing 4 changed files with 186 additions and 25 deletions.
10 changes: 9 additions & 1 deletion llvm/include/llvm/Transforms/Utils/Local.h
Original file line number Diff line number Diff line change
Expand Up @@ -282,9 +282,12 @@ bool LowerDbgDeclare(Function &F);
/// Finds the llvm.dbg.declare intrinsic corresponding to an alloca, if any.
DbgDeclareInst *FindAllocaDbgDeclare(Value *V);

/// Finds the llvm.dbg.value intrinsics describing a value, if any.
/// Finds the llvm.dbg.value intrinsics describing a value.
void findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V);

/// Constants for \p replaceDbgDeclare and friends.
enum { NoDeref = false, WithDeref = true };

/// Replaces llvm.dbg.declare instruction when the address it describes
/// is replaced with a new value. If Deref is true, an additional DW_OP_deref is
/// prepended to the expression. If Offset is non-zero, a constant displacement
Expand All @@ -310,6 +313,11 @@ bool replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
void replaceDbgValueForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
DIBuilder &Builder, int Offset = 0);

/// Assuming the instruction \p I is going to be deleted, attempt to salvage any
/// dbg.value intrinsics referring to \p I by rewriting its effect into a
/// DIExpression.
void salvageDebugInfo(Instruction &I);

/// Remove all instructions from a basic block other than its terminator
/// and any present EH pad instructions.
unsigned removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB);
Expand Down
6 changes: 5 additions & 1 deletion llvm/lib/Transforms/InstCombine/InstCombineInternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/IR/DIBuilder.h"

#define DEBUG_TYPE "instcombine"

Expand Down Expand Up @@ -470,8 +473,9 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
/// methods should return the value returned by this function.
Instruction *eraseInstFromFunction(Instruction &I) {
DEBUG(dbgs() << "IC: ERASE " << I << '\n');

assert(I.use_empty() && "Cannot erase instruction that is used!");
salvageDebugInfo(I);

// Make sure that we reprocess all operands now that we reduced their
// use counts.
if (I.getNumOperands() < 8) {
Expand Down
89 changes: 66 additions & 23 deletions llvm/lib/Transforms/Utils/Local.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1259,36 +1259,32 @@ void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) {
DbgValues.push_back(DVI);
}

static void DIExprAddDeref(SmallVectorImpl<uint64_t> &Expr) {
Expr.push_back(dwarf::DW_OP_deref);
}

static void DIExprAddOffset(SmallVectorImpl<uint64_t> &Expr, int Offset) {
static void appendOffset(SmallVectorImpl<uint64_t> &Ops, int64_t Offset) {
if (Offset > 0) {
Expr.push_back(dwarf::DW_OP_plus);
Expr.push_back(Offset);
Ops.push_back(dwarf::DW_OP_plus);
Ops.push_back(Offset);
} else if (Offset < 0) {
Expr.push_back(dwarf::DW_OP_minus);
Expr.push_back(-Offset);
Ops.push_back(dwarf::DW_OP_minus);
Ops.push_back(-Offset);
}
}

static DIExpression *BuildReplacementDIExpr(DIBuilder &Builder,
DIExpression *DIExpr, bool Deref,
int Offset) {
/// Prepend \p DIExpr with a deref and offset operation.
static DIExpression *prependDIExpr(DIBuilder &Builder, DIExpression *DIExpr,
bool Deref, int64_t Offset) {
if (!Deref && !Offset)
return DIExpr;
// Create a copy of the original DIDescriptor for user variable, prepending
// "deref" operation to a list of address elements, as new llvm.dbg.declare
// will take a value storing address of the memory for variable, not
// alloca itself.
SmallVector<uint64_t, 4> NewDIExpr;
SmallVector<uint64_t, 4> Ops;
if (Deref)
DIExprAddDeref(NewDIExpr);
DIExprAddOffset(NewDIExpr, Offset);
Ops.push_back(dwarf::DW_OP_deref);
appendOffset(Ops, Offset);
if (DIExpr)
NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end());
return Builder.createExpression(NewDIExpr);
Ops.append(DIExpr->elements_begin(), DIExpr->elements_end());
return Builder.createExpression(Ops);
}

bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
Expand All @@ -1302,7 +1298,7 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
auto *DIExpr = DDI->getExpression();
assert(DIVar && "Missing variable");

DIExpr = BuildReplacementDIExpr(Builder, DIExpr, Deref, Offset);
DIExpr = prependDIExpr(Builder, DIExpr, Deref, Offset);

// Insert llvm.dbg.declare immediately after the original alloca, and remove
// old llvm.dbg.declare.
Expand Down Expand Up @@ -1334,11 +1330,11 @@ static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress,
// Insert the offset immediately after the first deref.
// We could just change the offset argument of dbg.value, but it's unsigned...
if (Offset) {
SmallVector<uint64_t, 4> NewDIExpr;
DIExprAddDeref(NewDIExpr);
DIExprAddOffset(NewDIExpr, Offset);
NewDIExpr.append(DIExpr->elements_begin() + 1, DIExpr->elements_end());
DIExpr = Builder.createExpression(NewDIExpr);
SmallVector<uint64_t, 4> Ops;
Ops.push_back(dwarf::DW_OP_deref);
appendOffset(Ops, Offset);
Ops.append(DIExpr->elements_begin() + 1, DIExpr->elements_end());
DIExpr = Builder.createExpression(Ops);
}

Builder.insertDbgValueIntrinsic(NewAddress, DVI->getOffset(), DIVar, DIExpr,
Expand All @@ -1357,6 +1353,53 @@ void llvm::replaceDbgValueForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
}
}

/// Assuming the instruction \p I is about to be deleted, try to keep the
/// dbg.value intrinsics that describe it alive by pointing them at operand 0
/// of \p I and encoding the effect of \p I in their DIExpression.
///
/// Only three kinds of instruction are handled; for anything else this is a
/// no-op:
///  - bitcast: the dbg.value is redirected to the cast's source and its
///    expression is left unchanged.
///  - getelementptr whose offset is a compile-time constant: the offset is
///    prepended to the expression (see prependDIExpr).
///  - load: a DW_OP_deref is prepended to the expression.
void llvm::salvageDebugInfo(Instruction &I) {
  SmallVector<DbgValueInst *, 1> DbgValues;
  auto &M = *I.getModule();

  // Wrap an IR value so that it can be installed as a metadata operand.
  auto MDWrap = [&](Value *V) {
    return MetadataAsValue::get(I.getContext(), ValueAsMetadata::get(V));
  };

  // Redirect DVI to operand 0 of I and prepend the requested deref/offset to
  // its expression. Operand 0 of a dbg.value is the described value and
  // operand 3 is the DIExpression.
  auto Rewrite = [&](DbgValueInst *DVI, bool Deref, int64_t Offset) {
    auto *DIExpr = DVI->getExpression();
    DIBuilder DIB(M, /*AllowUnresolved*/ false);
    DIExpr = prependDIExpr(DIB, DIExpr, Deref, Offset);
    DVI->setOperand(0, MDWrap(I.getOperand(0)));
    DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr));
    DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n');
  };

  if (auto *BitCast = dyn_cast<BitCastInst>(&I)) {
    findDbgValues(DbgValues, BitCast);
    for (auto *DVI : DbgValues) {
      // Bitcasts are entirely irrelevant for debug info. Rewrite the dbg.value
      // to use the cast's source.
      DVI->setOperand(0, MDWrap(I.getOperand(0)));
      DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n');
    }
  } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
    findDbgValues(DbgValues, GEP);
    if (DbgValues.empty())
      return;

    // The constant offset depends only on the GEP, not on the dbg.value being
    // rewritten, so compute it once up front rather than once per intrinsic.
    const auto &DL = M.getDataLayout();
    unsigned BitWidth = DL.getPointerSizeInBits(GEP->getPointerAddressSpace());
    APInt Offset(BitWidth, 0);
    // Only a GEP with a constant offset can be rewritten into a DIExpression.
    if (!GEP->accumulateConstantOffset(DL, Offset))
      return;

    // The offset is as wide as a pointer in the GEP's address space and is
    // narrowed to the int64_t that prependDIExpr takes.
    // NOTE(review): this presumes pointers are at most 64 bits wide - confirm.
    const int64_t ByteOffset = Offset.getSExtValue();
    for (auto *DVI : DbgValues)
      Rewrite(DVI, NoDeref, ByteOffset);
  } else if (auto *Load = dyn_cast<LoadInst>(&I)) {
    findDbgValues(DbgValues, Load);
    // Rewrite the load into DW_OP_deref.
    for (auto *DVI : DbgValues)
      Rewrite(DVI, WithDeref, 0);
  }
}

unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) {
unsigned NumDeadInst = 0;
// Delete the instructions backwards, as it has a reduced likelihood of
Expand Down
106 changes: 106 additions & 0 deletions llvm/test/Transforms/InstCombine/debuginfo-dce.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
; RUN: opt -instcombine %s -S -o - | FileCheck %s
; Verify that the eliminated instructions (bitcast, gep, load) are salvaged into
; a DIExpression.
;
; Originally created from the following C source and then heavily isolated/reduced.
;
; struct entry {
; struct entry *next;
; };
; void scan(struct entry *queue, struct entry *end)
; {
; struct entry *entry;
; for (entry = (struct entry *)((char *)(queue->next) - 8);
; &entry->next == end;
; entry = (struct entry *)((char *)(entry->next) - 8)) {
; }
; }

; ModuleID = '<stdin>'
source_filename = "test.c"
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.12.0"

%struct.entry = type { %struct.entry* }

; Load case: %1 is described by a dbg.value; %0 is an identical load that
; nothing in this function refers to. The expectations embedded below require
; that, after instcombine, the dbg.value is the first instruction of the entry
; block and refers to %queue (the load's pointer operand), with its expression
; captured as LOAD_EXPR.
; Function Attrs: nounwind ssp uwtable
define void @salvage_load(%struct.entry** %queue) local_unnamed_addr #0 !dbg !14 {
entry:
%im_not_dead = alloca %struct.entry*
%0 = load %struct.entry*, %struct.entry** %queue, align 8, !dbg !19
%1 = load %struct.entry*, %struct.entry** %queue, align 8, !dbg !19
call void @llvm.dbg.value(metadata %struct.entry* %1, i64 0, metadata !18, metadata !20), !dbg !19
; CHECK: define void @salvage_load
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @llvm.dbg.value(metadata %struct.entry** %queue, i64 0,
; CHECK-SAME: metadata ![[LOAD_EXPR:[0-9]+]])
store %struct.entry* %1, %struct.entry** %im_not_dead, align 8
ret void, !dbg !21
}

; Bitcast case: %1 is described by a dbg.value; %0 is an identical bitcast that
; nothing in this function refers to. The expectations embedded below require
; that, after instcombine, the dbg.value is the first instruction of the entry
; block and refers to %queue (the cast's source), with its expression captured
; as BITCAST_EXPR.
; Function Attrs: nounwind ssp uwtable
define void @salvage_bitcast(%struct.entry* %queue) local_unnamed_addr #0 !dbg !14 {
entry:
%im_not_dead = alloca i8*
%0 = bitcast %struct.entry* %queue to i8*, !dbg !19
%1 = bitcast %struct.entry* %queue to i8*, !dbg !19
call void @llvm.dbg.value(metadata i8* %1, i64 0, metadata !18, metadata !20), !dbg !19
; CHECK: define void @salvage_bitcast
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @llvm.dbg.value(metadata %struct.entry* %queue, i64 0,
; CHECK-SAME: metadata ![[BITCAST_EXPR:[0-9]+]])
store i8* %1, i8** %im_not_dead, align 8
ret void, !dbg !21
}

; GEP case: %1 is a getelementptr with constant indices (-1, 0) that is
; described by a dbg.value; %0 is an identical GEP that nothing in this
; function refers to. The expectations embedded below require that, after
; instcombine, the dbg.value is the first instruction of the entry block and
; refers to %queue (the GEP's base pointer), with its expression captured as
; GEP_EXPR.
; Function Attrs: nounwind ssp uwtable
define void @salvage_gep(%struct.entry* %queue, %struct.entry* %end) local_unnamed_addr #0 !dbg !14 {
entry:
%im_not_dead = alloca %struct.entry**
%0 = getelementptr inbounds %struct.entry, %struct.entry* %queue, i32 -1, i32 0, !dbg !19
%1 = getelementptr inbounds %struct.entry, %struct.entry* %queue, i32 -1, i32 0, !dbg !19
call void @llvm.dbg.value(metadata %struct.entry** %1, i64 0, metadata !18, metadata !20), !dbg !19
; CHECK: define void @salvage_gep
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @llvm.dbg.value(metadata %struct.entry* %queue, i64 0,
; CHECK-SAME: metadata ![[GEP_EXPR:[0-9]+]])
store %struct.entry** %1, %struct.entry*** %im_not_dead, align 8
ret void, !dbg !21
}

; CHECK: ![[LOAD_EXPR]] = !DIExpression(DW_OP_deref, DW_OP_plus, 0)
; CHECK: ![[BITCAST_EXPR]] = !DIExpression(DW_OP_plus, 0)
; CHECK: ![[GEP_EXPR]] = !DIExpression(DW_OP_minus, 8, DW_OP_plus, 0)

; Function Attrs: nounwind readnone
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1

attributes #0 = { nounwind ssp uwtable }
attributes #1 = { nounwind readnone }

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!10, !11, !12}
!llvm.ident = !{!13}

!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 5.0.0 (trunk 297628) (llvm/trunk 297643)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3)
!1 = !DIFile(filename: "test.c", directory: "/")
!2 = !{}
!3 = !{!4, !8}
!4 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !5, size: 64)
!5 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "entry", file: !1, line: 1, size: 64, elements: !6)
!6 = !{!7}
!7 = !DIDerivedType(tag: DW_TAG_member, name: "next", scope: !5, file: !1, line: 2, baseType: !4, size: 64)
!8 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64)
!9 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
!10 = !{i32 2, !"Dwarf Version", i32 4}
!11 = !{i32 2, !"Debug Info Version", i32 3}
!12 = !{i32 1, !"PIC Level", i32 2}
!13 = !{!"clang version 5.0.0 (trunk 297628) (llvm/trunk 297643)"}
!14 = distinct !DISubprogram(name: "scan", scope: !1, file: !1, line: 4, type: !15, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !17)
!15 = !DISubroutineType(types: !16)
!16 = !{null, !4, !4}
!17 = !{!18}
!18 = !DILocalVariable(name: "entry", scope: !14, file: !1, line: 6, type: !4)
!19 = !DILocation(line: 6, column: 17, scope: !14)
!20 = !DIExpression(DW_OP_plus, 0)
!21 = !DILocation(line: 11, column: 1, scope: !14)

0 comments on commit 47ea647

Please sign in to comment.