diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h index 50ba00cf8df3e9..f521862b1a54c2 100644 --- a/llvm/include/llvm/IR/DebugInfoMetadata.h +++ b/llvm/include/llvm/IR/DebugInfoMetadata.h @@ -65,6 +65,7 @@ enum Tag : uint16_t; } class DbgVariableIntrinsic; +class DPValue; extern cl::opt EnableFSDiscriminator; @@ -3814,6 +3815,7 @@ class DebugVariable { public: DebugVariable(const DbgVariableIntrinsic *DII); + DebugVariable(const DPValue *DPV); DebugVariable(const DILocalVariable *Var, std::optional FragmentInfo, diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp index c507346710b8d5..51950fc937f0ab 100644 --- a/llvm/lib/IR/DebugInfoMetadata.cpp +++ b/llvm/lib/IR/DebugInfoMetadata.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/IR/DebugProgramInstruction.h" #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Type.h" @@ -41,6 +42,11 @@ DebugVariable::DebugVariable(const DbgVariableIntrinsic *DII) Fragment(DII->getExpression()->getFragmentInfo()), InlinedAt(DII->getDebugLoc().getInlinedAt()) {} +DebugVariable::DebugVariable(const DPValue *DPV) + : Variable(DPV->getVariable()), + Fragment(DPV->getExpression()->getFragmentInfo()), + InlinedAt(DPV->getDebugLoc().getInlinedAt()) {} + DebugVariableAggregate::DebugVariableAggregate(const DbgVariableIntrinsic *DVI) : DebugVariable(DVI->getVariable(), std::nullopt, DVI->getDebugLoc()->getInlinedAt()) {} diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 0b392e77abaa1e..3e42e1d0703806 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -1972,6 +1972,69 @@ bool llvm::LowerDbgDeclare(Function &F) { return Changed; } +// RemoveDIs: re-implementation of insertDebugValuesForPHIs, but which pulls the +// debug-info out of the block's DPValues rather than dbg.value intrinsics. +static void insertDPValuesForPHIs(BasicBlock *BB, + SmallVectorImpl &InsertedPHIs) { + assert(BB && "No BasicBlock to clone DPValue(s) from."); + if (InsertedPHIs.size() == 0) + return; + + // Map existing PHI nodes to their DPValues. + DenseMap DbgValueMap; + for (auto &I : *BB) { + for (auto &DPV : I.getDbgValueRange()) { + for (Value *V : DPV.location_ops()) + if (auto *Loc = dyn_cast_or_null(V)) + DbgValueMap.insert({Loc, &DPV}); + } + } + if (DbgValueMap.size() == 0) + return; + + // Map a pair of the destination BB and old DPValue to the new DPValue, + // so that if a DPValue is being rewritten to use more than one of the + // inserted PHIs in the same destination BB, we can update the same DPValue + // with all the new PHIs instead of creating one copy for each. + MapVector, DPValue *> NewDbgValueMap; + // Then iterate through the new PHIs and look to see if they use one of the + // previously mapped PHIs. If so, create a new DPValue that will propagate + // the info through the new PHI. If we use more than one new PHI in a single + // destination BB with the same old dbg.value, merge the updates so that we + // get a single new DPValue with all the new PHIs. + for (auto PHI : InsertedPHIs) { + BasicBlock *Parent = PHI->getParent(); + // Avoid inserting a debug-info record into an EH block. + if (Parent->getFirstNonPHI()->isEHPad()) + continue; + for (auto VI : PHI->operand_values()) { + auto V = DbgValueMap.find(VI); + if (V != DbgValueMap.end()) { + DPValue *DbgII = cast(V->second); + auto NewDI = NewDbgValueMap.find({Parent, DbgII}); + if (NewDI == NewDbgValueMap.end()) { + DPValue *NewDbgII = DbgII->clone(); + NewDI = NewDbgValueMap.insert({{Parent, DbgII}, NewDbgII}).first; + } + DPValue *NewDbgII = NewDI->second; + // If PHI contains VI as an operand more than once, we may + // replaced it in NewDbgII; confirm that it is present. + if (is_contained(NewDbgII->location_ops(), VI)) + NewDbgII->replaceVariableLocationOp(VI, PHI); + } + } + } + // Insert the new DPValues into their destination blocks. + for (auto DI : NewDbgValueMap) { + BasicBlock *Parent = DI.first.first; + DPValue *NewDbgII = DI.second; + auto InsertionPt = Parent->getFirstInsertionPt(); + assert(InsertionPt != Parent->end() && "Ill-formed basic block"); + + InsertionPt->DbgMarker->insertDPValue(NewDbgII, true); + } +} + /// Propagate dbg.value intrinsics through the newly inserted PHIs. void llvm::insertDebugValuesForPHIs(BasicBlock *BB, SmallVectorImpl &InsertedPHIs) { @@ -1979,6 +2042,8 @@ void llvm::insertDebugValuesForPHIs(BasicBlock *BB, if (InsertedPHIs.size() == 0) return; + insertDPValuesForPHIs(BB, InsertedPHIs); + // Map existing PHI nodes to their dbg.values. ValueToValueMapTy DbgValueMap; for (auto &I : *BB) { diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp index ae155ac082d811..cbef27d1ecfa68 100644 --- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp @@ -159,7 +159,8 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader, // Replace MetadataAsValue(ValueAsMetadata(OrigHeaderVal)) uses in debug // intrinsics. SmallVector DbgValues; - llvm::findDbgValues(DbgValues, OrigHeaderVal); + SmallVector DPValues; + llvm::findDbgValues(DbgValues, OrigHeaderVal, &DPValues); for (auto &DbgValue : DbgValues) { // The original users in the OrigHeader are already using the original // definitions. @@ -180,6 +181,29 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader, NewVal = UndefValue::get(OrigHeaderVal->getType()); DbgValue->replaceVariableLocationOp(OrigHeaderVal, NewVal); } + + // RemoveDIs: duplicate implementation for non-instruction debug-info + // storage in DPValues. + for (DPValue *DPV : DPValues) { + // The original users in the OrigHeader are already using the original + // definitions. + BasicBlock *UserBB = DPV->getMarker()->getParent(); + if (UserBB == OrigHeader) + continue; + + // Users in the OrigPreHeader need to use the value to which the + // original definitions are mapped and anything else can be handled by + // the SSAUpdater. To avoid adding PHINodes, check if the value is + // available in UserBB, if not substitute undef. + Value *NewVal; + if (UserBB == OrigPreheader) + NewVal = OrigPreHeaderVal; + else if (SSA.HasValueForBlock(UserBB)) + NewVal = SSA.GetValueInMiddleOfBlock(UserBB); + else + NewVal = UndefValue::get(OrigHeaderVal->getType()); + DPV->replaceVariableLocationOp(OrigHeaderVal, NewVal); + } } } @@ -531,6 +555,18 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { break; } + // Duplicate implementation for DPValues, the non-instruction format of + // debug-info records in RemoveDIs. + auto makeHashDPV = [](const DPValue &D) -> DbgIntrinsicHash { + auto VarLocOps = D.location_ops(); + return {{hash_combine_range(VarLocOps.begin(), VarLocOps.end()), + D.getVariable()}, + D.getExpression()}; + }; + for (Instruction &I : llvm::drop_begin(llvm::reverse(*OrigPreheader))) + for (const DPValue &DPV : I.getDbgValueRange()) + DbgIntrinsics.insert(makeHashDPV(DPV)); + // Remember the local noalias scope declarations in the header. After the // rotation, they must be duplicated and the scope must be cloned. This // avoids unwanted interaction across iterations. @@ -539,6 +575,29 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { if (auto *Decl = dyn_cast(&I)) NoAliasDeclInstructions.push_back(Decl); + Module *M = OrigHeader->getModule(); + + // Track the next DPValue to clone. If we have a sequence where an + // instruction is hoisted instead of being cloned: + // DPValue blah + // %foo = add i32 0, 0 + // DPValue xyzzy + // %bar = call i32 @foobar() + // where %foo is hoisted, then the DPValue "blah" will be seen twice, once + // attached to %foo, then when %foo his hoisted it will "fall down" onto the + // function call: + // DPValue blah + // DPValue xyzzy + // %bar = call i32 @foobar() + // causing it to appear attached to the call too. + // + // To avoid this, cloneDebugInfoFrom takes an optional "start cloning from + // here" position to account for this behaviour. We point it at any DPValues + // on the next instruction, here labelled xyzzy, before we hoist %foo. + // Later, we only only clone DPValues from that position (xyzzy) onwards, + // which avoids cloning DPValue "blah" multiple times. + std::optional NextDbgInst = std::nullopt; + while (I != E) { Instruction *Inst = &*I++; @@ -551,7 +610,17 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { if (L->hasLoopInvariantOperands(Inst) && !Inst->mayReadFromMemory() && !Inst->mayWriteToMemory() && !Inst->isTerminator() && !isa(Inst) && !isa(Inst)) { + + if (LoopEntryBranch->getParent()->IsNewDbgInfoFormat) { + auto DbgValueRange = + LoopEntryBranch->cloneDebugInfoFrom(Inst, NextDbgInst); + RemapDPValueRange(M, DbgValueRange, ValueMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + } + + NextDbgInst = I->getDbgValueRange().begin(); Inst->moveBefore(LoopEntryBranch); + ++NumInstrsHoisted; continue; } @@ -562,6 +631,17 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { ++NumInstrsDuplicated; + if (LoopEntryBranch->getParent()->IsNewDbgInfoFormat) { + auto Range = C->cloneDebugInfoFrom(Inst, NextDbgInst); + // Erase anything we've seen before. + for (DPValue &DPV : make_early_inc_range(Range)) + if (DbgIntrinsics.count(makeHashDPV(DPV))) + DPV.eraseFromParent(); + RemapDPValueRange(M, Range, ValueMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + NextDbgInst = std::nullopt; + } + // Eagerly remap the operands of the instruction. RemapInstruction(C, ValueMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); @@ -676,6 +756,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // OrigPreHeader's old terminator (the original branch into the loop), and // remove the corresponding incoming values from the PHI nodes in OrigHeader. LoopEntryBranch->eraseFromParent(); + OrigPreheader->flushTerminatorDbgValues(); // Update MemorySSA before the rewrite call below changes the 1:1 // instruction:cloned_instruction_or_value mapping. diff --git a/llvm/test/Transforms/LoopRotate/dbgvalue.ll b/llvm/test/Transforms/LoopRotate/dbgvalue.ll index 96168898dfd734..92cc886bc81c10 100644 --- a/llvm/test/Transforms/LoopRotate/dbgvalue.ll +++ b/llvm/test/Transforms/LoopRotate/dbgvalue.ll @@ -1,14 +1,37 @@ -; RUN: opt -S -passes=loop-rotate -verify-memoryssa < %s | FileCheck %s +; RUN: opt -S -passes=loop-rotate -verify-memoryssa < %s | FileCheck %s --implicit-check-not=dbg.value +; RUN: opt -S -passes=loop-rotate -verify-memoryssa < %s --try-experimental-debuginfo-iterators | FileCheck %s --implicit-check-not=dbg.value declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone declare void @llvm.dbg.value(metadata, metadata, metadata) nounwind readnone +; CHECK: declare void @llvm.dbg.value(metadata, + +; This function rotates the exit conditon into the entry block, moving the +; dbg.values with it. Check that they resolve through the PHIs to the arguments +; only in the entry block. In the loop block, the dbg.values shift down below +; the calls and resolve to them. Then even more dbg.values are inserted on the +; newly produced PHIs at the start. + define i32 @tak(i32 %x, i32 %y, i32 %z) nounwind ssp !dbg !0 { ; CHECK-LABEL: define i32 @tak( ; CHECK: entry ; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %x -; CHECK: tail call void @llvm.dbg.value(metadata i32 %call - +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %y +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %z +; CHECK: if.then.lr.ph: +; CHECK: if.then: +; CHECK-NEXT: %z.tr4 = phi +; CHECK-NEXT: %y.tr3 = phi +; CHECK-NEXT: %x.tr2 = phi +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %z.tr4 +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %y.tr3 +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %x.tr2 +; CHECK: %call = tail call i32 @tak(i32 +; CHECK: %call9 = tail call i32 @tak(i32 +; CHECK: %call14 = tail call i32 @tak(i32 +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %call +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %call9 +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %call14 entry: br label %tailrecurse @@ -38,12 +61,87 @@ return: ; preds = %if.end ret i32 %z.tr, !dbg !17 } -define i32 @tak2(i32 %x, i32 %y, i32 %z) nounwind ssp !dbg !21 { -; CHECK-LABEL: define i32 @tak2( +; Repeat of the tak function, with only one DILocalVariable, checking that we +; don't insert duplicate debug intrinsics. The initial duplicates are preserved. +; FIXME: this test checks for the de-duplication behaviour that loop-rotate +; has today, however it might not be correct. In the if.then block the preserved +; dbg.value is for %x -- should not the _last_dbg.value, for %z, have been +; preserved? +define i32 @tak_dup(i32 %x, i32 %y, i32 %z) nounwind ssp !dbg !50 { +; CHECK-LABEL: define i32 @tak_dup( ; CHECK: entry -; CHECK: tail call void @llvm.dbg.value(metadata i32 %x.tr -; CHECK: tail call void @llvm.dbg.value(metadata i32 undef +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %x +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %y +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %z +; CHECK: if.then.lr.ph: +; CHECK: if.then: +; CHECK-NEXT: %z.tr4 = phi +; CHECK-NEXT: %y.tr3 = phi +; CHECK-NEXT: %x.tr2 = phi +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %x.tr2 +; CHECK: %call = tail call i32 @tak(i32 +; CHECK: %call9 = tail call i32 @tak(i32 +; CHECK: %call14 = tail call i32 @tak(i32 +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %call14 +entry: + br label %tailrecurse +tailrecurse: ; preds = %if.then, %entry + %x.tr = phi i32 [ %x, %entry ], [ %call, %if.then ] + %y.tr = phi i32 [ %y, %entry ], [ %call9, %if.then ] + %z.tr = phi i32 [ %z, %entry ], [ %call14, %if.then ] + tail call void @llvm.dbg.value(metadata i32 %x.tr, metadata !60, metadata !DIExpression()), !dbg !61 + tail call void @llvm.dbg.value(metadata i32 %y.tr, metadata !60, metadata !DIExpression()), !dbg !61 + tail call void @llvm.dbg.value(metadata i32 %z.tr, metadata !60, metadata !DIExpression()), !dbg !61 + %cmp = icmp slt i32 %y.tr, %x.tr, !dbg !62 + br i1 %cmp, label %if.then, label %if.end, !dbg !62 + +if.then: ; preds = %tailrecurse + %sub = sub nsw i32 %x.tr, 1, !dbg !64 + %call = tail call i32 @tak(i32 %sub, i32 %y.tr, i32 %z.tr), !dbg !64 + %sub6 = sub nsw i32 %y.tr, 1, !dbg !64 + %call9 = tail call i32 @tak(i32 %sub6, i32 %z.tr, i32 %x.tr), !dbg !64 + %sub11 = sub nsw i32 %z.tr, 1, !dbg !64 + %call14 = tail call i32 @tak(i32 %sub11, i32 %x.tr, i32 %y.tr), !dbg !64 + br label %tailrecurse + +if.end: ; preds = %tailrecurse + br label %return, !dbg !66 + +return: ; preds = %if.end + ret i32 %z.tr, !dbg !67 +} + +; Check that the dbg.values move up to being immediately below the PHIs, +; using their Values. However once we exit the loop, the x and y values +; become irrelevant and undef, only z gets an LCSSA PHI to preserve it. +; +; Note that while the icmp is initially undominated by any dbg.value and thus +; shouldn't get a variable location, the first iteration is peeled off into the +; entry block. It's then safe to have it dominated by subsequent dbg.values as +; every path to the icmp is preceeded by a dbg.value. +; +; FIXME: could we choose to preserve more information about the loop, x and y +; might not be live out of the loop, but they might still be dominated by a +; describable Value. + +define i32 @tak2(i32 %x, i32 %y, i32 %z) nounwind ssp !dbg !21 { +; CHECK-LABEL: define i32 @tak2( +; CHECK: if.then: +; CHECK-NEXT: %z.tr4 = phi i32 +; CHECK-NEXT: %y.tr3 = phi i32 +; CHECK-NEXT: %x.tr2 = phi i32 +; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 %x.tr2 +; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 %y.tr3 +; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 %z.tr4 +; CHECK: tail call i32 @tak(i32 +; CHECK: tail call i32 @tak(i32 +; CHECK: tail call i32 @tak(i32 +; CHECK: if.end: +; CHECK-NEXT: z.tr.lcssa = phi i32 +; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 undef +; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 undef +; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 %z.tr.lcssa entry: br label %tailrecurse @@ -120,6 +218,53 @@ for.end: ret void } +; Test that dbg.value intrinsincs adjacent to the `icmp slt i32 0, 0` get +; rotated as expected. The icmp is loop-invariant and so gets hoisted to the +; preheader via a different code path. This is more difficult for DPValue +; debug-info records to handle, because they have to get detached and moved +; somewhere else during rotation. +define void @invariant_hoist() !dbg !70 { +; CHECK-LABEL: define void @invariant_hoist() +; CHECK: entry: +; CHECK-NEXT: br label %L0.preheader +; CHECK: L0.preheader: +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 0, +; CHECK-NEXT: %cmp = icmp slt i32 0, 0, +; CHECK: L1.preheader: +; CHECK-NEXT: %spec.select3 = phi i32 +; CHECK-NEXT: %k.02 = phi i32 +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %k.02, +; CHECK: L0.latch: +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %spec.select3, +entry: + br label %L0.preheader, !dbg !77 + +L0.preheader: + br label %L0, !dbg !77 + +L0: + %k.0 = phi i32 [ 0, %L0.preheader ], [ %spec.select, %L0.latch ] + call void @llvm.dbg.value(metadata i32 %k.0, metadata !80, metadata !DIExpression()), !dbg !77 + %cmp = icmp slt i32 0, 0, !dbg !77 + %inc = zext i1 %cmp to i32, !dbg !77 + %spec.select = add nsw i32 %k.0, %inc, !dbg !77 + %tobool3.not = icmp eq i32 %spec.select, 0, !dbg !77 + br i1 %tobool3.not, label %L0.preheader, label %L1.preheader, !dbg !77 + +L1.preheader: + %tobool8.not = icmp eq i32 %k.0, 0, !dbg !77 + br label %L1, !dbg !77 + +L1: + br i1 %tobool8.not, label %L1.latch, label %L0.latch, !dbg !77 + +L1.latch: + br i1 false, label %L1, label %L0.latch, !dbg !77 + +L0.latch: + br label %L0, !dbg !77 +} + !llvm.module.flags = !{!20} !llvm.dbg.cu = !{!2} @@ -156,3 +301,25 @@ for.end: !39 = !DILocation(line: 32, column: 20, scope: !21) !40 = !DILocalVariable(name: "z", line: 32, arg: 3, scope: !21, file: !1, type: !5) !41 = !DILocation(line: 32, column: 27, scope: !21) +!50 = distinct !DISubprogram(name: "tak_dup", line: 32, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !2, file: !18, scope: !1, type: !3) +!57 = !DILocation(line: 32, column: 13, scope: !50) +!59 = !DILocation(line: 32, column: 20, scope: !50) +!60 = !DILocalVariable(name: "z", line: 32, arg: 3, scope: !50, file: !1, type: !5) +!61 = !DILocation(line: 32, column: 27, scope: !50) +!62 = !DILocation(line: 33, column: 3, scope: !63) +!63 = distinct !DILexicalBlock(line: 32, column: 30, file: !18, scope: !50) +!64 = !DILocation(line: 34, column: 5, scope: !65) +!65 = distinct !DILexicalBlock(line: 33, column: 14, file: !18, scope: !63) +!66 = !DILocation(line: 36, column: 3, scope: !63) +!67 = !DILocation(line: 37, column: 1, scope: !63) +!70 = distinct !DISubprogram(name: "invariant_hoist", line: 32, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !2, file: !18, scope: !1, type: !3) +!77 = !DILocation(line: 32, column: 13, scope: !70) +!79 = !DILocation(line: 32, column: 20, scope: !70) +!80 = !DILocalVariable(name: "z", line: 32, arg: 3, scope: !70, file: !1, type: !5) +!81 = !DILocation(line: 32, column: 27, scope: !70) +!82 = !DILocation(line: 33, column: 3, scope: !83) +!83 = distinct !DILexicalBlock(line: 32, column: 30, file: !18, scope: !70) +!84 = !DILocation(line: 34, column: 5, scope: !85) +!85 = distinct !DILexicalBlock(line: 33, column: 14, file: !18, scope: !83) +!86 = !DILocation(line: 36, column: 3, scope: !83) +!87 = !DILocation(line: 37, column: 1, scope: !83)