82 changes: 81 additions & 1 deletion llvm/lib/CodeGen/PseudoProbeInserter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/PseudoProbe.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/Target/TargetMachine.h"
#include <unordered_map>
#include <unordered_set>

#define DEBUG_TYPE "pseudo-probe-inserter"

Expand All @@ -47,7 +48,10 @@ class PseudoProbeInserter : public MachineFunctionPass {
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
MachineInstr *FirstInstr = nullptr;
for (MachineInstr &MI : MBB) {
if (!MI.isPseudo())
FirstInstr = &MI;
if (MI.isCall()) {
if (DILocation *DL = MI.getDebugLoc()) {
auto Value = DL->getDiscriminator();
Expand All @@ -65,8 +69,84 @@ class PseudoProbeInserter : public MachineFunctionPass {
}
}
}

// Walk the block backwards, move PSEUDO_PROBE before the first real
// instruction to fix out-of-order probes. There is a problem with probes
// as the terminator of the block. During the offline counts processing,
// the samples collected on the first physical instruction following a
// probe will be counted towards the probe. This logically equals to
// treating the instruction next to a probe as if it is from the same
// block of the probe. This is accurate most of the time unless the
// instruction can be reached from multiple flows, which means it actually
// starts a new block. Samples collected on such probes may cause
// imprecision with the counts inference algorithm. Fortunately, if
// there are still other native instructions preceding the probe we can
// use them as a place holder to collect samples for the probe.
if (FirstInstr) {
auto MII = MBB.rbegin();
while (MII != MBB.rend()) {
// Skip all pseudo probes followed by a real instruction since they
// are not dangling.
if (!MII->isPseudo())
break;
auto Cur = MII++;
if (Cur->getOpcode() != TargetOpcode::PSEUDO_PROBE)
continue;
// Move the dangling probe before FirstInstr.
auto *ProbeInstr = &*Cur;
MBB.remove(ProbeInstr);
MBB.insert(FirstInstr, ProbeInstr);
Changed = true;
}
} else {
// Probes not surrounded by any real instructions in the same block are
// called dangling probes. Since there's no good way to pick up a sample
// collection point for dangling probes at compile time, they are being
// tagged so that the profile correlation tool will not report any
// samples collected for them and it's up to the counts inference tool
// to get them a reasonable count.
for (MachineInstr &MI : MBB) {
if (MI.isPseudoProbe())
MI.addPseudoProbeAttribute(PseudoProbeAttributes::Dangling);
}
}
}

// Remove redundant dangling probes. Identical dangling probes are redundant
// since they all have the same semantics, which is to rely on the counts
// inference tool to get a reasonable count for the same original block.
// Therefore, there's no need to keep multiple copies of them.
auto Hash = [](const MachineInstr *MI) {
return std::hash<uint64_t>()(MI->getOperand(0).getImm()) ^
std::hash<uint64_t>()(MI->getOperand(1).getImm());
};

auto IsEqual = [](const MachineInstr *Left, const MachineInstr *Right) {
return Left->getOperand(0).getImm() == Right->getOperand(0).getImm() &&
Left->getOperand(1).getImm() == Right->getOperand(1).getImm() &&
Left->getOperand(3).getImm() == Right->getOperand(3).getImm() &&
Left->getDebugLoc() == Right->getDebugLoc();
};

SmallVector<MachineInstr *, 4> ToBeRemoved;
std::unordered_set<MachineInstr *, decltype(Hash), decltype(IsEqual)>
DanglingProbes(0, Hash, IsEqual);

for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
if (MI.isPseudoProbe()) {
if ((uint32_t)MI.getPseudoProbeAttribute() &
(uint32_t)PseudoProbeAttributes::Dangling)
if (!DanglingProbes.insert(&MI).second)
ToBeRemoved.push_back(&MI);
}
}
}

for (auto *MI : ToBeRemoved)
MI->eraseFromParent();

Changed |= !ToBeRemoved.empty();
return Changed;
}

Expand Down
8 changes: 7 additions & 1 deletion llvm/lib/CodeGen/TailDuplicator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -683,7 +683,7 @@ bool TailDuplicator::isSimpleBB(MachineBasicBlock *TailBB) {
return false;
if (TailBB->pred_empty())
return false;
MachineBasicBlock::iterator I = TailBB->getFirstNonDebugInstr();
MachineBasicBlock::iterator I = TailBB->getFirstNonDebugInstr(true);
if (I == TailBB->end())
return true;
return I->isUnconditionalBranch();
Expand Down Expand Up @@ -779,6 +779,12 @@ bool TailDuplicator::duplicateSimpleBB(
assert(PredBB->succ_size() <= 1);
}

// For AutoFDO, since TailBB is going to be removed, we won't be able to
// sample it. To avoid assigning a zero weight for TailBB, move all its pseudo
// probes into PredBB and mark them dangling. This should allow the counts
// inference a chance to get a more reasonable weight for TailBB.
TailBB->moveAndDanglePseudoProbes(PredBB);

if (PredTBB)
TII->insertBranch(*PredBB, PredTBB, PredFBB, PredCond, DL);

Expand Down
71 changes: 71 additions & 0 deletions llvm/lib/IR/PseudoProbe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include <unordered_set>

using namespace llvm;

Expand Down Expand Up @@ -96,4 +97,74 @@ void setProbeDistributionFactor(Instruction &Inst, float Factor) {
}
}
}

/// Set the bits of \p Attr in the attribute operand of the pseudo probe
/// \p Inst. The instruction is left untouched when every requested bit is
/// already present.
void addPseudoProbeAttribute(PseudoProbeInst &Inst,
                             PseudoProbeAttributes Attr) {
  const uint32_t Current = Inst.getAttributes()->getZExtValue();
  const uint32_t Merged = Current | static_cast<uint32_t>(Attr);
  if (Merged == Current)
    return;

  // Swap the attribute operand for a constant carrying the merged bits.
  IRBuilder<> Builder(&Inst);
  Inst.replaceUsesOfWith(Inst.getAttributes(), Builder.getInt32(Merged));
}

/// Tag every pseudo probe in \c From as dangling and move it before \c To.
///
/// A block that has been emptied (i.e., all of its instructions were moved
/// out) won't be sampled at run time, so AutoFDO would be told that zero
/// samples were collected for it. That is inaccurate and can lead to
/// misleading block weights. Marking the block's probes as dangling lets the
/// AutoFDO profile loader treat the block as having unknown counts, which
/// gives the counts inference tool a chance to compute a reasonable weight.
///
/// This is not needed for dead blocks, which really do have a zero weight; it
/// is up to each transform to decide whether to call this function.
///
/// \returns true if at least one probe was moved.
bool moveAndDanglePseudoProbes(BasicBlock *From, Instruction *To) {
  // Probes are collected in a first pass and relocated in a second one so
  // that the block is not modified while it is being iterated.
  SmallVector<PseudoProbeInst *, 4> Probes;
  for (auto &Inst : *From) {
    auto *Probe = dyn_cast<PseudoProbeInst>(&Inst);
    if (!Probe)
      continue;
    addPseudoProbeAttribute(*Probe, PseudoProbeAttributes::Dangling);
    Probes.push_back(Probe);
  }

  for (auto *Probe : Probes)
    Probe->moveBefore(To);

  return !Probes.empty();
}

/// Erase duplicate dangling pseudo probes from \p Block.
///
/// Identical dangling probes in one block are redundant: they all mean the
/// same thing, namely that the counts inference tool is relied upon to produce
/// a reasonable count for the same original block. Only the first occurrence
/// of each such probe is kept.
///
/// \returns true if at least one probe was erased.
bool removeRedundantPseudoProbes(BasicBlock *Block) {
  // The hash only mixes the function GUID and the probe index; the equality
  // predicate below additionally compares attributes and debug location.
  auto ProbeHash = [](const PseudoProbeInst *Probe) {
    std::hash<uint64_t> Hasher;
    return Hasher(Probe->getFuncGuid()->getZExtValue()) ^
           Hasher(Probe->getIndex()->getZExtValue());
  };

  auto SameProbe = [](const PseudoProbeInst *LHS, const PseudoProbeInst *RHS) {
    return LHS->getFuncGuid() == RHS->getFuncGuid() &&
           LHS->getIndex() == RHS->getIndex() &&
           LHS->getAttributes() == RHS->getAttributes() &&
           LHS->getDebugLoc() == RHS->getDebugLoc();
  };

  std::unordered_set<PseudoProbeInst *, decltype(ProbeHash),
                     decltype(SameProbe)>
      Seen(0, ProbeHash, SameProbe);
  SmallVector<PseudoProbeInst *, 4> Duplicates;

  for (auto &Inst : *Block) {
    auto *Probe = dyn_cast<PseudoProbeInst>(&Inst);
    if (!Probe)
      continue;

    const bool IsDangling =
        (Probe->getAttributes()->getZExtValue() &
         static_cast<uint32_t>(PseudoProbeAttributes::Dangling)) != 0;
    if (!IsDangling)
      continue;

    // A failed insertion means an equivalent probe was seen earlier in the
    // block, so this one is queued for erasure once the scan is done.
    if (!Seen.insert(Probe).second)
      Duplicates.push_back(Probe);
  }

  for (auto *Probe : Duplicates)
    Probe->eraseFromParent();
  return !Duplicates.empty();
}
} // namespace llvm
21 changes: 21 additions & 0 deletions llvm/lib/ProfileData/SampleProf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,27 @@ raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS,
return OS;
}

/// Fold the samples and call targets of \p Other into this record,
/// optionally scaling the incoming counts by \p Weight.
///
/// For probe-based profiles, if the sample count of either record equals
/// FunctionSamples::InvalidProbeCount (a dangling sample), the merged sample
/// count is set to InvalidProbeCount instead of being summed.
///
/// \returns the status of the sample addition combined, via MergeResult, with
/// the status of every call-target addition.
sampleprof_error SampleRecord::merge(const SampleRecord &Other,
                                     uint64_t Weight) {
  // Merging a dangling sample with a non-dangling one must yield a dangling
  // sample.
  const bool EitherDangling =
      FunctionSamples::ProfileIsProbeBased &&
      (getSamples() == FunctionSamples::InvalidProbeCount ||
       Other.getSamples() == FunctionSamples::InvalidProbeCount);

  sampleprof_error Status = sampleprof_error::success;
  if (EitherDangling)
    NumSamples = FunctionSamples::InvalidProbeCount;
  else
    Status = addSamples(Other.getSamples(), Weight);

  for (const auto &Target : Other.getCallTargets())
    MergeResult(Status,
                addCalledTarget(Target.first(), Target.second, Weight));

  return Status;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void LineLocation::dump() const { print(dbgs()); }
#endif
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Transforms/IPO/SampleProfile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,11 @@ ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) {
if (!Probe)
return std::error_code();

// Ignore dangling probes since they are logically deleted and should not
// consume any profile samples.
if (Probe->isDangling())
return std::error_code();

const FunctionSamples *FS = findFunctionSamples(Inst);
if (!FS)
return std::error_code();
Expand Down
21 changes: 16 additions & 5 deletions llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -401,18 +401,29 @@ void PseudoProbeUpdatePass::runOnFunction(Function &F,
ProbeFactorMap ProbeFactors;
for (auto &Block : F) {
for (auto &I : Block) {
if (Optional<PseudoProbe> Probe = extractProbe(I))
ProbeFactors[Probe->Id] += BBProfileCount(&Block);
if (Optional<PseudoProbe> Probe = extractProbe(I)) {
// Do not count dangling probes since they are logically deleted and the
// current block that a dangling probe resides in doesn't reflect the
// execution count of the probe. The original samples of the probe will
// be distributed among the remaining probes if there are any. This is
// less than ideal, but at least we don't lose any samples.
if (!Probe->isDangling())
ProbeFactors[Probe->Id] += BBProfileCount(&Block);
}
}
}

// Fix up over-counted probes.
for (auto &Block : F) {
for (auto &I : Block) {
if (Optional<PseudoProbe> Probe = extractProbe(I)) {
float Sum = ProbeFactors[Probe->Id];
if (Sum != 0)
setProbeDistributionFactor(I, BBProfileCount(&Block) / Sum);
// Ignore dangling probes since they are logically deleted and should not
// consume any profile samples in the subsequent profile annotation.
if (!Probe->isDangling()) {
float Sum = ProbeFactors[Probe->Id];
if (Sum != 0)
setProbeDistributionFactor(I, BBProfileCount(&Block) / Sum);
}
}
}
}
Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/Transforms/Scalar/JumpThreading.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -433,8 +433,9 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,

// Jump threading may have introduced redundant debug values into BB
// which should be removed.
// Remove redundant pseudo probes as well.
if (Changed)
RemoveRedundantDbgInstrs(&BB);
RemoveRedundantDbgInstrs(&BB, true);

// Stop processing BB if it's the entry or is now deleted. The following
// routines attempt to eliminate BB and locating a suitable replacement
Expand Down Expand Up @@ -462,12 +463,12 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
BasicBlock *Succ = BI->getSuccessor(0);
if (
// The terminator must be the only non-phi instruction in BB.
BB.getFirstNonPHIOrDbg()->isTerminator() &&
BB.getFirstNonPHIOrDbg(true)->isTerminator() &&
// Don't alter Loop headers and latches to ensure another pass can
// detect and transform nested loops later.
!LoopHeaders.count(&BB) && !LoopHeaders.count(Succ) &&
TryToSimplifyUncondBranchFromEmptyBlock(&BB, DTU)) {
RemoveRedundantDbgInstrs(Succ);
RemoveRedundantDbgInstrs(Succ, true);
// BB is valid for cleanup here because we passed in DTU. F remains
// BB's parent until a DTU->getDomTree() event.
LVI->eraseBlock(&BB);
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PseudoProbe.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
Expand Down Expand Up @@ -432,7 +433,7 @@ static bool removeRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) {
return !ToBeRemoved.empty();
}

bool llvm::RemoveRedundantDbgInstrs(BasicBlock *BB) {
bool llvm::RemoveRedundantDbgInstrs(BasicBlock *BB, bool RemovePseudoOp) {
bool MadeChanges = false;
// By using the "backward scan" strategy before the "forward scan" strategy we
// can remove both dbg.value (2) and (3) in a situation like this:
Expand All @@ -447,6 +448,8 @@ bool llvm::RemoveRedundantDbgInstrs(BasicBlock *BB) {
// already is described as having the value V1 at (1).
MadeChanges |= removeRedundantDbgInstrsUsingBackwardScan(BB);
MadeChanges |= removeRedundantDbgInstrsUsingForwardScan(BB);
if (RemovePseudoOp)
MadeChanges |= removeRedundantPseudoProbes(BB);

if (MadeChanges)
LLVM_DEBUG(dbgs() << "Removed redundant dbg instrs from: "
Expand Down
14 changes: 14 additions & 0 deletions llvm/lib/Transforms/Utils/Local.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/PseudoProbe.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
Expand Down Expand Up @@ -1122,6 +1123,12 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
for (BasicBlock *Pred : predecessors(BB))
Pred->getTerminator()->setMetadata(LoopMDKind, LoopMD);

// For AutoFDO, since BB is going to be removed, we won't be able to sample
// it. To avoid assigning a zero weight for BB, move all its pseudo probes
// into Succ and mark them dangling. This should allow the counts inference a
// chance to get a more reasonable weight for BB.
moveAndDanglePseudoProbes(BB, &*Succ->getFirstInsertionPt());

// Everything that jumped to BB now goes to Succ.
BB->replaceAllUsesWith(Succ);
if (!Succ->hasName()) Succ->takeName(BB);
Expand Down Expand Up @@ -2795,6 +2802,13 @@ void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt,
// TODO: Extend llvm.dbg.value to take more than one SSA Value (PR39141) to
// encode predicated DIExpressions that yield different results on different
// code paths.

// A hoisted conditional probe should be treated as dangling so that it will
// not be over-counted when the samples collected on the non-conditional path
// are counted towards the conditional path. We leave it for the counts
// inference algorithm to figure out a proper count for a dangling probe.
moveAndDanglePseudoProbes(BB, InsertPt);

for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
Instruction *I = &*II;
I->dropUnknownNonDebugMetadata();
Expand Down
14 changes: 10 additions & 4 deletions llvm/lib/Transforms/Utils/SimplifyCFG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/PseudoProbe.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
Expand Down Expand Up @@ -2252,7 +2253,6 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
// probability for ThenBB, which is fine since the optimization here takes
// place regardless of the branch probability.
if (isa<PseudoProbeInst>(I)) {
SpeculatedDbgIntrinsics.push_back(I);
continue;
}

Expand Down Expand Up @@ -2338,6 +2338,12 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
I.dropUnknownNonDebugMetadata();
}

// A hoisted conditional probe should be treated as dangling so that it will
// not be over-counted when the samples collected on the non-conditional path
// are counted towards the conditional path. We leave it for the counts
// inference algorithm to figure out a proper count for a dangling probe.
moveAndDanglePseudoProbes(ThenBB, BI);

// Hoist the instructions.
BB->getInstList().splice(BI->getIterator(), ThenBB->getInstList(),
ThenBB->begin(), std::prev(ThenBB->end()));
Expand Down Expand Up @@ -6226,7 +6232,7 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
Options.NeedCanonicalLoop &&
(!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
(is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator();
BasicBlock::iterator I = BB->getFirstNonPHIOrDbg(true)->getIterator();
if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
!NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
return true;
Expand Down Expand Up @@ -6287,8 +6293,8 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
return requestResimplify();

// This block must be empty, except for the setcond inst, if it exists.
// Ignore dbg intrinsics.
auto I = BB->instructionsWithoutDebug().begin();
// Ignore dbg and pseudo intrinsics.
auto I = BB->instructionsWithoutDebug(true).begin();
if (&*I == BI) {
if (FoldValueComparisonIntoPredecessors(BI, Builder))
return requestResimplify();
Expand Down
101 changes: 101 additions & 0 deletions llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
; REQUIRES: x86_64-linux
; RUN: opt < %s -passes='pseudo-probe,jump-threading' -S -o %t
; RUN: FileCheck %s < %t --check-prefix=JT
; RUN: llc -pseudo-probe-for-profiling -function-sections <%t -filetype=asm | FileCheck %s --check-prefix=ASM
; RUN: opt < %s -passes='pseudo-probe' -S -o %t1
; RUN: llc -pseudo-probe-for-profiling -stop-after=tailduplication <%t1 | FileCheck %s --check-prefix=MIR-tail
; RUN: opt < %s -passes='pseudo-probe,simplifycfg' -S | FileCheck %s --check-prefix=SC

declare i32 @f1()

define i32 @foo(i1 %cond) {
; JT-LABEL: @foo(
; JT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1)
; ASM: pseudoprobe 6699318081062747564 1 0 0
%call = call i32 @f1()
br i1 %cond, label %T, label %F
T:
br label %Merge
F:
br label %Merge
Merge:
;; Check branch T and F are gone, and their probes (probe 2 and 3) are dangling.
; JT-LABEL-NO: T
; JT-LABEL-NO: F
; JT-LABEL: Merge
; JT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 2, i64 -1)
; JT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 2, i64 -1)
; JT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
; ASM: .pseudoprobe 6699318081062747564 3 0 2
; ASM: .pseudoprobe 6699318081062747564 2 0 2
; ASM: .pseudoprobe 6699318081062747564 4 0 0
ret i32 %call
}

;; Check block T and F are gone, and their probes (probe 2 and 3) are dangling.
; MIR-tail: bb.0
; MIR-tail: PSEUDO_PROBE [[#GUID:]], 1, 0, 0
; MIR-tail: PSEUDO_PROBE [[#GUID:]], 2, 0, 2
; MIR-tail: PSEUDO_PROBE [[#GUID:]], 3, 0, 2
; MIR-tail: PSEUDO_PROBE [[#GUID:]], 4, 0, 0


define void @foo2() {
bb:
%tmp = call i32 @f1()
%tmp1 = icmp eq i32 %tmp, 1
br i1 %tmp1, label %bb5, label %bb8

bb2:
%tmp4 = icmp ne i32 %tmp, 1
switch i1 %tmp4, label %bb2 [
i1 0, label %bb5
i1 1, label %bb8
]

bb5:
;; Check the pseudo probe with id 3 only has one copy.
; JT-COUNT-1: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 3, i32 2, i64 -1)
; JT-NOT: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 3, i32 2, i64 -1)
%tmp6 = phi i1 [ %tmp1, %bb ], [ false, %bb2 ]
br i1 %tmp6, label %bb8, label %bb7

bb7:
br label %bb8

bb8:
ret void
}

define i32 @test(i32 %a, i32 %b, i32 %c) {
;; Check block bb1 and bb2 are gone, and their probes (probe 2 and 3) are dangling.
; SC-LABEL: @test(
; SC-LABEL-NO: bb1
; SC-LABEL-NO: bb2
; SC: [[T1:%.*]] = icmp eq i32 [[B:%.*]], 0
; SC-DAG: call void @llvm.pseudoprobe(i64 [[#GUID3:]], i64 2, i32 2, i64 -1)
; SC-DAG: call void @llvm.pseudoprobe(i64 [[#GUID3]], i64 3, i32 2, i64 -1)
; SC: [[T2:%.*]] = icmp sgt i32 [[C:%.*]], 1
; SC: [[T3:%.*]] = add i32 [[A:%.*]], 1
; SC: [[SPEC_SELECT:%.*]] = select i1 [[T2]], i32 [[T3]], i32 [[A]]
; SC: [[T4:%.*]] = select i1 [[T1]], i32 [[SPEC_SELECT]], i32 [[B]]
; SC: [[T5:%.*]] = sub i32 [[T4]], 1
; SC: ret i32 [[T5]]

entry:
%t1 = icmp eq i32 %b, 0
br i1 %t1, label %bb1, label %bb3

bb1:
%t2 = icmp sgt i32 %c, 1
br i1 %t2, label %bb2, label %bb3

bb2:
%t3 = add i32 %a, 1
br label %bb3

bb3:
%t4 = phi i32 [ %b, %entry ], [ %a, %bb1 ], [ %t3, %bb2 ]
%t5 = sub i32 %t4, 1
ret i32 %t5
}
27 changes: 27 additions & 0 deletions llvm/test/Transforms/SampleProfile/pseudo-probe-dangling.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@

# REQUIRES: x86-registered-target
# Ensure llc can read and parse MIR pseudo probe operations.
# RUN: llc -mtriple x86_64-- -run-pass=pseudo-probe-inserter %s -o - | FileCheck %s

# CHECK: PSEUDO_PROBE 6699318081062747564, 1, 0, 0
# check probe 2 is moved before the test instruction.
# CHECK: PSEUDO_PROBE 6699318081062747564, 2, 0, 0
# CHECK: TEST32rr
# check probe 3 is dangling.
# CHECK: PSEUDO_PROBE 6699318081062747564, 3, 0, 2

name: foo
body: |
bb.0:
TEST32rr killed renamable $edi, renamable $edi, implicit-def $eflags
PSEUDO_PROBE 6699318081062747564, 1, 0, 0
JCC_1 %bb.1, 4, implicit $eflags
bb.2:
TEST32rr killed renamable $edi, renamable $edi, implicit-def $eflags
PSEUDO_PROBE 6699318081062747564, 2, 0, 0
bb.1:
PSEUDO_PROBE 6699318081062747564, 3, 0, 0
...
41 changes: 41 additions & 0 deletions llvm/test/Transforms/SampleProfile/pseudo-probe-dedup.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
; REQUIRES: x86_64-linux
; RUN: llc -pseudo-probe-for-profiling %s -filetype=asm -o - | FileCheck %s

declare i32 @f1()
declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0

define void @foo2() {
bb:
; CHECK: .pseudoprobe 2494702099028631698 1 0 0
call void @llvm.pseudoprobe(i64 2494702099028631698, i64 1, i32 0, i64 -1)
%tmp = call i32 @f1()
%tmp1 = icmp eq i32 %tmp, 1
br i1 %tmp1, label %bb5, label %bb8

bb2:
;; Check the pseudo probe with id 2 only has one copy.
; CHECK-COUNT-1: .pseudoprobe 2494702099028631698 2 0 2
; CHECK-NOT: .pseudoprobe 2494702099028631698 2 0 2
call void @llvm.pseudoprobe(i64 2494702099028631698, i64 2, i32 2, i64 -1)
%tmp4 = icmp ne i32 %tmp, 1
call void @llvm.pseudoprobe(i64 2494702099028631698, i64 2, i32 2, i64 -1)
switch i1 %tmp4, label %bb2 [
i1 0, label %bb5
i1 1, label %bb8
]

bb5:
%tmp6 = phi i1 [ %tmp1, %bb ], [ false, %bb2 ]
call void @llvm.pseudoprobe(i64 2494702099028631698, i64 2, i32 2, i64 -1)
br i1 %tmp6, label %bb8, label %bb7

bb7:
br label %bb8

bb8:
ret void
}

!llvm.pseudo_probe_desc = !{!0}

!0 = !{i64 2494702099028631698, i64 281612674956943, !"foo2", null}
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
foo:3200:13
1: 13
2: 7
3: 6
3: 18446744073709551615
4: 13
5: 7 _Z3foov:5 _Z3barv:2
6: 6 _Z3barv:4 _Z3foov:2
Expand Down
5 changes: 3 additions & 2 deletions llvm/test/tools/llvm-profdata/merge-probe-profile.test
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
# Tests for merge of probe-based profile files.
# Check the dangling probe 3 ends up with 18446744073709551615 (UINT64_MAX), i.e., not aggregated.

RUN: llvm-profdata merge --sample --text %p/Inputs/pseudo-probe-profile.proftext -o - | FileCheck %s --check-prefix=MERGE1
RUN: llvm-profdata merge --sample --extbinary %p/Inputs/pseudo-probe-profile.proftext -o %t && llvm-profdata merge --sample --text %t -o - | FileCheck %s --check-prefix=MERGE1
MERGE1: foo:3200:13
MERGE1: 1: 13
MERGE1: 2: 7
MERGE1: 3: 6
MERGE1: 3: 18446744073709551615
MERGE1: 4: 13
MERGE1: 5: 7 _Z3foov:5 _Z3barv:2
MERGE1: 6: 6 _Z3barv:4 _Z3foov:2
Expand All @@ -16,7 +17,7 @@ RUN: llvm-profdata merge --sample --extbinary %p/Inputs/pseudo-probe-profile.pro
MERGE2: foo:6400:26
MERGE2: 1: 26
MERGE2: 2: 14
MERGE2: 3: 12
MERGE2: 3: 18446744073709551615
MERGE2: 4: 26
MERGE2: 5: 14 _Z3foov:10 _Z3barv:4
MERGE2: 6: 12 _Z3barv:8 _Z3foov:4
Expand Down