[llvm-profgen] Filter out oversized LBR ranges.
As a follow-up to {D123271}, LBR ranges that are too big should also be considered invalid.

For example, the last two pairs in the following trace form the range [0x0d7b02b0, 0x368ba706], which covers a large number of functions in the binary. Such an oversized range should also be ignored.

   0x0c74505f/0x368b99a0 **0x368ba706**/0x0c745040  0x0d7b1c3f/**0x0d7b02b0**

Add a defensive check that filters out such ranges, based on the fact that a valid fall-through range should not cross an unconditional branch (call, return, or unconditional jmp).
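To illustrate the idea, here is a minimal, self-contained sketch of such a check. The offsets in the UncondBranches set are made up for illustration; the actual implementation is the isValidFallThroughRange/rangeCrossUncondBranch pair added in this patch.

#include <cstdint>
#include <cstdio>
#include <set>

// Offsets of call/return/unconditional-jmp instructions collected while
// disassembling the binary. A linear (fall-through) range that spans any of
// them cannot correspond to straight-line execution.
static bool isValidFallThroughRange(uint64_t Start, uint64_t End,
                                    const std::set<uint64_t> &UncondBranches) {
  if (Start > End)
    return false; // backward range, already rejected by D123271
  // First unconditional branch at or after Start; the range is bogus if that
  // branch falls strictly before End.
  auto It = UncondBranches.lower_bound(Start);
  return It == UncondBranches.end() || *It >= End;
}

int main() {
  // Hypothetical branch locations sitting between the two functions touched
  // by the bogus LBR pair above.
  std::set<uint64_t> UncondBranches = {0x0d7b1c40, 0x0e000000, 0x368b0000};

  // The oversized range formed by the last two pairs is rejected ...
  printf("%d\n", isValidFallThroughRange(0x0d7b02b0, 0x368ba706, UncondBranches)); // 0
  // ... while a short intra-function range stays valid.
  printf("%d\n", isValidFallThroughRange(0x0d7b02b0, 0x0d7b02f0, UncondBranches)); // 1
  return 0;
}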

Reviewed By: hoy, wenlei

Differential Revision: https://reviews.llvm.org/D125448
htyu authored and wlei-llvm committed May 12, 2022
1 parent 9145cb8 commit 9f732af
Showing 6 changed files with 70 additions and 34 deletions.
9 changes: 9 additions & 0 deletions llvm/test/tools/llvm-profgen/Inputs/invalid-range.perfscript
@@ -8,3 +8,12 @@


// The consecutive pairs 0x2017bf/0x201760 and 0x2017d8/0x2017e3 form an invalid execution range [0x2017e3, 0x2017bf], which should be ignored to avoid bogus instruction ranges.


20179e
2017f9
7f83e84e7793
5541f689495641d7
0x2017cf/0x20179e/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0

// The duplicated LBR entries "0x2017cf/0x20179e/P/-/-/0 0x2017cf/0x20179e/P/-/-/0" form the range [0x20179e, 0x2017cf], which crosses an unconditional jmp and should be ignored to avoid bogus instruction ranges.
51 changes: 26 additions & 25 deletions llvm/test/tools/llvm-profgen/invalid-range.test
@@ -8,39 +8,40 @@
; RUN: FileCheck %s --input-file %t2 --check-prefix=CS


; NOCS: 4
; NOCS-NEXT: 201760-20177f:2
; NOCS-NEXT: 20179e-2017bf:1
; NOCS-NEXT: 2017c4-2017cf:1
; NOCS: 4
; NOCS-NEXT: 201760-20177f:7
; NOCS-NEXT: 20179e-2017bf:5
; NOCS-NEXT: 2017c4-2017cf:6
; NOCS-NEXT: 2017c4-2017d8:1
; NOCS-NEXT: 4
; NOCS-NEXT: 20177f->2017c4:2
; NOCS-NEXT: 2017bf->201760:2
; NOCS-NEXT: 2017cf->20179e:2
; NOCS-NEXT: 20177f->2017c4:7
; NOCS-NEXT: 2017bf->201760:7
; NOCS-NEXT: 2017cf->20179e:8
; NOCS-NEXT: 2017d8->2017e3:1


; CS: []
; CS-NEXT: 3
; CS-NEXT: 201760-20177f:1
; CS-NEXT: 20179e-2017bf:1
; CS-NEXT: 2017c4-2017d8:1
; CS-NEXT: 4
; CS-NEXT: 20177f->2017c4:1
; CS-NEXT: 2017bf->201760:1
; CS-NEXT: 2017cf->20179e:1
; CS-NEXT: 2017d8->2017e3:1
; CS-NEXT: 4
; CS-NEXT: 201760-20177f:6
; CS-NEXT: 20179e-2017bf:5
; CS-NEXT: 2017c4-2017cf:5
; CS-NEXT: 2017c4-2017d8:1
; CS-NEXT: 4
; CS-NEXT: 20177f->2017c4:6
; CS-NEXT: 2017bf->201760:6
; CS-NEXT: 2017cf->20179e:6
; CS-NEXT: 2017d8->2017e3:1
; CS-NEXT: [0x7f4]
; CS-NEXT: 1
; CS-NEXT: 2017c4-2017cf:1
; CS-NEXT: 2
; CS-NEXT: 2017bf->201760:1
; CS-NEXT: 2017cf->20179e:1
; CS-NEXT: 1
; CS-NEXT: 2017c4-2017cf:1
; CS-NEXT: 2
; CS-NEXT: 2017bf->201760:1
; CS-NEXT: 2017cf->20179e:2
; CS-NEXT: [0x7f4 @ 0x7bf]
; CS-NEXT: 1
; CS-NEXT: 201760-20177f:1
; CS-NEXT: 1
; CS-NEXT: 20177f->2017c4:1
; CS-NEXT: 1
; CS-NEXT: 201760-20177f:1
; CS-NEXT: 1
; CS-NEXT: 20177f->2017c4:1

; clang -O3 -fuse-ld=lld -fpseudo-probe-for-profiling
; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls
13 changes: 7 additions & 6 deletions llvm/tools/llvm-profgen/PerfReader.cpp
@@ -99,7 +99,8 @@ void VirtualUnwinder::unwindLinear(UnwindState &State, uint64_t Repeat) {
return;
}

if (Target > End) {
if (!isValidFallThroughRange(Binary->virtualAddrToOffset(Target),
Binary->virtualAddrToOffset(End), Binary)) {
// Skip unwinding the rest of LBR trace when a bogus range is seen.
State.setInvalid();
return;
@@ -581,7 +582,6 @@ bool PerfScriptReader::extractLBRStack(TraceStream &TraceIt,
if (!SrcIsInternal && !DstIsInternal)
continue;

// TODO: filter out buggy duplicate branches on Skylake
LBRStack.emplace_back(LBREntry(Src, Dst));
}
TraceIt.advance();
@@ -878,7 +878,7 @@ void PerfScriptReader::computeCounterFromLBR(const PerfSample *Sample,
// LBR and FROM of next LBR.
uint64_t StartOffset = TargetOffset;
if (Binary->offsetIsCode(StartOffset) && Binary->offsetIsCode(EndOffeset) &&
StartOffset <= EndOffeset)
isValidFallThroughRange(StartOffset, EndOffeset, Binary))
Counter.recordRangeCount(StartOffset, EndOffeset, Repeat);
EndOffeset = SourceOffset;
}
@@ -1124,7 +1124,7 @@ void PerfScriptReader::warnInvalidRange() {
const char *RangeCrossFuncMsg =
"Fall through range should not cross function boundaries, likely due to "
"profile and binary mismatch.";
const char *BogusRangeMsg = "Range start is after range end.";
const char *BogusRangeMsg = "Range start is after or too far from range end.";

uint64_t TotalRangeNum = 0;
uint64_t InstNotBoundary = 0;
@@ -1155,7 +1155,7 @@
WarnInvalidRange(StartOffset, EndOffset, RangeCrossFuncMsg);
}

if (StartOffset > EndOffset) {
if (!isValidFallThroughRange(StartOffset, EndOffset, Binary)) {
BogusRange += I.second;
WarnInvalidRange(StartOffset, EndOffset, BogusRangeMsg);
}
@@ -1172,7 +1172,8 @@
"of samples are from ranges that do cross function boundaries.");
emitWarningSummary(
BogusRange, TotalRangeNum,
"of samples are from ranges that have range start after range end.");
"of samples are from ranges that have range start after or too far from "
"range end acrossing the unconditinal jmp.");
}

void PerfScriptReader::parsePerfTraces() {
11 changes: 11 additions & 0 deletions llvm/tools/llvm-profgen/PerfReader.h
@@ -210,6 +210,17 @@ using AggregatedCounter =

using SampleVector = SmallVector<std::tuple<uint64_t, uint64_t, uint64_t>, 16>;

inline bool isValidFallThroughRange(uint64_t Start, uint64_t End,
ProfiledBinary *Binary) {
// A Start bigger than End is considered invalid.
// LBR ranges that cross an unconditional jmp are also assumed invalid.
// It has been found that perf data may contain duplicate LBR entries that form
// ranges that do not reflect real execution flow on some Intel targets,
// e.g. Skylake. Such ranges are usually very long. Exclude them, since a
// linear execution range cannot span an unconditional jmp.
return Start <= End && !Binary->rangeCrossUncondBranch(Start, End);
}

// The state for the unwinder, it doesn't hold the data but only keep the
// pointer/index of the data, While unwinding, the CallStack is changed
// dynamicially and will be recorded as the context of the sample
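The comment above ties into how fall-through ranges are derived: roughly, each range runs from the TO of one LBR entry to the FROM of the next (newer) entry, as the loop in computeCounterFromLBR above suggests, so a duplicated entry such as 0x2017cf/0x20179e yields the range [0x20179e, 0x2017cf] from the test input. A simplified sketch of that pairing plus the new filter follows; the names, the callback, and the jmp location at 0x2017c0 are illustrative assumptions, not the actual PerfReader code.

#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <functional>
#include <vector>

struct LBREntry {
  uint64_t Source;
  uint64_t Target;
};

// Pair adjacent LBR entries (newest first) into fall-through ranges and keep
// only the ones that pass the validity check.
static void collectRanges(const std::vector<LBREntry> &LBRStack,
                          const std::function<bool(uint64_t, uint64_t)> &IsValid) {
  for (size_t I = 0; I + 1 < LBRStack.size(); ++I) {
    uint64_t Start = LBRStack[I + 1].Target; // TO of the older branch
    uint64_t End = LBRStack[I].Source;       // FROM of the newer branch
    if (IsValid(Start, End))
      printf("count range [0x%" PRIx64 ", 0x%" PRIx64 "]\n", Start, End);
    // Invalid ranges (start after end, or spanning an unconditional branch)
    // are dropped instead of being turned into bogus instruction ranges.
  }
}

int main() {
  // The duplicated entries from the new perfscript sample, plus the next one.
  std::vector<LBREntry> LBRStack = {
      {0x2017cf, 0x20179e}, {0x2017cf, 0x20179e}, {0x20177f, 0x2017c4}};
  // Pretend an unconditional jmp sits at 0x2017c0, inside [0x20179e, 0x2017cf].
  collectRanges(LBRStack, [](uint64_t Start, uint64_t End) {
    return Start <= End && !(0x2017c0 >= Start && 0x2017c0 < End);
  });
  // Prints only "count range [0x2017c4, 0x2017cf]"; the duplicated pair's
  // range [0x20179e, 0x2017cf] is filtered out.
  return 0;
}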
11 changes: 8 additions & 3 deletions llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -495,12 +495,17 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,

// Populate address maps.
CodeAddrOffsets.push_back(Offset);
if (MCDesc.isCall())
if (MCDesc.isCall()) {
CallOffsets.insert(Offset);
else if (MCDesc.isReturn())
UncondBranchOffsets.insert(Offset);
} else if (MCDesc.isReturn()) {
RetOffsets.insert(Offset);
else if (MCDesc.isBranch())
UncondBranchOffsets.insert(Offset);
} else if (MCDesc.isBranch()) {
if (MCDesc.isUnconditionalBranch())
UncondBranchOffsets.insert(Offset);
BranchOffsets.insert(Offset);
}

if (InvalidInstLength) {
WarnInvalidInsts(Offset - InvalidInstLength, Offset - 1);
9 changes: 9 additions & 0 deletions llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -239,6 +239,8 @@ class ProfiledBinary {
std::unordered_set<uint64_t> CallOffsets;
// A set of return instruction offsets. Used by virtual unwinding.
std::unordered_set<uint64_t> RetOffsets;
// An ordered set of unconditional branch instruction offsets.
std::set<uint64_t> UncondBranchOffsets;
// A set of branch instruction offsets.
std::unordered_set<uint64_t> BranchOffsets;

@@ -395,6 +397,13 @@ class ProfiledBinary {
CallOffsets.count(Offset);
}

bool rangeCrossUncondBranch(uint64_t Start, uint64_t End) {
if (Start >= End)
return false;
auto R = UncondBranchOffsets.lower_bound(Start);
return R != UncondBranchOffsets.end() && *R < End;
}

uint64_t getAddressforIndex(uint64_t Index) const {
return offsetToVirtualAddr(CodeAddrOffsets[Index]);
}
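One small design note: unlike the neighboring CallOffsets/RetOffsets/BranchOffsets members, UncondBranchOffsets is an ordered std::set rather than an unordered_set, presumably so that rangeCrossUncondBranch can answer "is there an unconditional branch inside [Start, End)?" with a single lower_bound lookup. A tiny standalone demonstration of that query, with made-up offsets:

#include <cassert>
#include <cstdint>
#include <set>

int main() {
  // Made-up unconditional-branch offsets.
  std::set<uint64_t> UncondBranchOffsets = {0x1080, 0x1200};

  auto rangeCrossUncondBranch = [&](uint64_t Start, uint64_t End) {
    if (Start >= End)
      return false;
    // lower_bound gives the first recorded branch at or after Start; the
    // range is crossed only if that branch lies strictly before End.
    auto R = UncondBranchOffsets.lower_bound(Start);
    return R != UncondBranchOffsets.end() && *R < End;
  };

  assert(!rangeCrossUncondBranch(0x1000, 0x1040)); // no branch inside
  assert(rangeCrossUncondBranch(0x1000, 0x1100));  // spans the jmp at 0x1080
  return 0;
}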
