llvm-profgen: summarize accumulated warnings and warn about invalid ranges

Per-address warnings (truncated stacks, untracked call sites, invalid ranges)
are printed only when --show-detailed-warning is given. Each category also
gets a one-line "<percent>%(Num/Total) ..." summary, which is skipped when
either count is zero. PerfScriptReader::warnInvalidRange() builds ranges from
adjacent LBR entries and counts those that are not on an instruction
boundary, do not belong to any function, or cross a function boundary.
ProfiledBinary renames CallAddrs/RetAddrs to CallOffsets/RetOffsets and
additionally records branch instruction offsets.

NOTE(review): the post-image blob ids on the `index` lines below were not
regenerated after editing the added lines; refresh them before relying on
`git apply --3way`.
---
diff --git a/llvm/test/tools/llvm-profgen/cs-invalid-ret-addr.test b/llvm/test/tools/llvm-profgen/cs-invalid-ret-addr.test
index 3eac05e65f8bf..fb327e53b953d 100644
--- a/llvm/test/tools/llvm-profgen/cs-invalid-ret-addr.test
+++ b/llvm/test/tools/llvm-profgen/cs-invalid-ret-addr.test
@@ -1,4 +1,4 @@
 ; REQUIRES: x86_64-linux
-; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cs-invalid-ret-addr.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t 2>&1 | FileCheck %s
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cs-invalid-ret-addr.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --show-detailed-warning 2>&1 | FileCheck %s
 
 ; CHECK: warning: Truncated stack sample due to invalid return address at 0x400686, likely caused by frame pointer omission
diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index e36c707c4fcd8..3801b12c92dbc 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -31,6 +31,10 @@ static cl::opt<bool>
     IgnoreStackSamples("ignore-stack-samples", cl::init(false), cl::ZeroOrMore,
                        cl::desc("Ignore call stack samples for hybrid samples "
                                 "and produce context-insensitive profile."));
+static cl::opt<bool>
+    ShowDetailedWarning("show-detailed-warning", cl::init(false),
+                        cl::ZeroOrMore,
+                        cl::desc("Show detailed warning message."));
 
 extern cl::opt<std::string> PerfTraceFilename;
 extern cl::opt<bool> ShowDisassemblyOnly;
@@ -433,10 +437,16 @@ void HybridPerfReader::unwindSamples() {
   }
 
   // Warn about untracked frames due to missing probes.
-  for (auto Address : AllUntrackedCallsites)
-    WithColor::warning() << "Profile context truncated due to missing probe "
-                         << "for call instruction at "
-                         << format("0x%" PRIx64, Address) << "\n";
+  if (ShowDetailedWarning) {
+    for (auto Address : AllUntrackedCallsites)
+      WithColor::warning() << "Profile context truncated due to missing probe "
+                           << "for call instruction at "
+                           << format("0x%" PRIx64, Address) << "\n";
+  }
+
+  emitWarningSummary(AllUntrackedCallsites.size(), SampleCounters.size(),
+                     "of profiled contexts are truncated due to missing probe "
+                     "for call instruction.");
 }
 
 bool PerfScriptReader::extractLBRStack(TraceStream &TraceIt,
@@ -1008,12 +1018,111 @@ void HybridPerfReader::generateUnsymbolizedProfile() {
 }
 
 void PerfScriptReader::warnTruncatedStack() {
-  for (auto Address : InvalidReturnAddresses) {
-    WithColor::warning()
-        << "Truncated stack sample due to invalid return address at "
-        << format("0x%" PRIx64, Address)
-        << ", likely caused by frame pointer omission\n";
+  if (ShowDetailedWarning) {
+    for (auto Address : InvalidReturnAddresses) {
+      WithColor::warning()
+          << "Truncated stack sample due to invalid return address at "
+          << format("0x%" PRIx64, Address)
+          << ", likely caused by frame pointer omission\n";
+    }
+  }
+  emitWarningSummary(
+      InvalidReturnAddresses.size(), AggregatedSamples.size(),
+      "of truncated stack samples due to invalid return address, "
+      "likely caused by frame pointer omission.");
+}
+
+// Emit a single summary warning of the form "<percent>%(Num/Total) Msg".
+// Nothing is printed when either Num or Total is zero.
+void PerfScriptReader::emitWarningSummary(uint64_t Num, uint64_t Total,
+                                          StringRef Msg) {
+  if (!Total || !Num)
+    return;
+  WithColor::warning() << format("%.2f", static_cast<double>(Num) * 100 / Total)
+                       << "%(" << Num << "/" << Total << ") " << Msg << "\n";
+}
+
+// Build ranges from adjacent LBR entries of every aggregated sample and warn
+// about ranges that fail the boundary or function-range checks below.
+void PerfScriptReader::warnInvalidRange() {
+  std::unordered_map<std::pair<uint64_t, uint64_t>, uint64_t,
+                     pair_hash<uint64_t, uint64_t>>
+      Ranges;
+
+  for (const auto &Item : AggregatedSamples) {
+    const PerfSample *Sample = Item.first.getPtr();
+    uint64_t Count = Item.second;
+    uint64_t EndOffset = 0;
+    for (const LBREntry &LBR : Sample->LBRStack) {
+      uint64_t SourceOffset = Binary->virtualAddrToOffset(LBR.Source);
+      uint64_t StartOffset = Binary->virtualAddrToOffset(LBR.Target);
+      // Pair this entry's target with the previous entry's source to form a
+      // range; the first entry has no predecessor (EndOffset is still 0).
+      if (EndOffset != 0)
+        Ranges[{StartOffset, EndOffset}] += Count;
+      EndOffset = SourceOffset;
+    }
   }
+
+  if (Ranges.empty()) {
+    WithColor::warning() << "No samples in perf script!\n";
+    return;
+  }
+
+  auto WarnInvalidRange =
+      [&](uint64_t StartOffset, uint64_t EndOffset, StringRef Msg) {
+        if (!ShowDetailedWarning)
+          return;
+        WithColor::warning()
+            << "["
+            << format("%8" PRIx64, Binary->offsetToVirtualAddr(StartOffset))
+            << ","
+            << format("%8" PRIx64, Binary->offsetToVirtualAddr(EndOffset))
+            << "]: " << Msg << "\n";
+      };
+
+  const char *EndNotBoundaryMsg = "Range is not on instruction boundary, "
+                                  "likely due to profile and binary mismatch.";
+  const char *DanglingRangeMsg = "Range does not belong to any functions, "
+                                 "likely from PLT, .init or .fini section.";
+  const char *RangeCrossFuncMsg =
+      "Fall through range should not cross function boundaries, likely due to "
+      "profile and binary mismatch.";
+
+  uint64_t InstNotBoundary = 0;
+  uint64_t UnmatchedRange = 0;
+  uint64_t RangeCrossFunc = 0;
+
+  for (auto &I : Ranges) {
+    uint64_t StartOffset = I.first.first;
+    uint64_t EndOffset = I.first.second;
+
+    if (!Binary->offsetIsCode(StartOffset) ||
+        !Binary->offsetIsTransfer(EndOffset)) {
+      InstNotBoundary++;
+      WarnInvalidRange(StartOffset, EndOffset, EndNotBoundaryMsg);
+    }
+
+    auto *FRange = Binary->findFuncRangeForOffset(StartOffset);
+    if (!FRange) {
+      UnmatchedRange++;
+      WarnInvalidRange(StartOffset, EndOffset, DanglingRangeMsg);
+      continue;
+    }
+
+    if (EndOffset >= FRange->EndOffset) {
+      RangeCrossFunc++;
+      WarnInvalidRange(StartOffset, EndOffset, RangeCrossFuncMsg);
+    }
+  }
+
+  uint64_t TotalRangeNum = Ranges.size();
+  emitWarningSummary(InstNotBoundary, TotalRangeNum,
+                     "of profiled ranges are not on instruction boundary.");
+  emitWarningSummary(UnmatchedRange, TotalRangeNum,
+                     "of profiled ranges do not belong to any functions.");
+  emitWarningSummary(RangeCrossFunc, TotalRangeNum,
+                     "of profiled ranges do cross function boundaries.");
 }
 
 void PerfScriptReader::parsePerfTraces() {
@@ -1022,6 +1131,7 @@ void PerfScriptReader::parsePerfTraces() {
 
   // Generate unsymbolized profile.
   warnTruncatedStack();
+  warnInvalidRange();
   generateUnsymbolizedProfile();
 
   if (SkipSymbolization)
diff --git a/llvm/tools/llvm-profgen/PerfReader.h b/llvm/tools/llvm-profgen/PerfReader.h
index c9f74313c166d..0df334d74ef2a 100644
--- a/llvm/tools/llvm-profgen/PerfReader.h
+++ b/llvm/tools/llvm-profgen/PerfReader.h
@@ -581,10 +581,14 @@ class PerfScriptReader : public PerfReaderBase {
   void parseAndAggregateTrace();
   // Parse either an MMAP event or a perf sample
   void parseEventOrSample(TraceStream &TraceIt);
+  // Emit a "<percent>%(Num/Total) Msg" summary warning; no-op if either is 0.
+  void emitWarningSummary(uint64_t Num, uint64_t Total, StringRef Msg);
   // Warn if the relevant mmap event is missing.
   void warnIfMissingMMap();
   // Emit accumulate warnings.
   void warnTruncatedStack();
+  // Warn if range is invalid.
+  void warnInvalidRange();
   // Extract call stack from the perf trace lines
   bool extractCallstack(TraceStream &TraceIt,
                         SmallVectorImpl<uint64_t> &CallStack);
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index bf5c914c5daa8..cec28626291ef 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -187,7 +187,7 @@ void ProfiledBinary::load() {
 
   // Use function start and return address to infer prolog and epilog
   ProEpilogTracker.inferPrologOffsets(StartOffset2FuncRangeMap);
-  ProEpilogTracker.inferEpilogOffsets(RetAddrs);
+  ProEpilogTracker.inferEpilogOffsets(RetOffsets);
 
   // TODO: decode other sections.
 }
@@ -397,9 +397,11 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
       // Populate address maps.
       CodeAddrOffsets.push_back(Offset);
       if (MCDesc.isCall())
-        CallAddrs.insert(Offset);
+        CallOffsets.insert(Offset);
       else if (MCDesc.isReturn())
-        RetAddrs.insert(Offset);
+        RetOffsets.insert(Offset);
+      else if (MCDesc.isBranch())
+        BranchOffsets.insert(Offset);
 
       if (InvalidInstLength) {
         WarnInvalidInsts(Offset - InvalidInstLength, Offset - 1);
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index b9049e37a2aa0..b810a611a01a8 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -204,9 +204,11 @@ class ProfiledBinary {
   // sorting is needed to fast advance to the next forward/backward instruction.
   std::vector<uint64_t> CodeAddrOffsets;
   // A set of call instruction offsets. Used by virtual unwinding.
-  std::unordered_set<uint64_t> CallAddrs;
+  std::unordered_set<uint64_t> CallOffsets;
   // A set of return instruction offsets. Used by virtual unwinding.
-  std::unordered_set<uint64_t> RetAddrs;
+  std::unordered_set<uint64_t> RetOffsets;
+  // A set of branch instruction offsets.
+  std::unordered_set<uint64_t> BranchOffsets;
 
   // Estimate and track function prolog and epilog ranges.
   PrologEpilogTracker ProEpilogTracker;
@@ -305,23 +307,33 @@ class ProfiledBinary {
     return TextSegmentOffsets;
   }
 
+  // True if Offset has an entry in Offset2InstSizeMap.
+  bool offsetIsCode(uint64_t Offset) const {
+    return Offset2InstSizeMap.find(Offset) != Offset2InstSizeMap.end();
+  }
   bool addressIsCode(uint64_t Address) const {
     uint64_t Offset = virtualAddrToOffset(Address);
-    return Offset2InstSizeMap.find(Offset) != Offset2InstSizeMap.end();
+    return offsetIsCode(Offset);
   }
   bool addressIsCall(uint64_t Address) const {
     uint64_t Offset = virtualAddrToOffset(Address);
-    return CallAddrs.count(Offset);
+    return CallOffsets.count(Offset);
   }
   bool addressIsReturn(uint64_t Address) const {
     uint64_t Offset = virtualAddrToOffset(Address);
-    return RetAddrs.count(Offset);
+    return RetOffsets.count(Offset);
   }
   bool addressInPrologEpilog(uint64_t Address) const {
     uint64_t Offset = virtualAddrToOffset(Address);
     return ProEpilogTracker.PrologEpilogSet.count(Offset);
   }
 
+  // True if Offset was recorded as a branch, return or call instruction.
+  bool offsetIsTransfer(uint64_t Offset) const {
+    return BranchOffsets.count(Offset) || RetOffsets.count(Offset) ||
+           CallOffsets.count(Offset);
+  }
+
   uint64_t getAddressforIndex(uint64_t Index) const {
     return offsetToVirtualAddr(CodeAddrOffsets[Index]);
   }