diff --git a/llvm/test/tools/llvm-profgen/Inputs/buildid-cs-noprobe.aggperfscript b/llvm/test/tools/llvm-profgen/Inputs/buildid-cs-noprobe.aggperfscript new file mode 100644 index 0000000000000..9dbd2725c8e4d --- /dev/null +++ b/llvm/test/tools/llvm-profgen/Inputs/buildid-cs-noprobe.aggperfscript @@ -0,0 +1,11 @@ +2 + aabb1122:4005dc + aabb1122:400634 + aabb1122:400684 + 7f68c5788793 + aabb1122:0x4005c8/aabb1122:0x4005dc aabb1122:0x40062f/aabb1122:0x4005b0 aabb1122:0x400645/aabb1122:0x4005ff aabb1122:0x400637/aabb1122:0x400645 aabb1122:0x4005e9/aabb1122:0x400634 aabb1122:0x4005d7/aabb1122:0x4005e5 aabb1122:0x40062f/aabb1122:0x4005b0 aabb1122:0x400645/aabb1122:0x4005ff aabb1122:0x400637/aabb1122:0x400645 aabb1122:0x4005e9/aabb1122:0x400634 aabb1122:0x4005d7/aabb1122:0x4005e5 aabb1122:0x40062f/aabb1122:0x4005b0 aabb1122:0x400645/aabb1122:0x4005ff aabb1122:0x400637/aabb1122:0x400645 aabb1122:0x4005e9/aabb1122:0x400634 aabb1122:0x4005c8/aabb1122:0x4005dc +2 + aabb1122:4005b0 + aabb1122:400684 + 7f68c5788793 + aabb1122:0x40062f/aabb1122:0x4005b0 aabb1122:0x400645/aabb1122:0x4005ff aabb1122:0x400637/aabb1122:0x400645 aabb1122:0x4005e9/aabb1122:0x400634 aabb1122:0x4005c8/aabb1122:0x4005dc aabb1122:0x40062f/aabb1122:0x4005b0 aabb1122:0x400645/aabb1122:0x4005ff aabb1122:0x400637/aabb1122:0x400645 aabb1122:0x4005e9/aabb1122:0x400634 aabb1122:0x4005d7/aabb1122:0x4005e5 aabb1122:0x40062f/aabb1122:0x4005b0 aabb1122:0x400645/aabb1122:0x4005ff aabb1122:0x400637/aabb1122:0x400645 aabb1122:0x4005e9/aabb1122:0x400634 aabb1122:0x4005d7/aabb1122:0x4005e5 aabb1122:0x40062f/aabb1122:0x4005b0 diff --git a/llvm/test/tools/llvm-profgen/Inputs/buildid-noprobe.perfscript b/llvm/test/tools/llvm-profgen/Inputs/buildid-noprobe.perfscript new file mode 100644 index 0000000000000..171158ef84d7c --- /dev/null +++ b/llvm/test/tools/llvm-profgen/Inputs/buildid-noprobe.perfscript @@ -0,0 +1,5 @@ +// LBR-only perfscript with [buildid:]0xaddr format. +// Same as noprobe.perfscript but with "aabb1122" buildid prefix on all addresses. + aabb1122:40062f aabb1122:0x40062f/aabb1122:0x4005b0/P/-/-/9 aabb1122:0x400645/aabb1122:0x4005ff/P/-/-/1 aabb1122:0x400637/aabb1122:0x400645/P/-/-/1 aabb1122:0x4005e9/aabb1122:0x400634/P/-/-/1 aabb1122:0x4005d7/aabb1122:0x4005e5/P/-/-/6 aabb1122:0x40062f/aabb1122:0x4005b0/P/-/-/16 aabb1122:0x400645/aabb1122:0x4005ff/P/-/-/1 aabb1122:0x400637/aabb1122:0x400645/P/-/-/1 aabb1122:0x4005e9/aabb1122:0x400634/P/-/-/1 aabb1122:0x4005d7/aabb1122:0x4005e5/P/-/-/6 aabb1122:0x40062f/aabb1122:0x4005b0/P/-/-/6 aabb1122:0x400645/aabb1122:0x4005ff/P/-/-/1 aabb1122:0x400637/aabb1122:0x400645/P/-/-/1 aabb1122:0x4005e9/aabb1122:0x400634/P/-/-/1 aabb1122:0x4005c8/aabb1122:0x4005dc/P/-/-/8 aabb1122:0x40062f/aabb1122:0x4005b0/P/-/-/9 aabb1122:0x400645/aabb1122:0x4005ff/P/-/-/1 aabb1122:0x400637/aabb1122:0x400645/P/-/-/1 aabb1122:0x4005e9/aabb1122:0x400634/P/-/-/1 aabb1122:0x4005d7/aabb1122:0x4005e5/P/-/-/10 aabb1122:0x40062f/aabb1122:0x4005b0/P/-/-/14 aabb1122:0x400645/aabb1122:0x4005ff/P/-/-/1 aabb1122:0x400637/aabb1122:0x400645/P/-/-/1 aabb1122:0x4005e9/aabb1122:0x400634/P/-/-/1 aabb1122:0x4005d7/aabb1122:0x4005e5/P/-/-/7 aabb1122:0x40062f/aabb1122:0x4005b0/P/-/-/8 aabb1122:0x400645/aabb1122:0x4005ff/P/-/-/1 aabb1122:0x400637/aabb1122:0x400645/P/-/-/1 aabb1122:0x4005e9/aabb1122:0x400634/P/-/-/1 aabb1122:0x4005c8/aabb1122:0x4005dc/P/-/-/7 aabb1122:0x40062f/aabb1122:0x4005b0/P/-/-/15 aabb1122:0x400645/aabb1122:0x4005ff/P/-/-/1 + aabb1122:4005d7 aabb1122:0x4005d7/aabb1122:0x4005e5/P/-/-/8 aabb1122:0x40062f/aabb1122:0x4005b0/P/-/-/6 aabb1122:0x400645/aabb1122:0x4005ff/P/-/-/1 aabb1122:0x400637/aabb1122:0x400645/P/-/-/1 aabb1122:0x4005e9/aabb1122:0x400634/P/-/-/2 aabb1122:0x4005c8/aabb1122:0x4005dc/P/-/-/7 aabb1122:0x40062f/aabb1122:0x4005b0/P/-/-/11 aabb1122:0x400645/aabb1122:0x4005ff/P/-/-/1 aabb1122:0x400637/aabb1122:0x400645/P/-/-/1 aabb1122:0x4005e9/aabb1122:0x400634/P/-/-/1 aabb1122:0x4005d7/aabb1122:0x4005e5/P/-/-/8 aabb1122:0x40062f/aabb1122:0x4005b0/P/-/-/9 aabb1122:0x400645/aabb1122:0x4005ff/P/-/-/1 aabb1122:0x400637/aabb1122:0x400645/P/-/-/1 aabb1122:0x4005e9/aabb1122:0x400634/P/-/-/1 aabb1122:0x4005d7/aabb1122:0x4005e5/P/-/-/5 aabb1122:0x40062f/aabb1122:0x4005b0/P/-/-/11 aabb1122:0x400645/aabb1122:0x4005ff/P/-/-/1 aabb1122:0x400637/aabb1122:0x400645/P/-/-/1 aabb1122:0x4005e9/aabb1122:0x400634/P/-/-/2 aabb1122:0x4005c8/aabb1122:0x4005dc/P/-/-/7 aabb1122:0x40062f/aabb1122:0x4005b0/P/-/-/10 aabb1122:0x400645/aabb1122:0x4005ff/P/-/-/1 aabb1122:0x400637/aabb1122:0x400645/P/-/-/1 aabb1122:0x4005e9/aabb1122:0x400634/P/-/-/1 aabb1122:0x4005d7/aabb1122:0x4005e5/P/-/-/8 aabb1122:0x40062f/aabb1122:0x4005b0/P/-/-/9 aabb1122:0x400645/aabb1122:0x4005ff/P/-/-/1 aabb1122:0x400637/aabb1122:0x400645/P/-/-/1 aabb1122:0x4005e9/aabb1122:0x400634/P/-/-/1 aabb1122:0x4005d7/aabb1122:0x4005e5/P/-/-/13 aabb1122:0x40062f/aabb1122:0x4005b0/P/-/-/9 + aabb1122:4005c8 aabb1122:0x4005c8/aabb1122:0x4005dc/P/-/-/11 aabb1122:0x40062f/aabb1122:0x4005b0/P/-/-/8 aabb1122:0x400645/aabb1122:0x4005ff/P/-/-/1 aabb1122:0x400637/aabb1122:0x400645/P/-/-/1 aabb1122:0x4005e9/aabb1122:0x400634/P/-/-/1 aabb1122:0x4005d7/aabb1122:0x4005e5/P/-/-/5 aabb1122:0x40062f/aabb1122:0x4005b0/P/-/-/6 aabb1122:0x400645/aabb1122:0x4005ff/P/-/-/1 aabb1122:0x400637/aabb1122:0x400645/P/-/-/1 aabb1122:0x4005e9/aabb1122:0x400634/P/-/-/1 aabb1122:0x4005d7/aabb1122:0x4005e5/P/-/-/12 aabb1122:0x40062f/aabb1122:0x4005b0/P/-/-/6 aabb1122:0x400645/aabb1122:0x4005ff/P/-/-/1 aabb1122:0x400637/aabb1122:0x400645/P/-/-/1 aabb1122:0x4005e9/aabb1122:0x400634/P/-/-/2 aabb1122:0x4005c8/aabb1122:0x4005dc/P/-/-/7 aabb1122:0x40062f/aabb1122:0x4005b0/P/-/-/10 aabb1122:0x400645/aabb1122:0x4005ff/P/-/-/1 aabb1122:0x400637/aabb1122:0x400645/P/-/-/1 aabb1122:0x4005e9/aabb1122:0x400634/P/-/-/1 aabb1122:0x4005d7/aabb1122:0x4005e5/P/-/-/8 aabb1122:0x40062f/aabb1122:0x4005b0/P/-/-/9 aabb1122:0x400645/aabb1122:0x4005ff/P/-/-/1 aabb1122:0x400637/aabb1122:0x400645/P/-/-/1 aabb1122:0x4005e9/aabb1122:0x400634/P/-/-/1 aabb1122:0x4005d7/aabb1122:0x4005e5/P/-/-/12 aabb1122:0x40062f/aabb1122:0x4005b0/P/-/-/6 aabb1122:0x400645/aabb1122:0x4005ff/P/-/-/1 aabb1122:0x400637/aabb1122:0x400645/P/-/-/1 aabb1122:0x4005e9/aabb1122:0x400634/P/-/-/2 aabb1122:0x4005c8/aabb1122:0x4005dc/P/-/-/8 aabb1122:0x40062f/aabb1122:0x4005b0/P/-/-/8 diff --git a/llvm/test/tools/llvm-profgen/filter-build-id.test b/llvm/test/tools/llvm-profgen/filter-build-id.test new file mode 100644 index 0000000000000..2943c06b2b6ab --- /dev/null +++ b/llvm/test/tools/llvm-profgen/filter-build-id.test @@ -0,0 +1,53 @@ +; REQUIRES: x86_64-linux +; Test that [buildid:]0xaddr format is correctly parsed in perfscript input. + +;; Test 1: LBR-only perfscript with buildid prefix, using --filter-build-id +;; to match "aabb1122". Output should match the original noprobe.perfscript. +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/buildid-noprobe.perfscript --binary=%S/Inputs/noprobe.perfbin --output=%t --skip-symbolization --filter-build-id=aabb1122 +; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-LBR + +; CHECK-LBR: 7 +; CHECK-LBR-NEXT: 5b0-5c8:7 +; CHECK-LBR-NEXT: 5b0-5d7:13 +; CHECK-LBR-NEXT: 5dc-5e9:6 +; CHECK-LBR-NEXT: 5e5-5e9:12 +; CHECK-LBR-NEXT: 5ff-62f:19 +; CHECK-LBR-NEXT: 634-637:18 +; CHECK-LBR-NEXT: 645-645:18 +; CHECK-LBR-NEXT: 6 +; CHECK-LBR-NEXT: 5c8->5dc:7 +; CHECK-LBR-NEXT: 5d7->5e5:13 +; CHECK-LBR-NEXT: 5e9->634:18 +; CHECK-LBR-NEXT: 62f->5b0:21 +; CHECK-LBR-NEXT: 637->645:18 +; CHECK-LBR-NEXT: 645->5ff:19 + +;; Test 2: Hybrid perfscript with buildid prefix, using --filter-build-id +;; to match "aabb1122". Output should match noinline-cs-noprobe.aggperfscript. +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/buildid-cs-noprobe.aggperfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t2 --skip-symbolization --profile-summary-cold-count=0 --filter-build-id=aabb1122 +; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-HYBRID + +; CHECK-HYBRID: [foo] +; CHECK-HYBRID-NEXT: 3 +; CHECK-HYBRID-NEXT: 5ff-62f:6 +; CHECK-HYBRID-NEXT: 634-637:6 +; CHECK-HYBRID-NEXT: 645-645:6 +; CHECK-HYBRID-NEXT: 3 +; CHECK-HYBRID-NEXT: 62f->5b0:6 +; CHECK-HYBRID-NEXT: 637->645:6 +; CHECK-HYBRID-NEXT: 645->5ff:6 +; CHECK-HYBRID-NEXT: [foo:3 @ bar] +; CHECK-HYBRID-NEXT: 4 +; CHECK-HYBRID-NEXT: 5b0-5c8:2 +; CHECK-HYBRID-NEXT: 5b0-5d7:4 +; CHECK-HYBRID-NEXT: 5dc-5e9:2 +; CHECK-HYBRID-NEXT: 5e5-5e9:4 +; CHECK-HYBRID-NEXT: 3 +; CHECK-HYBRID-NEXT: 5c8->5dc:4 +; CHECK-HYBRID-NEXT: 5d7->5e5:4 +; CHECK-HYBRID-NEXT: 5e9->634:6 + +;; Test 3: With non-matching filter, all addresses should be filtered out, +;; resulting in no samples (empty output). +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/buildid-noprobe.perfscript --binary=%S/Inputs/noprobe.perfbin --output=%t3 --skip-symbolization --filter-build-id=ccdd3344 2>&1 | FileCheck %s --check-prefix=CHECK-NOMATCH +; CHECK-NOMATCH: warning: No samples in perf script! diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp index 1dc59321fd91f..7a429725a54fc 100644 --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -60,6 +60,13 @@ static cl::opt CSProfMaxUnsymbolizedCtxDepth( "means no depth limit."), cl::cat(ProfGenCategory)); +static cl::opt FilterBuildID( + "filter-build-id", + cl::desc("Override auto-detected build ID for filtering perfscript " + "addresses in [buildid:]addr format. When set, only addresses " + "with a matching build ID prefix are kept."), + cl::cat(ProfGenCategory)); + namespace sampleprof { void VirtualUnwinder::unwindCall(UnwindState &State) { @@ -654,6 +661,43 @@ void HybridPerfReader::unwindSamples() { "frame to match."); } +/// Parse an address that may optionally have a build ID prefix in +/// [buildid:]addr format. Sets \p BuildID to the build ID prefix (empty if +/// none) and \p Addr to the hex address. Returns true on success. +/// Handles optional "0x" prefix on the address part. +static bool parseAddressWithBuildID(StringRef Str, uint64_t &Addr, + StringRef &BuildID) { + BuildID = StringRef(); + size_t ColonPos = Str.find(':'); + if (ColonPos != StringRef::npos) { + BuildID = Str.substr(0, ColonPos); + Str = Str.substr(ColonPos + 1); + } + Str.consume_front("0x"); + return !Str.getAsInteger(16, Addr); +} + +/// Return the build ID to use for filtering perfscript addresses. +/// If --filter-build-id is specified, use it as an override. +/// Otherwise, use the auto-detected value from the binary. +static StringRef getFilterBuildID(const ProfiledBinary *Binary) { + if (FilterBuildID.getNumOccurrences() > 0) + return FilterBuildID; + return Binary->getFilterBuildID(); +} + +/// Check if a line looks like an LBR sample line. LBR lines start with +/// a space and the first whitespace-delimited token contains '/'. +static bool looksLikeLBRLine(StringRef Line) { + if (!Line.starts_with(" ")) + return false; + StringRef Trimmed = Line.ltrim(); + size_t SpacePos = Trimmed.find(' '); + StringRef FirstToken = + (SpacePos != StringRef::npos) ? Trimmed.substr(0, SpacePos) : Trimmed; + return FirstToken.contains('/'); +} + bool PerfScriptReader::extractLBRStack(TraceStream &TraceIt, SmallVectorImpl &LBRStack) { // The raw format of LBR stack is like: @@ -671,8 +715,9 @@ bool PerfScriptReader::extractLBRStack(TraceStream &TraceIt, // Skip the leading instruction pointer. size_t Index = 0; uint64_t LeadingAddr; + StringRef LeadingBuildID; if (!Records.empty() && !Records[0].contains('/')) { - if (Records[0].getAsInteger(16, LeadingAddr)) { + if (!parseAddressWithBuildID(Records[0], LeadingAddr, LeadingBuildID)) { WarnInvalidLBR(TraceIt); TraceIt.advance(); return false; @@ -692,10 +737,13 @@ bool PerfScriptReader::extractLBRStack(TraceStream &TraceIt, Token.split(Addresses, "/"); uint64_t Src; uint64_t Dst; + StringRef SrcBuildID, DstBuildID; // Stop at broken LBR records. - if (Addresses.size() < 2 || Addresses[0].substr(2).getAsInteger(16, Src) || - Addresses[1].substr(2).getAsInteger(16, Dst)) { + // Parse [buildid:]0xhexaddr format. + if (Addresses.size() < 2 || + !parseAddressWithBuildID(Addresses[0], Src, SrcBuildID) || + !parseAddressWithBuildID(Addresses[1], Dst, DstBuildID)) { WarnInvalidLBR(TraceIt); break; } @@ -705,6 +753,13 @@ bool PerfScriptReader::extractLBRStack(TraceStream &TraceIt, Dst = Binary->canonicalizeVirtualAddress(Dst); bool SrcIsInternal = Binary->addressIsCode(Src); bool DstIsInternal = Binary->addressIsCode(Dst); + // Filter by build ID: addresses with a non-matching buildid prefix + // are treated as external. + StringRef BinaryBuildID = getFilterBuildID(Binary); + if (!SrcBuildID.empty() && SrcBuildID != BinaryBuildID) + SrcIsInternal = false; + if (!DstBuildID.empty() && DstBuildID != BinaryBuildID) + DstIsInternal = false; if (!SrcIsInternal) Src = ExternalAddr; if (!DstIsInternal) @@ -722,16 +777,17 @@ bool PerfScriptReader::extractLBRStack(TraceStream &TraceIt, bool PerfScriptReader::extractCallstack(TraceStream &TraceIt, SmallVectorImpl &CallStack) { // The raw format of call stack is like: - // 4005dc # leaf frame + // 4005dc # leaf frame (no buildid) // 400634 - // 400684 # root frame + // deadbeef:400684 # root frame (with buildid prefix) // It's in bottom-up order with each frame in one line. // Extract stack frames from sample - while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().starts_with(" 0x")) { + while (!TraceIt.isAtEoF() && !looksLikeLBRLine(TraceIt.getCurrentLine())) { StringRef FrameStr = TraceIt.getCurrentLine().ltrim(); uint64_t FrameAddr = 0; - if (FrameStr.getAsInteger(16, FrameAddr)) { + StringRef FrameBuildID; + if (!parseAddressWithBuildID(FrameStr, FrameAddr, FrameBuildID)) { // We might parse a non-perf sample line like empty line and comments, // skip it TraceIt.advance(); @@ -741,7 +797,13 @@ bool PerfScriptReader::extractCallstack(TraceStream &TraceIt, FrameAddr = Binary->canonicalizeVirtualAddress(FrameAddr); // Currently intermixed frame from different binaries is not supported. - if (!Binary->addressIsCode(FrameAddr)) { + bool IsExternal = !Binary->addressIsCode(FrameAddr); + // Filter by build ID: addresses with a non-matching buildid prefix + // are treated as external. + if (!IsExternal && !FrameBuildID.empty() && + FrameBuildID != getFilterBuildID(Binary)) + IsExternal = true; + if (IsExternal) { if (CallStack.empty()) NumLeafExternalFrame++; // Push a special value(ExternalAddr) for the external frames so that @@ -776,7 +838,7 @@ bool PerfScriptReader::extractCallstack(TraceStream &TraceIt, // Skip other unrelated line, find the next valid LBR line // Note that even for empty call stack, we should skip the address at the // bottom, otherwise the following pass may generate a truncated callstack - while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().starts_with(" 0x")) { + while (!TraceIt.isAtEoF() && !looksLikeLBRLine(TraceIt.getCurrentLine())) { TraceIt.advance(); } // Filter out broken stack sample. We may not have complete frame info @@ -821,14 +883,14 @@ void HybridPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) { // Parsing call stack and populate into PerfSample.CallStack if (!extractCallstack(TraceIt, Sample->CallStack)) { // Skip the next LBR line matched current call stack - if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().starts_with(" 0x")) + if (!TraceIt.isAtEoF() && looksLikeLBRLine(TraceIt.getCurrentLine())) TraceIt.advance(); return; } warnIfMissingMMap(); - if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().starts_with(" 0x")) { + if (!TraceIt.isAtEoF() && looksLikeLBRLine(TraceIt.getCurrentLine())) { // Parsing LBR stack and populate into PerfSample.LBRStack if (extractLBRStack(TraceIt, Sample->LBRStack)) { if (IgnoreStackSamples) { @@ -1151,15 +1213,18 @@ void PerfScriptReader::parseAndAggregateTrace() { // A LBR sample is like: // 40062f 0x5c6313f/0x5c63170/P/-/-/0 0x5c630e7/0x5c63130/P/-/-/0 ... -// A heuristic for fast detection by checking whether a -// leading " 0x" and the '/' exist. +// Or with buildid prefix: +// deadbeef:0x40062f deadbeef:0x5c6313f/deadbeef:0x5c63170/P/-/-/0 ... +// A heuristic for fast detection by checking whether the second token +// contains '/' and has an address (0x or buildid:0x prefix). bool PerfScriptReader::isLBRSample(StringRef Line) { // Skip the leading instruction pointer SmallVector Records; Line.trim().split(Records, " ", 2, false); if (Records.size() < 2) return false; - if (Records[1].starts_with("0x") && Records[1].contains('/')) + if (Records[1].contains('/') && + (Records[1].starts_with("0x") || Records[1].contains(":0x"))) return true; return false; } @@ -1197,8 +1262,15 @@ PerfContent PerfScriptReader::checkPerfScriptType(StringRef FileName) { // Detect sample with call stack int32_t Count = 0; - while (!TraceIt.isAtEoF() && - !TraceIt.getCurrentLine().ltrim().getAsInteger(16, FrameAddr)) { + while (!TraceIt.isAtEoF()) { + StringRef FrameStr = TraceIt.getCurrentLine().ltrim(); + // Strip optional buildid prefix for format detection. + size_t ColonPos = FrameStr.find(':'); + if (ColonPos != StringRef::npos) + FrameStr = FrameStr.substr(ColonPos + 1); + FrameStr.consume_front("0x"); + if (FrameStr.getAsInteger(16, FrameAddr)) + break; Count++; TraceIt.advance(); }