diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index f001f5ee9d39b..08647f5cfb6dd 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -781,6 +781,17 @@ class FunctionSamples {
         Num, Weight);
   }
 
+  /// Add \p Num samples to the body sample record at (\p LineOffset,
+  /// \p Discriminator), forwarding \p Weight to SampleRecord::addSamples, and
+  /// return a reference to that record so callers can keep updating it.
+  SampleRecord &getOrCreateBodySample(uint32_t LineOffset,
+                                      uint32_t Discriminator, uint64_t Num,
+                                      uint64_t Weight = 1) {
+    SampleRecord &Sample = BodySamples[LineLocation(LineOffset, Discriminator)];
+    Sample.addSamples(Num, Weight);
+    return Sample;
+  }
+
   sampleprof_error addCalledTargetSamples(uint32_t LineOffset,
                                           uint32_t Discriminator,
                                           FunctionId Func,
@@ -975,6 +986,39 @@ class FunctionSamples {
     return CallsiteSamples;
   }
 
+  /// For each location with inlined function samples, if the number of
+  /// functions exceeds ProfileInlineCallsiteMax, remove the functions with the
+  /// fewest total samples until at most ProfileInlineCallsiteMax remain.
+  /// Functions whose total equals that of the last kept function are all
+  /// retained, so ties may leave more than ProfileInlineCallsiteMax entries.
+  /// A limit of 0 means unlimited and leaves the samples untouched.
+  void trimCallsiteSamples(size_t ProfileInlineCallsiteMax) {
+    if (ProfileInlineCallsiteMax == 0)
+      return;
+    for (auto &CallsiteSample : CallsiteSamples) {
+      FunctionSamplesMap &FunctionSamples = CallsiteSample.second;
+      if (ProfileInlineCallsiteMax < FunctionSamples.size()) {
+        auto Totals = llvm::map_range(FunctionSamples,
+                                      [](FunctionSamplesMap::value_type &V) {
+                                        return V.second.getTotalSamples();
+                                      });
+        std::vector<uint64_t> TotalSamples(Totals.begin(), Totals.end());
+        // Partition in descending order so that index
+        // ProfileInlineCallsiteMax - 1 holds the smallest total that is kept.
+        std::nth_element(TotalSamples.begin(),
+                         TotalSamples.begin() + ProfileInlineCallsiteMax - 1,
+                         TotalSamples.end(), std::greater<uint64_t>());
+        uint64_t Threshold = TotalSamples[ProfileInlineCallsiteMax - 1];
+        for (auto It = FunctionSamples.begin(); It != FunctionSamples.end();) {
+          if (It->second.getTotalSamples() < Threshold)
+            It = FunctionSamples.erase(It);
+          else
+            ++It;
+        }
+      }
+    }
+  }
+
   /// Return the maximum of sample counts in a function body. When SkipCallSite
   /// is false, which is the default, the return count includes samples in the
   /// inlined functions. When SkipCallSite is true, the return count only
diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index 9e8f543909cdb..f09dea4a01e0b 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -494,6 +494,12 @@ class SampleProfileReader {
 
   void setModule(const Module *Mod) { M = Mod; }
 
+  /// Set the number of call targets kept per sample record (0 = unlimited).
+  void setProfileCallTargetMax(size_t N) { ProfileCallTargetMax = N; }
+
+  /// Set the number of inlined callsites kept per location (0 = unlimited).
+  void setProfileInlineCallsiteMax(size_t N) { ProfileInlineCallsiteMax = N; }
+
 protected:
   /// Map every function to its associated profile.
   ///
@@ -552,6 +558,16 @@ class SampleProfileReader {
   /// Whether the profile uses MD5 for Sample Contexts and function names. This
   /// can be one-way overriden by the user to force use MD5.
   bool ProfileIsMD5 = false;
+
+  /// Number of call targets to keep in a sample record. Only those with highest
+  /// count are kept. 0 = unlimited.
+  /// Same as ProfileCallTargetMax option from SampleProfile.cpp.
+  size_t ProfileCallTargetMax = 0;
+
+  /// Number of inlined callsites to keep in a line location. Only those with
+  /// highest count are kept. 0 = unlimited.
+  /// Same as ProfileInlineCallsiteMax option from SampleProfile.cpp.
+  size_t ProfileInlineCallsiteMax = 0;
 };
 
 class SampleProfileReaderText : public SampleProfileReader {
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index ed92713c2c627..515478db3198c 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -325,6 +325,16 @@ std::error_code SampleProfileReaderText::readImpl() {
   // top-level or nested function profile.
   uint32_t DepthMetadata = 0;
 
+  // Pop inline stack until size == Depth, handle ProfileInlineCallsiteMax here
+  // because the current FunctionSamples is done adding inlined callsites.
+  auto popInlineStack = [&](uint32_t Depth) {
+    while (InlineStack.size() > Depth) {
+      if (ProfileInlineCallsiteMax != 0)
+        InlineStack.back()->trimCallsiteSamples(ProfileInlineCallsiteMax);
+      InlineStack.pop_back();
+    }
+  };
+
   ProfileIsFS = ProfileIsFSDisciminator;
   FunctionSamples::ProfileIsFS = ProfileIsFS;
   for (; !LineIt.is_at_eof(); ++LineIt) {
@@ -358,7 +368,7 @@ std::error_code SampleProfileReaderText::readImpl() {
       FunctionSamples &FProfile = Profiles.Create(FContext);
       MergeResult(Result, FProfile.addTotalSamples(NumSamples));
       MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
-      InlineStack.clear();
+      popInlineStack(0);
       InlineStack.push_back(&FProfile);
     } else {
       uint64_t NumSamples;
@@ -386,9 +396,7 @@ std::error_code SampleProfileReaderText::readImpl() {
       // Here we handle FS discriminators.
       Discriminator &= getDiscriminatorMask();
 
-      while (InlineStack.size() > Depth) {
-        InlineStack.pop_back();
-      }
+      popInlineStack(Depth);
       switch (LineTy) {
       case LineType::CallSiteProfile: {
         FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
@@ -400,15 +408,28 @@ std::error_code SampleProfileReaderText::readImpl() {
         break;
       }
       case LineType::BodyProfile: {
-        while (InlineStack.size() > Depth) {
-          InlineStack.pop_back();
-        }
         FunctionSamples &FProfile = *InlineStack.back();
-        for (const auto &name_count : TargetCountMap) {
-          MergeResult(Result, FProfile.addCalledTargetSamples(
-                                  LineOffset, Discriminator,
-                                  FunctionId(name_count.first),
-                                  name_count.second));
+        if (ProfileCallTargetMax != 0) {
+          // Keep only the ProfileCallTargetMax targets with the highest counts:
+          // the multimap is ordered by count, so begin() is the smallest.
+          std::multimap<uint64_t, FunctionId> CallTargets;
+          for (const auto &CallTarget : TargetCountMap) {
+            CallTargets.emplace(CallTarget.second, CallTarget.first);
+            if (CallTargets.size() > ProfileCallTargetMax)
+              CallTargets.erase(CallTargets.begin());
+          }
+          for (const auto &CallTarget : CallTargets) {
+            MergeResult(Result, FProfile.addCalledTargetSamples(
+                                    LineOffset, Discriminator,
+                                    CallTarget.second, CallTarget.first));
+          }
+        } else {
+          for (const auto &name_count : TargetCountMap) {
+            MergeResult(Result,
+                        FProfile.addCalledTargetSamples(
+                            LineOffset, Discriminator,
+                            FunctionId(name_count.first), name_count.second));
+          }
         }
         MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator,
                                                     NumSamples));
@@ -430,6 +451,7 @@ std::error_code SampleProfileReaderText::readImpl() {
       }
     }
   }
+  popInlineStack(0);
 
   assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
          "Cannot have both context-sensitive and regular profile");
@@ -604,20 +626,44 @@ SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
     // Here we handle FS discriminators:
     uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask();
 
-    for (uint32_t J = 0; J < *NumCalls; ++J) {
-      auto CalledFunction(readStringFromTable());
-      if (std::error_code EC = CalledFunction.getError())
-        return EC;
+    SampleRecord &Sample = FProfile.getOrCreateBodySample(
+        *LineOffset, DiscriminatorVal, *NumSamples);
+
+    if (ProfileCallTargetMax != 0) {
+      // Keep only the ProfileCallTargetMax most frequently called targets and
+      // drop the others. The multimap is ordered by count, so begin() is
+      // always a least frequent target among those read so far.
+      std::multimap<uint64_t, FunctionId> CallTargets;
+      for (uint32_t J = 0; J < *NumCalls; ++J) {
+        auto CalledFunction(readStringFromTable());
+        if (std::error_code EC = CalledFunction.getError())
+          return EC;
 
-      auto CalledFunctionSamples = readNumber<uint64_t>();
-      if (std::error_code EC = CalledFunctionSamples.getError())
-        return EC;
+        auto CalledFunctionSamples = readNumber<uint64_t>();
+        if (std::error_code EC = CalledFunctionSamples.getError())
+          return EC;
 
-      FProfile.addCalledTargetSamples(*LineOffset, DiscriminatorVal,
-                                      *CalledFunction, *CalledFunctionSamples);
-    }
+        CallTargets.emplace(*CalledFunctionSamples, *CalledFunction);
+        if (CallTargets.size() > ProfileCallTargetMax)
+          CallTargets.erase(CallTargets.begin());
+      }
+
+      for (auto &CallTarget : CallTargets) {
+        Sample.addCalledTarget(CallTarget.second, CallTarget.first);
+      }
+    } else {
+      for (uint32_t J = 0; J < *NumCalls; ++J) {
+        auto CalledFunction(readStringFromTable());
+        if (std::error_code EC = CalledFunction.getError())
+          return EC;
 
-    FProfile.addBodySamples(*LineOffset, DiscriminatorVal, *NumSamples);
+        auto CalledFunctionSamples = readNumber<uint64_t>();
+        if (std::error_code EC = CalledFunctionSamples.getError())
+          return EC;
+
+        Sample.addCalledTarget(*CalledFunction, *CalledFunctionSamples);
+      }
+    }
   }
 
   // Read all the samples for inlined function calls.
@@ -648,6 +694,9 @@ SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
       return EC;
   }
 
+  if (ProfileInlineCallsiteMax != 0)
+    FProfile.trimCallsiteSamples(ProfileInlineCallsiteMax);
+
   return sampleprof_error::success;
 }
 
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 6c6f0a0eca72a..e6d87a6b3d65a 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -213,6 +213,16 @@ cl::opt<int> ProfileInlineLimitMax(
     cl::desc("The upper bound of size growth limit for "
              "proirity-based sample profile loader inlining."));
 
+static cl::opt<unsigned> ProfileCallTargetMax(
+    "sample-profile-call-target-max", cl::Hidden, cl::init(3),
+    cl::desc("In a sample record, only keep top N frequent indirect call "
+             "targets at the same location."));
+
+static cl::opt<unsigned> ProfileInlineCallsiteMax(
+    "sample-profile-inline-callsite-max", cl::Hidden, cl::init(3),
+    cl::desc("In an inlined callsite map, only keep top N frequently inlined "
+             "callsites at the same location."));
+
 cl::opt<int> SampleHotCallSiteThreshold(
     "sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000),
     cl::desc("Hot callsite threshold for proirity-based sample profile loader "
@@ -2015,6 +2025,8 @@ bool SampleProfileLoader::doInitialization(Module &M,
   // set module before reading the profile so reader may be able to only
   // read the function profiles which are used by the current module.
Reader->setModule(&M); + Reader->setProfileCallTargetMax(ProfileCallTargetMax); + Reader->setProfileInlineCallsiteMax(ProfileInlineCallsiteMax); if (std::error_code EC = Reader->read()) { std::string Msg = "profile reading failed: " + EC.message(); Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg)); diff --git a/llvm/test/tools/llvm-profdata/Inputs/sample-max-callsite.proftext b/llvm/test/tools/llvm-profdata/Inputs/sample-max-callsite.proftext new file mode 100644 index 0000000000000..c59e87ec254dc --- /dev/null +++ b/llvm/test/tools/llvm-profdata/Inputs/sample-max-callsite.proftext @@ -0,0 +1,20 @@ +main:184019:0 + 4: 534 + 4.2: 534 + 5: 1075 + 5.1: 1075 + 6: 2080 + 7: 534 _Z3bazi:1 + 9: 2064 _Z3bari:1471 _Z3fooi:631 _Z3gooi:123 _Z3hooi:999 + 10: inline1:1000 + 1: 1000 + 2: 1001 func1:10 func2:30 func3:20 + 10: inline2:2000 + 1: 2000 + 3: inlineinline1:3 + 3: inlineinline2:2 + 3: inlineinline3:1 + 10: inline3:45 + 10.1: inline4:1 +_Z3bari:20301:1437 + 1: 1437 diff --git a/llvm/test/tools/llvm-profdata/sample-max-callsite.test b/llvm/test/tools/llvm-profdata/sample-max-callsite.test new file mode 100644 index 0000000000000..723f6df75548b --- /dev/null +++ b/llvm/test/tools/llvm-profdata/sample-max-callsite.test @@ -0,0 +1,124 @@ +# Test options sample-profile-call-target-max and +# sample-profile-inline-callsite-max in llvm-profdata. Same options are +# available in clang when compiling using a sample profile. 
+ +RUN: llvm-profdata merge -text -sample -sample-profile-call-target-max=3 %p/Inputs/sample-max-callsite.proftext | FileCheck %s --check-prefix=SHOW30 +RUN: llvm-profdata merge -text -sample -sample-profile-call-target-max=2 %p/Inputs/sample-max-callsite.proftext | FileCheck %s --check-prefix=SHOW20 +RUN: llvm-profdata merge -text -sample -sample-profile-inline-callsite-max=2 %p/Inputs/sample-max-callsite.proftext | FileCheck %s --check-prefix=SHOW02 +RUN: llvm-profdata merge -text -sample -sample-profile-inline-callsite-max=1 %p/Inputs/sample-max-callsite.proftext | FileCheck %s --check-prefix=SHOW01 +RUN: llvm-profdata merge -text -sample -sample-profile-call-target-max=1 -sample-profile-inline-callsite-max=1 %p/Inputs/sample-max-callsite.proftext | FileCheck %s --check-prefix=SHOW11 + +RUN: llvm-profdata merge -text -sample -sample-profile-call-target-max=0 -sample-profile-inline-callsite-max=0 %p/Inputs/sample-max-callsite.proftext | FileCheck %s --check-prefix=SHOW +RUN: llvm-profdata merge -text -sample -sample-profile-call-target-max=999 -sample-profile-inline-callsite-max=999 %p/Inputs/sample-max-callsite.proftext | FileCheck %s --check-prefix=SHOW + +SHOW30: main:184019:0 +SHOW30-NEXT: 4: 534 +SHOW30-NEXT: 4.2: 534 +SHOW30-NEXT: 5: 1075 +SHOW30-NEXT: 5.1: 1075 +SHOW30-NEXT: 6: 2080 +SHOW30-NEXT: 7: 534 _Z3bazi:1 +SHOW30-NEXT: 9: 2064 _Z3bari:1471 _Z3hooi:999 _Z3fooi:631 +SHOW30-NEXT: 10: inline1:1000 +SHOW30-NEXT: 1: 1000 +SHOW30-NEXT: 2: 1001 func2:30 func3:20 func1:10 +SHOW30-NEXT: 10: inline2:2000 +SHOW30-NEXT: 1: 2000 +SHOW30-NEXT: 3: inlineinline1:3 +SHOW30-NEXT: 3: inlineinline2:2 +SHOW30-NEXT: 3: inlineinline3:1 +SHOW30-NEXT: 10: inline3:45 +SHOW30-NEXT: 10.1: inline4:1 +SHOW30-NEXT: _Z3bari:20301:1437 +SHOW30-NEXT: 1: 1437 + +SHOW20: main:184019:0 +SHOW20-NEXT: 4: 534 +SHOW20-NEXT: 4.2: 534 +SHOW20-NEXT: 5: 1075 +SHOW20-NEXT: 5.1: 1075 +SHOW20-NEXT: 6: 2080 +SHOW20-NEXT: 7: 534 _Z3bazi:1 +SHOW20-NEXT: 9: 2064 _Z3bari:1471 _Z3hooi:999 +SHOW20-NEXT: 
10: inline1:1000 +SHOW20-NEXT: 1: 1000 +SHOW20-NEXT: 2: 1001 func2:30 func3:20 +SHOW20-NEXT: 10: inline2:2000 +SHOW20-NEXT: 1: 2000 +SHOW20-NEXT: 3: inlineinline1:3 +SHOW20-NEXT: 3: inlineinline2:2 +SHOW20-NEXT: 3: inlineinline3:1 +SHOW20-NEXT: 10: inline3:45 +SHOW20-NEXT: 10.1: inline4:1 +SHOW20-NEXT: _Z3bari:20301:1437 +SHOW20-NEXT: 1: 1437 + +SHOW02: main:184019:0 +SHOW02-NEXT: 4: 534 +SHOW02-NEXT: 4.2: 534 +SHOW02-NEXT: 5: 1075 +SHOW02-NEXT: 5.1: 1075 +SHOW02-NEXT: 6: 2080 +SHOW02-NEXT: 7: 534 _Z3bazi:1 +SHOW02-NEXT: 9: 2064 _Z3bari:1471 _Z3hooi:999 _Z3fooi:631 _Z3gooi:123 +SHOW02-NEXT: 10: inline1:1000 +SHOW02-NEXT: 1: 1000 +SHOW02-NEXT: 2: 1001 func2:30 func3:20 func1:10 +SHOW02-NEXT: 10: inline2:2000 +SHOW02-NEXT: 1: 2000 +SHOW02-NEXT: 3: inlineinline1:3 +SHOW02-NEXT: 3: inlineinline2:2 +SHOW02-NEXT: 10.1: inline4:1 +SHOW02-NEXT: _Z3bari:20301:1437 +SHOW02-NEXT: 1: 1437 + +SHOW01: main:184019:0 +SHOW01-NEXT: 4: 534 +SHOW01-NEXT: 4.2: 534 +SHOW01-NEXT: 5: 1075 +SHOW01-NEXT: 5.1: 1075 +SHOW01-NEXT: 6: 2080 +SHOW01-NEXT: 7: 534 _Z3bazi:1 +SHOW01-NEXT: 9: 2064 _Z3bari:1471 _Z3hooi:999 _Z3fooi:631 _Z3gooi:123 +SHOW01-NEXT: 10: inline2:2000 +SHOW01-NEXT: 1: 2000 +SHOW01-NEXT: 3: inlineinline1:3 +SHOW01-NEXT: 10.1: inline4:1 +SHOW01-NEXT: _Z3bari:20301:1437 +SHOW01-NEXT: 1: 1437 + +SHOW11: main:184019:0 +SHOW11-NEXT: 4: 534 +SHOW11-NEXT: 4.2: 534 +SHOW11-NEXT: 5: 1075 +SHOW11-NEXT: 5.1: 1075 +SHOW11-NEXT: 6: 2080 +SHOW11-NEXT: 7: 534 _Z3bazi:1 +SHOW11-NEXT: 9: 2064 _Z3bari:1471 +SHOW11-NEXT: 10: inline2:2000 +SHOW11-NEXT: 1: 2000 +SHOW11-NEXT: 3: inlineinline1:3 +SHOW11-NEXT: 10.1: inline4:1 +SHOW11-NEXT: _Z3bari:20301:1437 +SHOW11-NEXT: 1: 1437 + +SHOW: main:184019:0 +SHOW-NEXT: 4: 534 +SHOW-NEXT: 4.2: 534 +SHOW-NEXT: 5: 1075 +SHOW-NEXT: 5.1: 1075 +SHOW-NEXT: 6: 2080 +SHOW-NEXT: 7: 534 _Z3bazi:1 +SHOW-NEXT: 9: 2064 _Z3bari:1471 _Z3hooi:999 _Z3fooi:631 _Z3gooi:123 +SHOW-NEXT: 10: inline1:1000 +SHOW-NEXT: 1: 1000 +SHOW-NEXT: 2: 1001 func2:30 func3:20 func1:10 
+SHOW-NEXT: 10: inline2:2000
+SHOW-NEXT: 1: 2000
+SHOW-NEXT: 3: inlineinline1:3
+SHOW-NEXT: 3: inlineinline2:2
+SHOW-NEXT: 3: inlineinline3:1
+SHOW-NEXT: 10: inline3:45
+SHOW-NEXT: 10.1: inline4:1
+SHOW-NEXT: _Z3bari:20301:1437
+SHOW-NEXT: 1: 1437
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 63e34d81f1892..5a89185465336 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -175,6 +175,16 @@ cl::opt<bool> CompressAllSections(
     cl::sub(MergeSubcommand),
     cl::desc("Compress all sections when writing the profile (only "
              "meaningful for -extbinary)"));
+// Limits forwarded to the sample profile reader before each input is read.
+// Both default to 0 here, which the reader treats as unlimited.
+cl::opt<unsigned> ProfileCallTargetMax(
+    "sample-profile-call-target-max", cl::Hidden, cl::init(0),
+    cl::desc("While reading a profile, in a sample record, only keep top N "
+             "frequent indirect call targets at the same location."));
+cl::opt<unsigned> ProfileInlineCallsiteMax(
+    "sample-profile-inline-callsite-max", cl::Hidden, cl::init(0),
+    cl::desc("While reading a profile, in an inlined callsite map, only keep "
+             "top N frequently inlined callsites at the same location."));
 cl::opt<bool> SampleMergeColdContext(
     "sample-merge-cold-context", cl::init(false), cl::Hidden,
     cl::sub(MergeSubcommand),
@@ -1377,6 +1387,8 @@ static void mergeSampleProfile(const WeightedFileVector &Inputs,
     // merged profile map.
     Readers.push_back(std::move(ReaderOrErr.get()));
     const auto Reader = Readers.back().get();
+    Reader->setProfileCallTargetMax(ProfileCallTargetMax);
+    Reader->setProfileInlineCallsiteMax(ProfileInlineCallsiteMax);
     if (std::error_code EC = Reader->read()) {
       warnOrExitGivenError(FailMode, EC, Input.Filename);
       Readers.pop_back();