diff --git a/llvm/test/tools/llvm-profgen/Inputs/cold-profile-trimming.raw.prof b/llvm/test/tools/llvm-profgen/Inputs/cold-profile-trimming.raw.prof new file mode 100644 index 00000000000000..9230ac16016ea8 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/Inputs/cold-profile-trimming.raw.prof @@ -0,0 +1,55 @@ +27 +400540-400540:10 +400650-40066d:31 +400686-400689:3 +40068b-4006a2:30 +4006b0-4006b7:3 +4006b0-4006bf:60 +4006b0-4006c8:6 +4006d0-4006ea:51 +4006d0-400700:4 +4006ec-400700:30 +400710-40072f:5 +400740-400753:3 +400740-40075b:9 +400740-40076e:14 +400743-400753:3 +400743-40075b:43 +400743-40076e:11 +400755-40075b:4 +400770-400788:6 +400790-400792:12 +400790-4007a6:12 +4007a8-4007b8:11 +4007bd-4007ca:12 +4007cf-4007d7:12 +4007d7-4007d7:12 +400870-400870:12 +400875-4008bf:10 +26 +40066d->400686:3 +400675->400682:1 +400689->4006b9:4 +4006a2->4007a8:6 +4006b7->40068b:3 +4006bf->4006d0:9 +4006c8->4006b0:7 +4006ca->4006ec:3 +4006ea->4006b0:5 +400700->4006b0:7 +40072f->400755:5 +400753->400770:6 +40075b->400743:58 +40075f->400740:2 +40076e->400740:25 +400788->4007a8:6 +400792->4007d7:12 +4007a6->400650:7 +4007a6->400710:5 +4007b8->400790:12 +4007ca->400790:12 +4007d7->4007bd:12 +4007d7->4007cf:13 +40082f->400790:1 +400870->400540:12 +4008bf->400870:15 diff --git a/llvm/test/tools/llvm-profgen/cold-profile-trimming.test b/llvm/test/tools/llvm-profgen/cold-profile-trimming.test new file mode 100644 index 00000000000000..bee98802e80263 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/cold-profile-trimming.test @@ -0,0 +1,68 @@ +; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/cold-profile-trimming.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t1 --use-offset=0 --trim-cold-profile=0 +; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-NOTRIM +; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/cold-profile-trimming.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t1 --use-offset=0 --trim-cold-profile=1 
--profile-summary-cold-count=100
+; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-TRIM
+
+;CHECK-NOTRIM: partition_pivot_last:1091:7
+;CHECK-NOTRIM: partition_pivot_first:365:5
+;CHECK-NOTRIM: quick_sort:83:25
+;CHECK-NOTRIM: main:52:0
+
+;CHECK-TRIM: partition_pivot_last:1091:7
+;CHECK-TRIM: partition_pivot_first:365:5
+;CHECK-TRIM-NOT: quick_sort:83:25
+;CHECK-TRIM-NOT: main:52:0
+
+; original code:
+; clang -O3 -g -fno-optimize-sibling-calls -fdebug-info-for-profiling qsort.c -o a.out
+#include <stdio.h>
+#include <stdlib.h>
+
+void swap(int *a, int *b) {
+  int t = *a;
+  *a = *b;
+  *b = t;
+}
+
+int partition_pivot_last(int* array, int low, int high) {
+  int pivot = array[high];
+  int i = low - 1;
+  for (int j = low; j < high; j++)
+    if (array[j] < pivot)
+      swap(&array[++i], &array[j]);
+  swap(&array[i + 1], &array[high]);
+  return (i + 1);
+}
+
+int partition_pivot_first(int* array, int low, int high) {
+  int pivot = array[low];
+  int i = low + 1;
+  for (int j = low + 1; j <= high; j++)
+    if (array[j] < pivot) { if (j != i) swap(&array[i], &array[j]); i++;}
+  swap(&array[i - 1], &array[low]);
+  return i - 1;
+}
+
+void quick_sort(int* array, int low, int high, int (*partition_func)(int *, int, int)) {
+  if (low < high) {
+    int pi = (*partition_func)(array, low, high);
+    quick_sort(array, low, pi - 1, partition_func);
+    quick_sort(array, pi + 1, high, partition_func);
+  }
+}
+
+int main() {
+  const int size = 200;
+  int sum = 0;
+  int *array = malloc(size * sizeof(int));
+  for(int i = 0; i < 100 * 1000; i++) {
+    for(int j = 0; j < size; j++)
+      array[j] = j % 10 ? rand() % size: j;
+    int (*fptr)(int *, int, int) = i % 3 ? partition_pivot_last : partition_pivot_first;
+    quick_sort(array, 0, size - 1, fptr);
+    sum += array[i % size];
+  }
+  printf("sum=%d\n", sum);
+
+  return 0;
+}
diff --git a/llvm/test/tools/llvm-profgen/cs-preinline.test b/llvm/test/tools/llvm-profgen/cs-preinline.test
index 3d7d59ae214ffa..94ef76207168a3 100644
--- a/llvm/test/tools/llvm-profgen/cs-preinline.test
+++ b/llvm/test/tools/llvm-profgen/cs-preinline.test
@@ -11,7 +11,7 @@
 ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-NO-PREINL
 
 ; Test cold profile trimming. Only base profiles should be dropped.
-; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --csspgo-preinliner=1 --csprof-trim-cold-context=1 --profile-summary-hot-count=250
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --csspgo-preinliner=1 --trim-cold-profile=1 --profile-summary-hot-count=250
 ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-TRIM
 
 
diff --git a/llvm/test/tools/llvm-profgen/merge-cold-profile.test b/llvm/test/tools/llvm-profgen/merge-cold-profile.test
index a012ead9ac57b6..120d83e35d7d1d 100644
--- a/llvm/test/tools/llvm-profgen/merge-cold-profile.test
+++ b/llvm/test/tools/llvm-profgen/merge-cold-profile.test
@@ -2,8 +2,8 @@
 ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t1 --compress-recursion=-1 --profile-summary-hot-count=8
 ; RUN: FileCheck %s --input-file %t1
 
-; Test --csprof-trim-cold-context=0
-; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-hot-count=100 --csprof-trim-cold-context=0
+; Test --trim-cold-profile=0
+; RUN: llvm-profgen
--format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-hot-count=100 --trim-cold-profile=0
 ; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-KEEP-COLD
 
 ; Test --csprof-merge-cold-context=0
@@ -11,7 +11,7 @@
 ; RUN: FileCheck %s --input-file %t3 --check-prefix=CHECK-UNMERGED
 
 ; Test --csprof-frame-depth-for-cold-context
-; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-hot-count=100 --csprof-trim-cold-context=0 --csprof-max-cold-context-depth=2
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-hot-count=100 --trim-cold-profile=0 --csprof-max-cold-context-depth=2
 ; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-COLD-CONTEXT-LENGTH
 
 ; CHECK: [fa]:14:4
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 0b90352bc44579..c40604d8a9f0b0 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -50,17 +50,17 @@ static cl::opt RecursionCompression(
     cl::Hidden,
     cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
 
+static cl::opt<bool>
+    TrimColdProfile("trim-cold-profile", cl::init(false), cl::ZeroOrMore,
+                    cl::desc("If the total count of the profile is smaller "
+                             "than threshold, it will be trimmed."));
+
 static cl::opt<bool> CSProfMergeColdContext(
     "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore,
     cl::desc("If the total count of context profile is smaller than "
              "the threshold, it will be merged into context-less base "
              "profile."));
 
-static cl::opt<bool>
CSProfTrimColdContext(
-    "csprof-trim-cold-context", cl::init(false), cl::ZeroOrMore,
-    cl::desc("If the total count of the profile after all merge is done "
-             "is still smaller than threshold, it will be trimmed."));
-
 static cl::opt CSProfMaxColdContextDepth(
     "csprof-max-cold-context-depth", cl::init(1), cl::ZeroOrMore,
     cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
@@ -378,9 +378,35 @@ void ProfileGenerator::generateProfile() {
 
 void ProfileGenerator::postProcessProfiles() {
   computeSummaryAndThreshold();
+  trimColdProfiles(ProfileMap, ColdCountThreshold);
   calculateAndShowDensity(ProfileMap);
 }
 
+/// Drop cold function profiles from ProfileMap when --trim-cold-profile is
+/// set.
+///
+/// \param Profiles map scanned for cold entries; the only caller visible here
+///        passes ProfileMap itself.
+/// \param ColdCntThreshold a profile is cold, and gets erased, when its total
+///        sample count is strictly below this value.
+void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
+                                        uint64_t ColdCntThreshold) {
+  if (!TrimColdProfile)
+    return;
+
+  // Record the keys of the cold profiles first instead of erasing inside the
+  // scan, so the map is not modified while it is being iterated.
+  std::vector<SampleContext> ColdProfiles;
+  for (const auto &I : Profiles) {
+    if (I.second.getTotalSamples() < ColdCntThreshold)
+      ColdProfiles.emplace_back(I.first);
+  }
+
+  // Remove the cold profiles from ProfileMap.
+  for (const auto &I : ColdProfiles)
+    ProfileMap.erase(I);
+}
+
 void ProfileGenerator::generateLineNumBasedProfile() {
   assert(SampleCounters.size() == 1 &&
          "Must have one entry for profile generation.");
@@ -732,10 +758,10 @@ void CSProfileGenerator::postProcessProfiles() {
   }
 
   // Trim and merge cold context profile using cold threshold above.
- if (CSProfTrimColdContext || CSProfMergeColdContext) { + if (TrimColdProfile || CSProfMergeColdContext) { SampleContextTrimmer(ProfileMap) .trimAndMergeColdContextProfiles( - HotCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext, + HotCountThreshold, TrimColdProfile, CSProfMergeColdContext, CSProfMaxColdContextDepth, EnableCSPreInliner); } diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h index c4b77aa0954913..de13f7d840a70e 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.h +++ b/llvm/tools/llvm-profgen/ProfileGenerator.h @@ -129,6 +129,8 @@ class ProfileGenerator : public ProfileGeneratorBase { void populateBoundarySamplesForAllFunctions(const BranchSample &BranchCounters); void postProcessProfiles(); + void trimColdProfiles(const SampleProfileMap &Profiles, + uint64_t ColdCntThreshold); }; using ProbeCounterMap =