From 484a569eea7ba294d84b9a700e6fcc2f97626320 Mon Sep 17 00:00:00 2001 From: wlei Date: Thu, 2 Dec 2021 16:51:42 -0800 Subject: [PATCH] [llvm-profgen] Fix total samples related issues Since total samples and body samples are used to compute the hotness threshold in the compiler, we found that in some services changing the total samples computation causes a noticeable regression. Hence, we revert the changes here and keep all total sample counts identical to the old tool. Three changes in this diff: 1. Revert the previous diff (https://reviews.llvm.org/D112672: [llvm-profgen] Update total samples by accumulating all its body samples) and put it under a switch (--update-total-samples, off by default). 2. Keep the negative line number. Although the compiler doesn't consume the count, it is still used to compute the hot threshold. 3. Change to accumulate total samples per byte instead of per instruction. Reviewed By: hoy, wenlei Differential Revision: https://reviews.llvm.org/D115013 --- .../llvm-profgen/cold-profile-trimming.test | 18 +-- llvm/test/tools/llvm-profgen/coroutine.test | 2 +- .../test/tools/llvm-profgen/cs-interrupt.test | 8 +- .../test/tools/llvm-profgen/cs-preinline.test | 21 ++-- .../llvm-profgen/fname-canonicalization.test | 4 +- .../tools/llvm-profgen/fs-discriminator.test | 48 +++---- llvm/test/tools/llvm-profgen/func-split.test | 8 +- .../tools/llvm-profgen/inline-cs-noprobe.test | 21 ++-- .../llvm-profgen/inline-force-dwarf.test | 8 +- .../tools/llvm-profgen/inline-noprobe.test | 46 +++++-- .../tools/llvm-profgen/inline-noprobe2.test | 118 +++++++++--------- .../tools/llvm-profgen/multi-load-segs.test | 3 +- .../llvm-profgen/noinline-cs-noprobe.test | 31 +++-- .../tools/llvm-profgen/noinline-noprobe.test | 26 ++-- .../tools/llvm-profgen/profile-density.test | 8 +- .../recursion-compression-noprobe.test | 98 +++++++++------ llvm/tools/llvm-profgen/ProfileGenerator.cpp | 27 +++- llvm/tools/llvm-profgen/ProfileGenerator.h | 3 +- llvm/tools/llvm-profgen/ProfiledBinary.cpp | 6 +- 
llvm/tools/llvm-profgen/ProfiledBinary.h | 7 ++ 20 files changed, 296 insertions(+), 215 deletions(-) diff --git a/llvm/test/tools/llvm-profgen/cold-profile-trimming.test b/llvm/test/tools/llvm-profgen/cold-profile-trimming.test index bee98802e8026..6659b3e845f90 100644 --- a/llvm/test/tools/llvm-profgen/cold-profile-trimming.test +++ b/llvm/test/tools/llvm-profgen/cold-profile-trimming.test @@ -1,17 +1,17 @@ ; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/cold-profile-trimming.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t1 --use-offset=0 --trim-cold-profile=0 ; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-NOTRIM -; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/cold-profile-trimming.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t1 --use-offset=0 --trim-cold-profile=1 --profile-summary-cold-count=100 +; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/cold-profile-trimming.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t1 --use-offset=0 --trim-cold-profile=1 --profile-summary-cold-count=1000 ; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-TRIM -;CHECK-NOTRIM: partition_pivot_last:1091:7 -;CHECK-NOTRIM: partition_pivot_first:365:5 -;CHECK-NOTRIM: quick_sort:83:25 -;CHECK-NOTRIM: main:52:0 +;CHECK-NOTRIM: partition_pivot_last:5187:7 +;CHECK-NOTRIM: partition_pivot_first:3010:5 +;CHECK-NOTRIM: quick_sort:903:25 +;CHECK-NOTRIM: main:820:0 -;CHECK-TRIM: partition_pivot_last:1091:7 -;CHECK-TRIM: partition_pivot_first:365:5 -;CHECK-TRIM-NOT: quick_sort:83:25 -;CHECK-TRIM-NOT: main:52:0 +;CHECK-TRIM: partition_pivot_last:5187:7 +;CHECK-TRIM: partition_pivot_first:3010:5 +;CHECK-TRIM-NOT: quick_sort:903:25 +;CHECK-TRIM-NOT: main:820:0 ; original code: ; clang -O3 -g -fno-optimize-sibling-calls -fdebug-info-for-profiling qsort.c -o a.out diff --git a/llvm/test/tools/llvm-profgen/coroutine.test b/llvm/test/tools/llvm-profgen/coroutine.test index 
dd57c3ffe0c85..de6ec39da7cd8 100644 --- a/llvm/test/tools/llvm-profgen/coroutine.test +++ b/llvm/test/tools/llvm-profgen/coroutine.test @@ -2,7 +2,7 @@ ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK ; Check that the head sample count for ticker is 0. -; CHECK: _Z6tickeri:67:0 +; CHECK: _Z6tickeri:1566:0 ; CHECK-NOT: _Z6tickeri.resume diff --git a/llvm/test/tools/llvm-profgen/cs-interrupt.test b/llvm/test/tools/llvm-profgen/cs-interrupt.test index b029e372ec332..95f38376e28d9 100644 --- a/llvm/test/tools/llvm-profgen/cs-interrupt.test +++ b/llvm/test/tools/llvm-profgen/cs-interrupt.test @@ -6,14 +6,14 @@ >>>>>>> 02ea7084c370 ([llvm-profgen] Support LBR only perf script) ; RUN: FileCheck %s --input-file %t -; CHECK:[main:1 @ foo:3 @ bar]:17:5 +; CHECK:[main:1 @ foo]:88:0 +; CHECK: 2: 5 +; CHECK: 3: 5 bar:5 +; CHECK:[main:1 @ foo:3 @ bar]:74:5 ; CHECK: 0: 5 ; CHECK: 1: 5 ; CHECK: 2: 3 ; CHECK: 5: 4 -; CHECK:[main:1 @ foo]:10:0 -; CHECK: 2: 5 -; CHECK: 3: 5 bar:5 ; CHECK-UNWINDER: [main:1 @ foo] ; CHECK-UNWINDER-NEXT: 3 diff --git a/llvm/test/tools/llvm-profgen/cs-preinline.test b/llvm/test/tools/llvm-profgen/cs-preinline.test index 94ef76207168a..7b7e3b36a7fbd 100644 --- a/llvm/test/tools/llvm-profgen/cs-preinline.test +++ b/llvm/test/tools/llvm-profgen/cs-preinline.test @@ -11,41 +11,44 @@ ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-NO-PREINL ; Test cold profile trimming. Only base profiles should be dropped. 
-; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --csspgo-preinliner=1 --trim-cold-profile=1 --profile-summary-hot-count=250 +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --csspgo-preinliner=1 --trim-cold-profile=1 --profile-summary-hot-count=400 ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-TRIM -; CHECK-DEFAULT: [main:1 @ foo]:44:0 +; CHECK-DEFAULT: [main:1 @ foo]:309:0 ; CHECK-DEFAULT-NEXT: 2.1: 14 ; CHECK-DEFAULT-NEXT: 3: 15 ; CHECK-DEFAULT-NEXT: 3.1: 14 bar:14 ; CHECK-DEFAULT-NEXT: 3.2: 1 +; CHECK-DEFAULT-NEXT: 65526: 14 ; CHECK-DEFAULT-NEXT: !Attributes: 1 -; CHECK-DEFAULT-NEXT:[main:1 @ foo:3.1 @ bar]:14:0 +; CHECK-DEFAULT-NEXT:[main:1 @ foo:3.1 @ bar]:84:0 ; CHECK-DEFAULT-NEXT: 1: 14 ; CHECK-DEFAULT-NEXT: !Attributes: 1 -; CHECK-PREINL: [foo]:44:0 +; CHECK-PREINL: [foo]:309:0 ; CHECK-PREINL-NEXT: 2.1: 14 ; CHECK-PREINL-NEXT: 3: 15 ; CHECK-PREINL-NEXT: 3.1: 14 bar:14 ; CHECK-PREINL-NEXT: 3.2: 1 +; CHECK-PREINL-NEXT: 65526: 14 ; CHECK-PREINL-NEXT: !Attributes: 1 -; CHECK-PREINL-NEXT:[foo:3.1 @ bar]:14:0 +; CHECK-PREINL-NEXT:[foo:3.1 @ bar]:84:0 ; CHECK-PREINL-NEXT: 1: 14 ; CHECK-PREINL-NEXT: !Attributes: 3 -; CHECK-NO-PREINL: [foo]:44:0 +; CHECK-NO-PREINL: [foo]:309:0 ; CHECK-NO-PREINL-NEXT: 2.1: 14 ; CHECK-NO-PREINL-NEXT: 3: 15 ; CHECK-NO-PREINL-NEXT: 3.1: 14 bar:14 ; CHECK-NO-PREINL-NEXT: 3.2: 1 +; CHECK-NO-PREINL-NEXT: 65526: 14 ; CHECK-NO-PREINL-NEXT: !Attributes: 1 -; CHECK-NO-PREINL-NEXT:[bar]:14:0 +; CHECK-NO-PREINL-NEXT:[bar]:84:0 ; CHECK-NO-PREINL-NEXT: 1: 14 ; CHECK-NO-PREINL-NEXT: !Attributes: 1 -; CHECK-TRIM-NOT: [foo] -; CHECK-TRIM:[foo:3.1 @ bar]:14:0 +; CHECK-TRIM-NOT: [foo]:309:0 +; CHECK-TRIM:[foo:3.1 @ bar]:84:0 ; CHECK-TRIM-NEXT: 1: 14 ; CHECK-TRIM-NEXT: !Attributes: 3 diff --git a/llvm/test/tools/llvm-profgen/fname-canonicalization.test 
b/llvm/test/tools/llvm-profgen/fname-canonicalization.test index 21b43a8f4fb2e..79789a4240b17 100644 --- a/llvm/test/tools/llvm-profgen/fname-canonicalization.test +++ b/llvm/test/tools/llvm-profgen/fname-canonicalization.test @@ -2,10 +2,10 @@ ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/unique-linkage-name-dwarf.perfscript --binary=%S/Inputs/unique-linkage-name-dwarf.perfbin --output=%t --profile-summary-cold-count=0 ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-DWARF-FNAME -; CHECK-DWARF-FNAME:[main:1 @ foo]:43:0 +; CHECK-DWARF-FNAME:[main:1 @ foo]:309:0 ; CHECK-DWARF-FNAME: 2: 14 ; CHECK-DWARF-FNAME: 3: 29 bar.__uniq.26267048767521081047744692097241227776:14 -; CHECK-DWARF-FNAME:[main:1 @ foo:3 @ bar.__uniq.26267048767521081047744692097241227776]:14:0 +; CHECK-DWARF-FNAME:[main:1 @ foo:3 @ bar.__uniq.26267048767521081047744692097241227776]:84:0 ; CHECK-DWARF-FNAME: 1: 14 diff --git a/llvm/test/tools/llvm-profgen/fs-discriminator.test b/llvm/test/tools/llvm-profgen/fs-discriminator.test index 2fd52b82a22f3..604e44ff31962 100644 --- a/llvm/test/tools/llvm-profgen/fs-discriminator.test +++ b/llvm/test/tools/llvm-profgen/fs-discriminator.test @@ -5,7 +5,7 @@ ;CHECK-SECTION: ProfileSummarySection - Offset: [[#]], Size: [[#]], Flags: {fs-discriminator} -;CHECK: partition_pivot_last:88:1 +;CHECK: partition_pivot_last:507:1 ;CHECK: 1: 4 ;CHECK: 2: 4 ;CHECK: 3: 4 @@ -22,7 +22,7 @@ ;CHECK: 5.5120: 2 ;CHECK: 6: 3 ;CHECK: 7: 3 -;CHECK: 5: swap:18 +;CHECK: 5: swap:80 ;CHECK: 1: 2 ;CHECK: 1.1024: 2 ;CHECK: 1.4096: 2 @@ -32,12 +32,29 @@ ;CHECK: 3: 2 ;CHECK: 3.512: 2 ;CHECK: 3.3584: 2 -;CHECK: 6: swap:12 +;CHECK: 65532: 2 +;CHECK: 6: swap:51 ;CHECK: 1.14336: 3 ;CHECK: 2.7168: 3 ;CHECK: 2.11776: 3 ;CHECK: 3.13824: 3 -;CHECK: partition_pivot_first:41:1 +;CHECK: main:369:0 +;CHECK: 0: 0 +;CHECK: 3: 0 +;CHECK: 4.1: 1 +;CHECK: 4.3: 1 +;CHECK: 5.3: 4 +;CHECK: 5.1537: 4 +;CHECK: 6: 4 +;CHECK: 6.1: 3 +;CHECK: 6.3: 4 +;CHECK: 7: 1 +;CHECK: 8: 1 quick_sort:1 
+;CHECK: 9: 1 +;CHECK: 11: 0 +;CHECK: 14: 0 +;CHECK: 65499: 2 +;CHECK: partition_pivot_first:298:1 ;CHECK: 0: 1 ;CHECK: 1: 1 ;CHECK: 2: 1 @@ -50,31 +67,17 @@ ;CHECK: 4.3: 4 ;CHECK: 5: 1 ;CHECK: 6: 1 -;CHECK: 4.2: swap:6 +;CHECK: 65516: 1 +;CHECK: 4.2: swap:24 ;CHECK: 1.7168: 2 ;CHECK: 2: 2 ;CHECK: 3.4608: 2 -;CHECK: 5: swap:4 +;CHECK: 5: swap:17 ;CHECK: 1: 1 ;CHECK: 2.229376: 1 ;CHECK: 2.589824: 1 ;CHECK: 3: 1 -;CHECK: main:24:0 -;CHECK: 0: 0 -;CHECK: 3: 0 -;CHECK: 4.1: 1 -;CHECK: 4.3: 1 -;CHECK: 5.3: 4 -;CHECK: 5.1537: 4 -;CHECK: 6: 4 -;CHECK: 6.1: 3 -;CHECK: 6.3: 4 -;CHECK: 7: 1 -;CHECK: 8: 1 quick_sort:1 -;CHECK: 9: 1 -;CHECK: 11: 0 -;CHECK: 14: 0 -;CHECK: quick_sort:13:2 +;CHECK: quick_sort:122:2 ;CHECK: 0: 2 ;CHECK: 1: 2 ;CHECK: 1.15360: 2 @@ -82,6 +85,7 @@ ;CHECK: 3: 1 quick_sort:1 ;CHECK: 4: 2 ;CHECK: 6: 2 +;CHECK: 65507: 2 ; original code: diff --git a/llvm/test/tools/llvm-profgen/func-split.test b/llvm/test/tools/llvm-profgen/func-split.test index 6ace0cd0ab60e..d27ed882bcb9f 100644 --- a/llvm/test/tools/llvm-profgen/func-split.test +++ b/llvm/test/tools/llvm-profgen/func-split.test @@ -3,26 +3,26 @@ ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/func-split.perfscript --binary=%S/Inputs/func-split.perfbin --output=%t --ignore-stack-samples ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-STRIP-CTX -;CHECK: [foo]:82:0 +;CHECK: [foo]:409:0 ;CHECK: 2.1: 27 ;CHECK: 3: 27 ;CHECK: 3.1: 2 bar:2 ;CHECK: 3.2: 26 -;CHECK: [foo:3.1 @ bar]:2:0 +;CHECK: [foo:3.1 @ bar]:8:0 ;CHECK: 1: 1 ;CHECK: 5: 1 ;CHECK: [bar]:0:1 ;CHECK-NOT: foo.cold -;CHECK-STRIP-CTX: foo:81:0 +;CHECK-STRIP-CTX: foo:1334:0 ;CHECK-STRIP-CTX: 0: 0 ;CHECK-STRIP-CTX: 2.1: 27 ;CHECK-STRIP-CTX: 3: 27 ;CHECK-STRIP-CTX: 3.1: 1 bar:1 ;CHECK-STRIP-CTX: 3.2: 26 ;CHECK-STRIP-CTX: 4: 0 -;CHECK-STRIP-CTX: bar:2:1 +;CHECK-STRIP-CTX: bar:27:1 ;CHECK-STRIP-CTX: 1: 1 ;CHECK-STRIP-CTX: 5: 1 diff --git a/llvm/test/tools/llvm-profgen/inline-cs-noprobe.test 
b/llvm/test/tools/llvm-profgen/inline-cs-noprobe.test index cd8dae192e9ec..a31147a1a7863 100644 --- a/llvm/test/tools/llvm-profgen/inline-cs-noprobe.test +++ b/llvm/test/tools/llvm-profgen/inline-cs-noprobe.test @@ -13,19 +13,21 @@ ; CHECK-SYM-LIST: foo ; CHECK-SYM-LIST: main -; CHECK:[main:1 @ foo]:44:0 +; CHECK:[main:1 @ foo]:309:0 ; CHECK: 2.1: 14 ; CHECK: 3: 15 ; CHECK: 3.1: 14 bar:14 ; CHECK: 3.2: 1 -; CHECK:[main:1 @ foo:3.1 @ bar]:14:0 +; CHECK: 65526: 14 +; CHECK:[main:1 @ foo:3.1 @ bar]:84:0 ; CHECK: 1: 14 ; CHECK-UNWINDER: [main:1 @ foo] -; CHECK-UNWINDER: 3 -; CHECK-UNWINDER: 670-69b:1 -; CHECK-UNWINDER: 67e-69b:14 -; CHECK-UNWINDER: 6bf-6c8:14 +; CHECK-UNWINDER: 4 +; CHECK-UNWINDER: 670-6ad:1 +; CHECK-UNWINDER: 67e-69b:1 +; CHECK-UNWINDER: 67e-6ad:13 +; CHECK-UNWINDER: 6bd-6c8:14 ; CHECK-UNWINDER: 2 ; CHECK-UNWINDER: 69b->670:1 ; CHECK-UNWINDER: 6c8->67e:15 @@ -34,16 +36,17 @@ ; CHECK-UNWINDER: 6af-6bb:14 ; CHECK-UNWINDER: 0 -; CHECK-STRIP-CTX: main:44:0 +; CHECK-STRIP-CTX: main:1109:0 ; CHECK-STRIP-CTX: 0: 0 ; CHECK-STRIP-CTX: 2: 0 -; CHECK-STRIP-CTX: 1: foo:44 +; CHECK-STRIP-CTX: 1: foo:1109 ; CHECK-STRIP-CTX: 2.1: 14 ; CHECK-STRIP-CTX: 3: 15 ; CHECK-STRIP-CTX: 3.2: 1 ; CHECK-STRIP-CTX: 4: 0 ; CHECK-STRIP-CTX: 7: 0 -; CHECK-STRIP-CTX: 3.1: bar:14 +; CHECK-STRIP-CTX: 65526: 14 +; CHECK-STRIP-CTX: 3.1: bar:196 ; CHECK-STRIP-CTX: 1: 14 ; original code: diff --git a/llvm/test/tools/llvm-profgen/inline-force-dwarf.test b/llvm/test/tools/llvm-profgen/inline-force-dwarf.test index 05cb91f26defd..ca7c236de79db 100644 --- a/llvm/test/tools/llvm-profgen/inline-force-dwarf.test +++ b/llvm/test/tools/llvm-profgen/inline-force-dwarf.test @@ -1,15 +1,17 @@ ; RUN: llvm-profgen --format=text --ignore-stack-samples --use-dwarf-correlation --perfscript=%S/Inputs/inline-cs-pseudoprobe.perfscript --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --output=%t --profile-summary-cold-count=0 ; RUN: FileCheck %s --input-file %t -; CHECK: main:43:0 +; CHECK: main:1109:0 ; 
CHECK-NEXT: 0: 0 ; CHECK-NEXT: 2: 0 -; CHECK-NEXT: 1: foo:43 +; CHECK-NEXT: 1: foo:1109 ; CHECK-NEXT: 2: 14 ; CHECK-NEXT: 3: 15 ; CHECK-NEXT: 4: 0 -; CHECK-NEXT: 3: bar:14 +; CHECK-NEXT: 65526: 14 +; CHECK-NEXT: 3: bar:224 ; CHECK-NEXT: 1: 14 +; CHECK-NEXT: 65533: 14 ; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling diff --git a/llvm/test/tools/llvm-profgen/inline-noprobe.test b/llvm/test/tools/llvm-profgen/inline-noprobe.test index 9d1473e097417..f960f074fefff 100644 --- a/llvm/test/tools/llvm-profgen/inline-noprobe.test +++ b/llvm/test/tools/llvm-profgen/inline-noprobe.test @@ -4,6 +4,8 @@ ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-RAW-PROFILE ; RUN: llvm-profgen --format=text --unsymbolized-profile=%t --binary=%S/Inputs/inline-noprobe.perfbin --output=%t1 ; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK +; RUN: llvm-profgen --format=text --unsymbolized-profile=%t --binary=%S/Inputs/inline-noprobe.perfbin --output=%t1 --update-total-samples +; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-UPDATE-TOTAL-SAMPLE ; RUN: llvm-profgen --format=text --use-dwarf-correlation --perfscript=%S/Inputs/inline-noprobe.perfscript --binary=%S/Inputs/inline-noprobe.perfbin --output=%t ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK ; RUN: echo -e "0\n0" > %t @@ -12,19 +14,36 @@ ; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/out-of-bounds.raw.prof --binary=%S/Inputs/inline-noprobe.perfbin --output=%t1 ; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-OB -CHECK: main:188:0 +CHECK: main:2609:0 CHECK: 0: 0 CHECK: 2: 0 -CHECK: 1: foo:188 +CHECK: 1: foo:2609 CHECK: 2.1: 42 CHECK: 3: 62 CHECK: 3.2: 21 CHECK: 4: 0 -CHECK: 3.1: bar:42 +CHECK: 65526: 62 +CHECK: 3.1: bar:546 CHECK: 1: 42 -CHECK: 3.2: bar:21 +CHECK: 65533: 42 +CHECK: 3.2: bar:189 CHECK: 1: 21 +CHECK-UPDATE-TOTAL-SAMPLE: main:292:0 +CHECK-UPDATE-TOTAL-SAMPLE: 0: 0 +CHECK-UPDATE-TOTAL-SAMPLE: 2: 0 +CHECK-UPDATE-TOTAL-SAMPLE: 
1: foo:292 +CHECK-UPDATE-TOTAL-SAMPLE: 2.1: 42 +CHECK-UPDATE-TOTAL-SAMPLE: 3: 62 +CHECK-UPDATE-TOTAL-SAMPLE: 3.2: 21 +CHECK-UPDATE-TOTAL-SAMPLE: 4: 0 +CHECK-UPDATE-TOTAL-SAMPLE: 65526: 62 +CHECK-UPDATE-TOTAL-SAMPLE: 3.1: bar:84 +CHECK-UPDATE-TOTAL-SAMPLE: 1: 42 +CHECK-UPDATE-TOTAL-SAMPLE: 65533: 42 +CHECK-UPDATE-TOTAL-SAMPLE: 3.2: bar:21 +CHECK-UPDATE-TOTAL-SAMPLE: 1: 21 + CHECK-ALL-ZERO: bar:0:0 CHECK-ALL-ZERO: 1: 0 CHECK-ALL-ZERO: 5: 0 @@ -60,30 +79,33 @@ CHECK-RAW-PROFILE-NEXT: 2 CHECK-RAW-PROFILE-NEXT: 677->650:21 CHECK-RAW-PROFILE-NEXT: 691->669:43 -;CHECK-OB: foo:8:0 +;CHECK-OB: foo:114:0 ;CHECK-OB: 0: 1 ;CHECK-OB: 2.1: 1 ;CHECK-OB: 3: 1 ;CHECK-OB: 3.2: 1 ;CHECK-OB: 4: 1 -;CHECK-OB: 3.1: bar:1 +;CHECK-OB: 3.1: bar:13 ;CHECK-OB: 1: 1 -;CHECK-OB: 3.2: bar:2 +;CHECK-OB: 65533: 1 +;CHECK-OB: 3.2: bar:24 ;CHECK-OB: 1: 1 ;CHECK-OB: 7: 1 -;CHECK-OB: main:8:0 +;CHECK-OB: main:100:0 ;CHECK-OB: 0: 1 ;CHECK-OB: 2: 1 -;CHECK-OB: 1: foo:6 +;CHECK-OB: 1: foo:80 ;CHECK-OB: 2.1: 1 ;CHECK-OB: 3: 1 ;CHECK-OB: 3.2: 1 ;CHECK-OB: 4: 1 -;CHECK-OB: 3.1: bar:1 +;CHECK-OB: 65526: 1 +;CHECK-OB: 3.1: bar:13 ;CHECK-OB: 1: 1 -;CHECK-OB: 3.2: bar:1 +;CHECK-OB: 65533: 1 +;CHECK-OB: 3.2: bar:9 ;CHECK-OB: 1: 1 -;CHECK-OB: bar:2:0 +;CHECK-OB: bar:14:0 ;CHECK-OB: 1: 1 ;CHECK-OB: 5: 1 diff --git a/llvm/test/tools/llvm-profgen/inline-noprobe2.test b/llvm/test/tools/llvm-profgen/inline-noprobe2.test index d8c73ffad83d0..14e1518ba4341 100644 --- a/llvm/test/tools/llvm-profgen/inline-noprobe2.test +++ b/llvm/test/tools/llvm-profgen/inline-noprobe2.test @@ -27,76 +27,74 @@ ;CHECK-RAW-PROFILE-NOT: 7f7448e889e4 ;CHECK-RAW-PROFILE-NOT: 7f7448e88826 -;CHECK: partition_pivot_first:367:5 -;CHECK-NEXT: 0: 5 -;CHECK-NEXT: 1: 5 -;CHECK-NEXT: 2: 5 -;CHECK-NEXT: 3: 5 -;CHECK-NEXT: 3.1: 83 -;CHECK-NEXT: 4: 82 -;CHECK-NEXT: 4.1: 26 -;CHECK-NEXT: 4.2: 25 -;CHECK-NEXT: 4.3: 26 -;CHECK-NEXT: 5: 6 -;CHECK-NEXT: 6: 6 -;CHECK-NEXT: 4.2: swap:75 -;CHECK-NEXT: 1: 25 -;CHECK-NEXT: 2: 25 -;CHECK-NEXT: 3: 25 -;CHECK-NEXT: 
5: swap:18 -;CHECK-NEXT: 1: 6 -;CHECK-NEXT: 2: 6 -;CHECK-NEXT: 3: 6 -;CHECK-NEXT: partition_pivot_last:225:7 -;CHECK-NEXT: 1: 6 -;CHECK-NEXT: 2: 6 -;CHECK-NEXT: 3: 6 +;CHECK: partition_pivot_first:3022:5 +;CHECK: 0: 5 +;CHECK: 1: 5 +;CHECK: 2: 5 +;CHECK: 3: 5 +;CHECK: 3.1: 83 +;CHECK: 4: 82 +;CHECK: 4.1: 26 +;CHECK: 4.2: 25 +;CHECK: 4.3: 26 +;CHECK: 5: 6 +;CHECK: 6: 6 +;CHECK: 65516: 5 +;CHECK: 4.2: swap:300 +;CHECK: 1: 25 +;CHECK: 2: 25 +;CHECK: 3: 25 +;CHECK: 5: swap:102 +;CHECK: 1: 6 +;CHECK: 2: 6 +;CHECK: 3: 6 +;CHECK: partition_pivot_last:1210:7 +;CHECK: 1: 6 +;CHECK: 2: 6 +;CHECK: 3: 6 ;w/o duplication factor : 3.1: 18 ;w/o duplication factor : 3.3: 18 ;w/o duplication factor : 4: 19 ;w/o duplication factor : 5: 9 -;CHECK-NEXT: 3.1: 36 -;CHECK-NEXT: 3.3: 36 -;CHECK-NEXT: 4: 38 -;CHECK-NEXT: 5: 18 -;CHECK-NEXT: 6: 5 -;CHECK-NEXT: 7: 5 -;CHECK-NEXT: 5: swap:54 +;CHECK: 3.1: 36 +;CHECK: 3.3: 36 +;CHECK: 4: 38 +;CHECK: 5: 18 +;CHECK: 6: 5 +;CHECK: 7: 5 +;CHECK: 65526: 2 +;CHECK: 5: swap:247 ;w/o duplication factor : 1: 9 ;w/o duplication factor : 2: 9 ;w/o duplication factor : 3: 9 -;CHECK-NEXT: 1: 18 -;CHECK-NEXT: 2: 18 -;CHECK-NEXT: 3: 18 - -;CHECK-NEXT: 6: swap:15 -;CHECK-NEXT: 1: 5 -;CHECK-NEXT: 2: 5 -;CHECK-NEXT: 3: 5 -;CHECK-NEXT: quick_sort:83:25 -;CHECK-NEXT: 1: 24 -;CHECK-NEXT: 2: 12 partition_pivot_last:7 partition_pivot_first:5 -;CHECK-NEXT: 3: 11 quick_sort:12 -;CHECK-NEXT: 4: 12 quick_sort:12 -;CHECK-NEXT: 6: 24 -;CHECK-NEXT: main:52:0 -;CHECK-NEXT: 0: 0 -;CHECK-NEXT: 3: 0 -;CHECK-NEXT: 4.1: 0 -;CHECK-NEXT: 4.3: 0 -;CHECK-NEXT: 5.1: 10 -;CHECK-NEXT: 5.3: 10 -;CHECK-NEXT: 6: 10 -;CHECK-NEXT: 6.1: 12 -;CHECK-NEXT: 6.3: 10 -;CHECK-NEXT: 7: 0 -;CHECK-NEXT: 8: 0 quick_sort:1 -;CHECK-NEXT: 9: 0 -;CHECK-NEXT: 11: 0 -;CHECK-NEXT: 14: 0 + +;CHECK: 1: 18 +;CHECK: 2: 18 +;CHECK: 3: 18 +;CHECK: 6: swap:85 +;CHECK: 1: 5 +;CHECK: 2: 5 +;CHECK: 3: 5 +;CHECK: main:820:0 +;CHECK: 0: 0 +;CHECK: 3: 0 +;CHECK: 4.1: 0 +;CHECK: 4.3: 0 +;CHECK: 5.1: 10 +;CHECK: 5.3: 10 
+;CHECK: 6: 10 +;CHECK: 6.1: 12 +;CHECK: 6.3: 10 +;CHECK: 7: 0 +;CHECK: 8: 0 quick_sort:1 +;CHECK: 9: 0 +;CHECK: 11: 0 +;CHECK: 14: 0 +;CHECK: 65499: 0 + ; original code: ; clang -O3 -g -fno-optimize-sibling-calls -fdebug-info-for-profiling qsort.c -o a.out diff --git a/llvm/test/tools/llvm-profgen/multi-load-segs.test b/llvm/test/tools/llvm-profgen/multi-load-segs.test index 7af50d3feb056..89e88b0fcb33b 100644 --- a/llvm/test/tools/llvm-profgen/multi-load-segs.test +++ b/llvm/test/tools/llvm-profgen/multi-load-segs.test @@ -9,9 +9,10 @@ ;; LOAD 0x000e10 0x0000000000203e10 0x0000000000203e10 0x000040 0x000058 RW 0x1000 ;; LOAD 0x200000 0x0000000000400000 0x0000000000400000 0x0005e8 0x0005e8 R E 0x200000 -; CHECK: [main:2 @ _Z10sort_arrayv:6 @ _Z11bubble_sortPii]:124:0 +; CHECK: [main:2 @ _Z10sort_arrayv:6 @ _Z11bubble_sortPii]:465:0 ; CHECK-NEXT: 4: 31 ; CHECK-NEXT: 5: 31 ; CHECK-NEXT: 7: 31 ; CHECK-NEXT: 8: 31 +; CHECK-NEXT: 65516: 31 ; CHECK-NEXT: !Attributes: 1 diff --git a/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test b/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test index cff82f7b52ae3..8629dafc7706a 100644 --- a/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test +++ b/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test @@ -19,16 +19,15 @@ ; RUN: diff -b %t.summary %t2.summary -; CHECK-AGG:[main:1 @ foo:3 @ bar]:24:6 +; CHECK-AGG:[main:1 @ foo]:108:0 +; CHECK-AGG: 2: 6 +; CHECK-AGG: 3: 6 bar:6 +; CHECK-AGG:[main:1 @ foo:3 @ bar]:100:6 ; CHECK-AGG: 0: 6 ; CHECK-AGG: 1: 6 ; CHECK-AGG: 2: 4 ; CHECK-AGG: 4: 2 ; CHECK-AGG: 5: 6 -; CHECK-AGG:[main:1 @ foo]:12:0 -; CHECK-AGG: 2: 6 -; CHECK-AGG: 3: 6 bar:6 - ; CHECK-AGG-UNWINDER: [main:1 @ foo] ; CHECK-AGG-UNWINDER-NEXT: 3 @@ -51,29 +50,29 @@ ; CHECK-AGG-UNWINDER-NEXT: 5e9->634:6 -; CHECK:[main:1 @ foo:3 @ bar]:12:3 +; CHECK:[main:1 @ foo]:54:0 +; CHECK: 2: 3 +; CHECK: 3: 3 bar:3 +; CHECK:[main:1 @ foo:3 @ bar]:50:3 ; CHECK: 0: 3 ; CHECK: 1: 3 ; CHECK: 2: 2 ; CHECK: 4: 1 ; CHECK: 5: 3 -; 
CHECK:[main:1 @ foo]:6:0 -; CHECK: 2: 3 -; CHECK: 3: 3 bar:3 -; CHECK-STRIP-CTX: bar:12:3 -; CHECK-STRIP-CTX: 0: 3 -; CHECK-STRIP-CTX: 1: 3 -; CHECK-STRIP-CTX: 2: 2 -; CHECK-STRIP-CTX: 4: 1 -; CHECK-STRIP-CTX: 5: 3 -; CHECK-STRIP-CTX: foo:6:0 +; CHECK-STRIP-CTX: foo:198:0 ; CHECK-STRIP-CTX: 0: 0 ; CHECK-STRIP-CTX: 1: 0 ; CHECK-STRIP-CTX: 2: 3 ; CHECK-STRIP-CTX: 3: 3 bar:3 ; CHECK-STRIP-CTX: 4: 0 ; CHECK-STRIP-CTX: 5: 0 +; CHECK-STRIP-CTX: bar:142:3 +; CHECK-STRIP-CTX: 0: 3 +; CHECK-STRIP-CTX: 1: 3 +; CHECK-STRIP-CTX: 2: 2 +; CHECK-STRIP-CTX: 4: 1 +; CHECK-STRIP-CTX: 5: 3 ; CHECK-UNWINDER: [main:1 @ foo] ; CHECK-UNWINDER-NEXT: 3 diff --git a/llvm/test/tools/llvm-profgen/noinline-noprobe.test b/llvm/test/tools/llvm-profgen/noinline-noprobe.test index ff2b5a55c3b76..f5bbc295462c0 100644 --- a/llvm/test/tools/llvm-profgen/noinline-noprobe.test +++ b/llvm/test/tools/llvm-profgen/noinline-noprobe.test @@ -7,19 +7,19 @@ ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noprobe.perfscript --binary=%S/Inputs/noprobe.perfbin --output=%t ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK -CHECK: bar:77:21 -CHECK: 0: 20 -CHECK: 1: 20 -CHECK: 2: 13 -CHECK: 4: 6 -CHECK: 5: 18 -CHECK: foo:38:0 -CHECK: 0: 0 -CHECK: 1: 0 -CHECK: 2: 19 -CHECK: 3: 19 bar:21 -CHECK: 4: 0 -CHECK: 5: 0 +;CHECK: foo:1241:0 +;CHECK: 0: 0 +;CHECK: 1: 0 +;CHECK: 2: 19 +;CHECK: 3: 19 bar:21 +;CHECK: 4: 0 +;CHECK: 5: 0 +;CHECK: bar:926:21 +;CHECK: 0: 20 +;CHECK: 1: 20 +;CHECK: 2: 13 +;CHECK: 4: 6 +;CHECK: 5: 18 CHECK-RAW-PROFILE: 7 CHECK-RAW-PROFILE-NEXT: 5b0-5c8:7 diff --git a/llvm/test/tools/llvm-profgen/profile-density.test b/llvm/test/tools/llvm-profgen/profile-density.test index 4ab1f79344fd4..1d7c2f9cd5dfe 100644 --- a/llvm/test/tools/llvm-profgen/profile-density.test +++ b/llvm/test/tools/llvm-profgen/profile-density.test @@ -1,13 +1,13 @@ -; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/profile-density.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t1 
--use-offset=0 --show-density -hot-function-density-threshold=1 &> %t2 +; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/profile-density.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t1 --use-offset=0 --show-density -hot-function-density-threshold=10 --trim-cold-profile=0 &> %t2 ; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-DENSITY ; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/profile-density-cs.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t3 --show-density -hot-function-density-threshold=1 &> %t4 ; RUN: FileCheck %s --input-file %t4 --check-prefix=CHECK-DENSITY-CS -;CHECK-DENSITY: AutoFDO is estimated to optimize better with 4.9x more samples. Please consider increasing sampling rate or profiling for longer duration to get more samples. -;CHECK-DENSITY: Minimum profile density for hot functions with top 99.00% total samples: 0.2 +;CHECK-DENSITY: AutoFDO is estimated to optimize better with 3.1x more samples. Please consider increasing sampling rate or profiling for longer duration to get more samples. 
+;CHECK-DENSITY: Minimum profile density for hot functions with top 99.00% total samples: 3.2 -;CHECK-DENSITY-CS: Minimum profile density for hot functions with top 99.00% total samples: 31.4 +;CHECK-DENSITY-CS: Minimum profile density for hot functions with top 99.00% total samples: 128.3 ; original code: ; clang -O3 -g -fno-optimize-sibling-calls -fdebug-info-for-profiling qsort.c -o a.out diff --git a/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test b/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test index b45e91b846fd9..14d184194ee8b 100644 --- a/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test +++ b/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test @@ -6,51 +6,77 @@ ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-noprobe.perfscript --binary=%S/Inputs/recursion-compression-noprobe.perfbin --output=%t --compress-recursion=0 --profile-summary-cold-count=0 --csprof-max-context-depth=2 ; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-MAX-CTX-DEPTH -; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa]:14:0 -; CHECK-UNCOMPRESS: 1: 1 -; CHECK-UNCOMPRESS: 2: 13 fb:11 -; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb]:12:0 +; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb]:48:0 ; CHECK-UNCOMPRESS: 1: 11 ; CHECK-UNCOMPRESS: 2: 1 fa:1 -; CHECK-UNCOMPRESS:[main:1 @ foo]:3:0 +; CHECK-UNCOMPRESS: 65532: 11 +; CHECK-UNCOMPRESS: !Attributes: 1 +; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa]:24:0 +; CHECK-UNCOMPRESS: 1: 1 +; CHECK-UNCOMPRESS: 2: 13 fb:11 +; CHECK-UNCOMPRESS: 65527: 1 +; CHECK-UNCOMPRESS: !Attributes: 1 +; CHECK-UNCOMPRESS:[main:1 @ foo]:7:0 ; CHECK-UNCOMPRESS: 2: 1 ; CHECK-UNCOMPRESS: 3: 2 fa:1 -; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa]:2:0 +; CHECK-UNCOMPRESS: !Attributes: 1 +; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa]:7:0 ; CHECK-UNCOMPRESS: 1: 1 -; CHECK-UNCOMPRESS: 2: 1 -; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb:2 @ fa]:1:0 +; 
CHECK-UNCOMPRESS: 2: 2 fb:1 +; CHECK-UNCOMPRESS: !Attributes: 1 +; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb]:2:0 +; CHECK-UNCOMPRESS: 2: 1 fa:1 +; CHECK-UNCOMPRESS: 65532: 1 +; CHECK-UNCOMPRESS: !Attributes: 1 +; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb:2 @ fa]:2:0 ; CHECK-UNCOMPRESS: 4: 1 +; CHECK-UNCOMPRESS: !Attributes: 0 -; CHECK-MAX-CTX-DEPTH:[foo:3 @ fa:2 @ fb]:11:0 -; CHECK-MAX-CTX-DEPTH: 1: 11 -; CHECK-MAX-CTX-DEPTH:[fa:2 @ fb:2 @ fa]:3:0 -; CHECK-MAX-CTX-DEPTH: 1: 1 -; CHECK-MAX-CTX-DEPTH: 2: 1 -; CHECK-MAX-CTX-DEPTH: 4: 1 -; CHECK-MAX-CTX-DEPTH:[main:1 @ foo]:3:0 -; CHECK-MAX-CTX-DEPTH: 2: 1 -; CHECK-MAX-CTX-DEPTH: 3: 2 fa:1 -; CHECK-MAX-CTX-DEPTH:[main:1 @ foo:3 @ fa]:3:0 -; CHECK-MAX-CTX-DEPTH: 1: 1 -; CHECK-MAX-CTX-DEPTH: 2: 2 - - - +; CHECK-MAX-CTX-DEPTH: [foo:3 @ fa:2 @ fb]:47:0 +; CHECK-MAX-CTX-DEPTH: 1: 11 +; CHECK-MAX-CTX-DEPTH: 65532: 11 +; CHECK-MAX-CTX-DEPTH: !Attributes: 1 +; CHECK-MAX-CTX-DEPTH: [main:1 @ foo:3 @ fa]:13:0 +; CHECK-MAX-CTX-DEPTH: 1: 1 +; CHECK-MAX-CTX-DEPTH: 2: 2 +; CHECK-MAX-CTX-DEPTH: 65527: 1 +; CHECK-MAX-CTX-DEPTH: !Attributes: 1 +; CHECK-MAX-CTX-DEPTH: [fa:2 @ fb:2 @ fa]:8:0 +; CHECK-MAX-CTX-DEPTH: 1: 1 +; CHECK-MAX-CTX-DEPTH: 2: 1 +; CHECK-MAX-CTX-DEPTH: 4: 1 +; CHECK-MAX-CTX-DEPTH: !Attributes: 1 +; CHECK-MAX-CTX-DEPTH: [main:1 @ foo]:7:0 +; CHECK-MAX-CTX-DEPTH: 2: 1 +; CHECK-MAX-CTX-DEPTH: 3: 2 fa:1 +; CHECK-MAX-CTX-DEPTH: !Attributes: 1 +; CHECK-MAX-CTX-DEPTH: [fb:2 @ fa:2 @ fb]:1:0 +; CHECK-MAX-CTX-DEPTH: 65532: 1 +; CHECK-MAX-CTX-DEPTH: !Attributes: 1 -; CHECK: [main:1 @ foo:3 @ fa]:14:0 -; CHECK: 1: 1 -; CHECK: 2: 13 fb:11 -; CHECK: [main:1 @ foo:3 @ fa:2 @ fb]:12:0 -; CHECK: 1: 11 -; CHECK: 2: 1 fa:1 -; CHECK: [main:1 @ foo]:3:0 -; CHECK: 2: 1 -; CHECK: 3: 2 fa:1 -; CHECK: [main:1 @ foo:3 @ fa:2 @ fb:2 @ fa]:3:0 -; CHECK: 1: 1 -; CHECK: 2: 1 -; CHECK: 4: 1 +;CHECK: [main:1 @ foo:3 @ fa:2 @ fb]:48:0 +;CHECK: 1: 11 +;CHECK: 2: 1 fa:1 +;CHECK: 65532: 11 +;CHECK: !Attributes: 1 
+;CHECK: [main:1 @ foo:3 @ fa]:24:0 +;CHECK: 1: 1 +;CHECK: 2: 13 fb:11 +;CHECK: 65527: 1 +;CHECK: !Attributes: 1 +;CHECK: [main:1 @ foo:3 @ fa:2 @ fb:2 @ fa]:9:0 +;CHECK: 1: 1 +;CHECK: 2: 2 fb:1 +;CHECK: 4: 1 +;CHECK: !Attributes: 1 +;CHECK: [main:1 @ foo]:7:0 +;CHECK: 2: 1 +;CHECK: 3: 2 fa:1 +;CHECK: !Attributes: 1 +;CHECK: [main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb]:1:0 +;CHECK: 65532: 1 +;CHECK: !Attributes: 1 ; original code: ; clang -O3 -g test.c -o a.out diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp index c40604d8a9f0b..93a6ca2c233e8 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -81,6 +81,12 @@ static cl::opt ShowDensity("show-density", llvm::cl::init(false), llvm::cl::desc("show profile density details"), llvm::cl::Optional); +static cl::opt UpdateTotalSamples( + "update-total-samples", llvm::cl::init(false), + llvm::cl::desc( + "Update total samples by accumulating all its body samples."), + llvm::cl::Optional); + extern cl::opt ProfileSummaryCutoffHot; using namespace llvm; @@ -350,6 +356,9 @@ void ProfileGeneratorBase::updateBodySamplesforFunctionProfile( } void ProfileGeneratorBase::updateTotalSamples() { + if (!UpdateTotalSamples) + return; + for (auto &Item : ProfileMap) { FunctionSamples &FunctionProfile = Item.second; FunctionProfile.updateTotalSamples(); @@ -411,11 +420,12 @@ void ProfileGenerator::generateLineNumBasedProfile() { updateTotalSamples(); } -FunctionSamples &ProfileGenerator::getLeafFrameProfile( - const SampleContextFrameVector &FrameVec) { +FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples( + const SampleContextFrameVector &FrameVec, uint64_t Count) { // Get top level profile FunctionSamples *FunctionProfile = &getTopLevelFunctionProfile(FrameVec[0].FuncName); + FunctionProfile->addTotalSamples(Count); for (size_t I = 1; I < FrameVec.size(); I++) { LineLocation Callsite( @@ -430,6 +440,7 @@ 
FunctionSamples &ProfileGenerator::getLeafFrameProfile( Ret.first->second.setContext(Context); } FunctionProfile = &Ret.first->second; + FunctionProfile->addTotalSamples(Count); } return *FunctionProfile; @@ -479,7 +490,12 @@ void ProfileGenerator::populateBodySamplesForAllFunctions( const SampleContextFrameVector &FrameVec = Binary->getFrameLocationStack(Offset); if (!FrameVec.empty()) { - FunctionSamples &FunctionProfile = getLeafFrameProfile(FrameVec); + // FIXME: As accumulating total count per instruction caused some + // regression, we changed to accumulate total count per byte as a + // workaround. Tuning hotness threshold on the compiler side might be + // necessary in the future. + FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples( + FrameVec, Count * Binary->getInstSize(Offset)); updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(), Count); } @@ -514,7 +530,8 @@ void ProfileGenerator::populateBoundarySamplesForAllFunctions( const SampleContextFrameVector &FrameVec = Binary->getFrameLocationStack(SourceOffset); if (!FrameVec.empty()) { - FunctionSamples &FunctionProfile = getLeafFrameProfile(FrameVec); + FunctionSamples &FunctionProfile = + getLeafProfileAndAddTotalSamples(FrameVec, 0); FunctionProfile.addCalledTargetSamples( FrameVec.back().Location.LineOffset, getBaseDiscriminator(FrameVec.back().Location.Discriminator), @@ -640,6 +657,7 @@ void CSProfileGenerator::populateBodySamplesForFunction( if (LeafLoc.hasValue()) { // Recording body sample for this specific context updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count); + FunctionProfile.addTotalSamples(Count); } } while (IP.advance() && IP.Address <= RangeEnd); } @@ -731,6 +749,7 @@ void CSProfileGenerator::populateInferredFunctionSamples() { CallerProfile.addBodySamples(CallerLeafFrameLoc.Location.LineOffset, CallerLeafFrameLoc.Location.Discriminator, EstimatedCallCount); + CallerProfile.addTotalSamples(EstimatedCallCount); } } diff --git 
a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h index de13f7d840a70..2b239c6321ace 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.h +++ b/llvm/tools/llvm-profgen/ProfileGenerator.h @@ -124,7 +124,8 @@ class ProfileGenerator : public ProfileGeneratorBase { // inline stack and meanwhile it adds the total samples for each frame's // function profile. FunctionSamples & - getLeafFrameProfile(const SampleContextFrameVector &FrameVec); + getLeafProfileAndAddTotalSamples(const SampleContextFrameVector &FrameVec, + uint64_t Count); void populateBodySamplesForAllFunctions(const RangeSample &RangeCounter); void populateBoundarySamplesForAllFunctions(const BranchSample &BranchCounters); diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index 619af8df7a4d2..15e8f0e620588 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -694,15 +694,11 @@ SampleContextFrameVector ProfiledBinary::symbolize(const InstructionPointer &IP, FunctionName = FunctionSamples::getCanonicalFnName(FunctionName); uint32_t Discriminator = CallerFrame.Discriminator; - uint32_t LineOffset = CallerFrame.Line - CallerFrame.StartLine; + uint32_t LineOffset = (CallerFrame.Line - CallerFrame.StartLine) & 0xffff; if (UseProbeDiscriminator) { LineOffset = PseudoProbeDwarfDiscriminator::extractProbeIndex(Discriminator); Discriminator = 0; - } else { - // Filter out invalid negative(int type) lineOffset - if (LineOffset & 0xffff0000) - return SampleContextFrameVector(); } LineLocation Line(LineOffset, Discriminator); diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h index 3d32cba874b59..fcc3a4f0c20a2 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -332,6 +332,13 @@ class ProfiledBinary { return TextSegmentOffsets; } + uint64_t getInstSize(uint64_t Offset) const { 
+ auto I = Offset2InstSizeMap.find(Offset); + if (I == Offset2InstSizeMap.end()) + return 0; + return I->second; + } + bool offsetIsCode(uint64_t Offset) const { return Offset2InstSizeMap.find(Offset) != Offset2InstSizeMap.end(); }