[SampleProfileLoader] Fix integer overflow in generateMDProfMetadata #90217

omern1 · 2024-04-26T14:23:49Z

This patch fixes an integer overflow in the SampleProfileLoader pass.
The issue occurs when weights are saturated and Profi isn't being used.

This patch also adds a newline to a debug message to make it more
readable.

This patch fixes an integer overflow in the SampleProfileLoader pass. The issue occurs when weights are saturated and Profi isn't being used. This patch also adds a newline to a debug message to make it more readable.

llvmbot · 2024-04-26T14:24:21Z

@llvm/pr-subscribers-llvm-transforms

Author: Nabeel Omer (omern1)

Changes

This patch fixes an integer overflow in the SampleProfileLoader pass.
The issue occurs when weights are saturated and Profi isn't being used.

This patch also adds a newline to a debug message to make it more
readable.

Full diff: https://github.com/llvm/llvm-project/pull/90217.diff

3 Files Affected:

(modified) llvm/lib/Transforms/IPO/SampleProfile.cpp (+5-2)
(added) llvm/test/Transforms/SampleProfile/Inputs/overflow.proftext (+2)
(added) llvm/test/Transforms/SampleProfile/overflow.ll (+70)

diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 0b3a6931e779b6..7018e3feded8ab 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -1713,13 +1713,16 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) {
       // if needed. Sample counts in profiles are 64-bit unsigned values,
       // but internally branch weights are expressed as 32-bit values.
       if (Weight > std::numeric_limits<uint32_t>::max()) {
-        LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
+        LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)\n");
         Weight = std::numeric_limits<uint32_t>::max();
       }
       if (!SampleProfileUseProfi) {
         // Weight is added by one to avoid propagation errors introduced by
         // 0 weights.
-        Weights.push_back(static_cast<uint32_t>(Weight + 1));
+        if (Weight != std::numeric_limits<uint32_t>::max())
+          Weight += 1;
+
+        Weights.push_back(static_cast<uint32_t>(Weight));
       } else {
         // Profi creates proper weights that do not require "+1" adjustments but
         // we evenly split the weight among branches with the same destination.
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/overflow.proftext b/llvm/test/Transforms/SampleProfile/Inputs/overflow.proftext
new file mode 100644
index 00000000000000..753294a49e99c5
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/overflow.proftext
@@ -0,0 +1,2 @@
+_Z3testi:29600000000:29600000000
+ 5: 29600000000
diff --git a/llvm/test/Transforms/SampleProfile/overflow.ll b/llvm/test/Transforms/SampleProfile/overflow.ll
new file mode 100644
index 00000000000000..76c7ce874093a1
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/overflow.ll
@@ -0,0 +1,70 @@
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/overflow.proftext | opt -passes='print<branch-prob>' -disable-output 2>&1 | FileCheck %s
+
+; Original Source:
+; int sqrt(int);
+; int lol(int i) {
+;    if (i == 5) {
+;        return 42;
+;    }
+;    else {
+;        return sqrt(i);
+;    }
+;}
+
+define dso_local noundef i32 @_Z3testi(i32 noundef %i) local_unnamed_addr #0 !dbg !10 {
+entry:
+  tail call void @llvm.dbg.value(metadata i32 %i, metadata !16, metadata !DIExpression()), !dbg !17
+  %cmp = icmp eq i32 %i, 5, !dbg !18
+  br i1 %cmp, label %return, label %if.else, !dbg !20
+
+if.else:                                          ; preds = %entry
+  %call = tail call noundef i32 @_Z4sqrti(i32 noundef %i), !dbg !21
+  br label %return, !dbg !23
+
+return:                                           ; preds = %entry, %if.else
+  %retval.0 = phi i32 [ %call, %if.else ], [ 42, %entry ], !dbg !24
+  ret i32 %retval.0, !dbg !25
+}
+
+declare !dbg !26 noundef i32 @_Z4sqrti(i32 noundef)
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+attributes #0 = { "use-sample-profile" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8}
+!llvm.ident = !{!9}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "test.cpp", directory: "/", checksumkind: CSK_MD5, checksum: "cb38d90153a7ebdd6ecf3058eb0524c7")
+!2 = !{i32 7, !"Dwarf Version", i32 5}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 4}
+!5 = !{i32 8, !"PIC Level", i32 2}
+!6 = !{i32 7, !"PIE Level", i32 2}
+!7 = !{i32 7, !"uwtable", i32 2}
+!8 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
+!9 = !{!"clang"}
+!10 = distinct !DISubprogram(name: "test", linkageName: "_Z3loli", scope: !11, file: !11, line: 3, type: !12, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !15)
+!11 = !DIFile(filename: "./test.cpp", directory: "/", checksumkind: CSK_MD5, checksum: "cb38d90153a7ebdd6ecf3058eb0524c7")
+!12 = !DISubroutineType(types: !13)
+!13 = !{!14, !14}
+!14 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!15 = !{!16}
+!16 = !DILocalVariable(name: "i", arg: 1, scope: !10, file: !11, line: 3, type: !14)
+!17 = !DILocation(line: 0, scope: !10)
+!18 = !DILocation(line: 4, column: 11, scope: !19)
+!19 = distinct !DILexicalBlock(scope: !10, file: !11, line: 4, column: 9)
+!20 = !DILocation(line: 4, column: 9, scope: !10)
+!21 = !DILocation(line: 8, column: 16, scope: !22)
+!22 = distinct !DILexicalBlock(scope: !19, file: !11, line: 7, column: 10)
+!23 = !DILocation(line: 8, column: 9, scope: !22)
+!24 = !DILocation(line: 0, scope: !19)
+!25 = !DILocation(line: 10, column: 1, scope: !10)
+!26 = !DISubprogram(name: "sqrt", linkageName: "_Z4sqrti", scope: !11, file: !11, line: 1, type: !12, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized)
+
+; CHECK: edge %entry -> %return probability is 0x00000000 / 0x80000000 = 0.00%
+; CHECK: edge %entry -> %if.else probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+; CHECK: edge %if.else -> %return probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]

The test is only intended to check whether different profile encoding formats produce the same profile annotations.

WenleiHe

lgtm, thanks.

WenleiHe · 2024-04-26T16:29:07Z

cc @MatzeB @wlei-llvm this one is narrow, but an evidence that we're not out of the woods yet for counts overflow.

omern1 · 2024-04-26T16:30:26Z

@WenleiHe Do you know why the decision was made to use 32 bit values for profile metadata?

WenleiHe · 2024-04-26T16:40:26Z

@WenleiHe Do you know why the decision was made to use 32 bit values for profile metadata?

I don't know the history (@david-xl may know), but I actually don't think using int32 is problematic by itself. With int64, of course it's easier, but it comes with size/memory cost.

I think that int32 should be enough, because for profile counts, we don't necessarily care about the absolute values. What's important is 1) we preserve the relative difference in counts, and 2) we have good resolution on counts difference. We may need to normalize counts to overflow overflow/saturation, which we may not be doing a good job..

david-xl · 2024-04-26T16:50:58Z

@WenleiHe Do you know why the decision was made to use 32 bit values for profile metadata?

I don't know the history (@david-xl may know), but I actually don't think using int32 is problematic by itself. With int64, of course it's easier, but it comes with size/memory cost.

I think that int32 should be enough, because for profile counts, we don't necessarily care about the absolute values. What's important is 1) we preserve the relative difference in counts, and 2) we have good resolution on counts difference. We may need to normalize counts to overflow overflow/saturation, which we may not be doing a good job..

right -- 'weight' is not 'count'. It is scaled from edge count from the very beginning. The function entry's entry count on the other hand is 64 bit. Ideally the weight representation details should not be exposed and all updates be done via APIs.

MatzeB

Wow, good catch!

Surprising this wasn't noticed before, I guess loops that actually run for 4 billion iterations are rare outside of microbenchmarks...

MatzeB · 2024-04-26T17:25:23Z

llvm/lib/Transforms/IPO/SampleProfile.cpp

+        if (Weight != std::numeric_limits<uint32_t>::max())
+          Weight += 1;


This will change the value of Weight below in line 1735 (or 1732 before). Probably should assign to some temporary variable.

MatzeB · 2024-04-26T17:31:24Z

llvm/test/Transforms/SampleProfile/overflow.ll

@@ -0,0 +1,72 @@
+; Checks that we are able to handle overflowing counters correctly.
+
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/overflow.proftext | opt -passes='print<branch-prob>' -disable-output 2>&1 | FileCheck %s


Can you try using utils/update_analyze_test_checks.py and auto-generate the CHECK: lines that makes for more robust tests? We haven't mass-edited older tests for this style, but it should be used for newly added tests.

You can find an example in test/Analysis/BranchProbabilityInfo/basic.ll.

MatzeB · 2024-04-26T17:35:18Z

llvm/test/Transforms/SampleProfile/overflow.ll

+
+; Original Source:
+; int sqrt(int);
+; int lol(int i) {


this function name doesn't match the IR... Though FWIW this seems so simple a function, can just as well remove the whole "Original Source" part in this case as far as I am concerned...

MatzeB

Adding "request changes" as this changing of the Weight variable instead of temporary seems dangerous.

WenleiHe · 2024-04-26T17:56:29Z

llvm/lib/Transforms/IPO/SampleProfile.cpp

      if (Weight > std::numeric_limits<uint32_t>::max()) {
-        LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
+        LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)\n");
        Weight = std::numeric_limits<uint32_t>::max();


Unrelated to this particular fix, but I'm also wondering if we would benefit from proper scaling/normalization, instead of just saturate uint32_t in the case of overflow. We can do some sanity check to see how much we actually hit such saturation.

Hi @WenleiHe, we probably do need some form of scaling here. In one of the codebases we (Sony) have we're seeing potential losses due to weights being saturated which will be helped scaling the values.

I'll try and address this in a future patch.

github-actions · 2024-04-29T09:23:44Z

✅ With the latest revision this PR passed the C/C++ code formatter.

omern1 · 2024-04-29T10:11:04Z

The BOLT test failure on the pre-commit CI is unrelated to this patch. It's reproducible even without the patch.

spupyrev · 2024-05-02T19:16:29Z

llvm/lib/Transforms/IPO/SampleProfile.cpp

        Weight = std::numeric_limits<uint32_t>::max();
      }
      if (!SampleProfileUseProfi) {
        // Weight is added by one to avoid propagation errors introduced by
        // 0 weights.
-        Weights.push_back(static_cast<uint32_t>(Weight + 1));
+        Weights.push_back(static_cast<uint32_t>(


I'm curious if this path is still beneficial for some use cases? Can we trust profi results and drop this hack altogether? @omern1

you mean delete the non-profi inference path completely? some people might still want a fall back.

i'm curious to learn about such use cases; are there some known examples where fallback is preferred?

Hi @spupyrev, I can't speak for all users of the non-profi inference path but from our perspective that would require analysis to understand the impact of profi on our codebases.

I think this discussion can be had separately from this patch.

If you / @WenleiHe / @MatzeB are happy with the current state of this patch can I please get an LGTM?

Of course, I didn't plan to delay the patch, will stamp it.
(Though if you care about perf and the experiments with profi aren't too time-consuming, please try it out and share the results with us. Ultimately it would be great to keep only one codepath here, but the change should be done carefully)

Thanks! I'll look into this soon and share the results.

spupyrev · 2024-05-07T16:06:35Z

l see one failed test currently:

| LLVM :: Transforms/SampleProfile/fnptr.ll

Is this unrelated?

MatzeB

LGTM

FWIW: I also think that having two code paths for prof/non-profi is not great, but that shouldn't block a simple bug fix.

MatzeB · 2024-05-07T17:07:45Z

(obviously should fix the test before landing)

This reverts commit 6e9d7c1.

omern1 · 2024-05-08T13:31:41Z

l see one failed test currently:

| LLVM :: Transforms/SampleProfile/fnptr.ll

Is this unrelated?

My bad, now fixed.

omern1 added 2 commits April 26, 2024 15:22

Add pre-commit tests

14d695d

[SampleProfileLoader] Fix integer overflow in generateMDProfMetadata

e08dfb3

This patch fixes an integer overflow in the SampleProfileLoader pass. The issue occurs when weights are saturated and Profi isn't being used. This patch also adds a newline to a debug message to make it more readable.

llvmbot added the llvm:transforms label Apr 26, 2024

omern1 requested review from spupyrev and WenleiHe April 26, 2024 14:25

omern1 added 2 commits April 26, 2024 16:48

Fix failing test fnptr.ll

6e9d7c1

The test is only intended to check whether different profile encoding formats produce the same profile annotations.

Add comment to test explaining what it does

a9aa67b

WenleiHe approved these changes Apr 26, 2024

View reviewed changes

MatzeB reviewed Apr 26, 2024

View reviewed changes

MatzeB requested changes Apr 26, 2024

View reviewed changes

WenleiHe reviewed Apr 26, 2024

View reviewed changes

Address review comments

7f87640

Fix formatting

73348bc

omern1 requested a review from MatzeB April 29, 2024 10:11

spupyrev reviewed May 2, 2024

View reviewed changes

spupyrev approved these changes May 7, 2024

View reviewed changes

MatzeB approved these changes May 7, 2024

View reviewed changes

Revert "Fix failing test fnptr.ll"

45086a9

This reverts commit 6e9d7c1.

omern1 merged commit 686a206 into llvm:main May 8, 2024
3 of 4 checks passed

omern1 deleted the omern1/pgo-fix branch May 8, 2024 13:35

MatzeB mentioned this pull request Jun 25, 2024

[PGO] Fix branch weights overflow #96541

Closed

Nechda mentioned this pull request Jun 25, 2024

[SPGO] The most frequent transition becomes unreachable #88361

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[SampleProfileLoader] Fix integer overflow in generateMDProfMetadata #90217

[SampleProfileLoader] Fix integer overflow in generateMDProfMetadata #90217

omern1 commented Apr 26, 2024

llvmbot commented Apr 26, 2024

WenleiHe left a comment

WenleiHe commented Apr 26, 2024

omern1 commented Apr 26, 2024 •

edited

Loading

WenleiHe commented Apr 26, 2024

david-xl commented Apr 26, 2024

MatzeB left a comment

MatzeB Apr 26, 2024

MatzeB Apr 26, 2024

MatzeB Apr 26, 2024

MatzeB left a comment

WenleiHe Apr 26, 2024

omern1 May 8, 2024 •

edited

Loading

github-actions bot commented Apr 29, 2024 •

edited

Loading

omern1 commented Apr 29, 2024 •

edited

Loading

spupyrev May 2, 2024

WenleiHe May 3, 2024

spupyrev May 3, 2024

omern1 May 7, 2024

spupyrev May 7, 2024

omern1 May 8, 2024

spupyrev commented May 7, 2024

MatzeB left a comment

MatzeB commented May 7, 2024

omern1 commented May 8, 2024

		if (Weight != std::numeric_limits<uint32_t>::max())
		Weight += 1;

		@@ -0,0 +1,72 @@
		; Checks that we are able to handle overflowing counters correctly.

		; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/overflow.proftext \| opt -passes='print<branch-prob>' -disable-output 2>&1 \| FileCheck %s

[SampleProfileLoader] Fix integer overflow in generateMDProfMetadata #90217

[SampleProfileLoader] Fix integer overflow in generateMDProfMetadata #90217

Conversation

omern1 commented Apr 26, 2024

llvmbot commented Apr 26, 2024

WenleiHe left a comment

Choose a reason for hiding this comment

WenleiHe commented Apr 26, 2024

omern1 commented Apr 26, 2024 • edited Loading

WenleiHe commented Apr 26, 2024

david-xl commented Apr 26, 2024

MatzeB left a comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

MatzeB left a comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

omern1 May 8, 2024 • edited Loading

Choose a reason for hiding this comment

github-actions bot commented Apr 29, 2024 • edited Loading

omern1 commented Apr 29, 2024 • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

spupyrev commented May 7, 2024

MatzeB left a comment

Choose a reason for hiding this comment

MatzeB commented May 7, 2024

omern1 commented May 8, 2024

omern1 commented Apr 26, 2024 •

edited

Loading

omern1 May 8, 2024 •

edited

Loading

github-actions bot commented Apr 29, 2024 •

edited

Loading

omern1 commented Apr 29, 2024 •

edited

Loading