Skip to content

Conversation

jinhuang1102
Copy link
Contributor

The memchr inliner creates new switch branches but was failing to add profile metadata. This patch fixes the issue by explicitly adding unknown branch weights to these branches.

Issue #147390

@llvmbot
Copy link
Member

llvmbot commented Sep 26, 2025

@llvm/pr-subscribers-llvm-transforms

Author: Jin Huang (jinhuang1102)

Changes

The memchr inliner creates new switch branches but was failing to add profile metadata. This patch fixes the issue by explicitly adding unknown branch weights to these branches.

Issue #147390


Full diff: https://github.com/llvm/llvm-project/pull/160964.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp (+5)
  • (modified) llvm/test/Transforms/AggressiveInstCombine/memchr.ll (+32-19)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index ee1fec0da3d73..ad42f21bc9944 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -1350,6 +1350,11 @@ static bool foldMemChr(CallInst *Call, DomTreeUpdater *DTU,
   BB->getTerminator()->eraseFromParent();
   SwitchInst *SI = IRB.CreateSwitch(
       IRB.CreateTrunc(Call->getArgOperand(1), ByteTy), BBNext, N);
+  Function *F = Call->getFunction();
+  assert(F && "Instruction does not belong to a function!");
+  std::optional<Function::ProfileCount> EC = F->getEntryCount();
+  if (EC && EC->getCount() > 0)
+    setExplicitlyUnknownBranchWeights(*SI, DEBUG_TYPE);
   Type *IndexTy = DL.getIndexType(Call->getType());
   SmallVector<DominatorTree::UpdateType, 8> Updates;
 
diff --git a/llvm/test/Transforms/AggressiveInstCombine/memchr.ll b/llvm/test/Transforms/AggressiveInstCombine/memchr.ll
index b26320be634b8..6fbe960109098 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/memchr.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/memchr.ll
@@ -6,9 +6,10 @@
 
 declare ptr @memchr(ptr, i32, i64)
 
-define i1 @test_memchr_null(i32 %x) {
+define i1 @test_memchr_null(i32 %x) !prof !0 {
 ; CHECK-LABEL: define i1 @test_memchr_null(
-; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-SAME: i32 [[X:%.*]]) 
+; CHECK: !prof [[PROF_0:![0-9]+]] {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[X]] to i8
 ; CHECK-NEXT:    switch i8 [[TMP0]], label %[[ENTRY_SPLIT:.*]] [
@@ -40,9 +41,10 @@ entry:
   ret i1 %isnull
 }
 
-define ptr @test_memchr(i32 %x) {
+define ptr @test_memchr(i32 %x) !prof !0 {
 ; CHECK-LABEL: define ptr @test_memchr(
-; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-SAME: i32 [[X:%.*]]) 
+; CHECK: !prof [[PROF_0]] {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[X]] to i8
 ; CHECK-NEXT:    switch i8 [[TMP0]], label %[[ENTRY_SPLIT:.*]] [
@@ -72,16 +74,17 @@ entry:
   ret ptr %memchr
 }
 
-define ptr @test_memchr_smaller_n(i32 %x) {
+define ptr @test_memchr_smaller_n(i32 %x) !prof !0 {
 ; CHECK-LABEL: define ptr @test_memchr_smaller_n(
-; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-SAME: i32 [[X:%.*]]) 
+; CHECK: !prof [[PROF_0]] {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[X]] to i8
 ; CHECK-NEXT:    switch i8 [[TMP0]], label %[[ENTRY_SPLIT:.*]] [
 ; CHECK-NEXT:      i8 48, label %[[MEMCHR_CASE:.*]]
 ; CHECK-NEXT:      i8 49, label %[[MEMCHR_CASE1:.*]]
 ; CHECK-NEXT:      i8 0, label %[[MEMCHR_CASE2:.*]]
-; CHECK-NEXT:    ]
+; CHECK-NEXT:    ], !prof [[PROF_1:![0-9]+]]
 ; CHECK:       [[MEMCHR_CASE]]:
 ; CHECK-NEXT:    br label %[[MEMCHR_SUCCESS:.*]]
 ; CHECK:       [[MEMCHR_CASE1]]:
@@ -103,9 +106,10 @@ entry:
 
 ; negative tests
 
-define ptr @test_memchr_larger_n(i32 %x) {
+define ptr @test_memchr_larger_n(i32 %x) !prof !0 {
 ; CHECK-LABEL: define ptr @test_memchr_larger_n(
-; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-SAME: i32 [[X:%.*]])
+; CHECK: !prof [[PROF_0]] {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[MEMCHR:%.*]] = call ptr @memchr(ptr @str, i32 [[X]], i64 6)
 ; CHECK-NEXT:    ret ptr [[MEMCHR]]
@@ -115,9 +119,10 @@ entry:
   ret ptr %memchr
 }
 
-define ptr @test_memchr_non_constant(i32 %x, ptr %str) {
+define ptr @test_memchr_non_constant(i32 %x, ptr %str) !prof !0 {
 ; CHECK-LABEL: define ptr @test_memchr_non_constant(
-; CHECK-SAME: i32 [[X:%.*]], ptr [[STR:%.*]]) {
+; CHECK-SAME: i32 [[X:%.*]], ptr [[STR:%.*]]) 
+; CHECK: !prof [[PROF_0]] {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[MEMCHR:%.*]] = call ptr @memchr(ptr [[STR]], i32 [[X]], i64 5)
 ; CHECK-NEXT:    ret ptr [[MEMCHR]]
@@ -127,8 +132,9 @@ entry:
   ret ptr %memchr
 }
 
-define ptr @test_memchr_constant_ch() {
-; CHECK-LABEL: define ptr @test_memchr_constant_ch() {
+define ptr @test_memchr_constant_ch() !prof !0 {
+; CHECK-LABEL: define ptr @test_memchr_constant_ch() 
+; CHECK: !prof [[PROF_0]] {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[MEMCHR:%.*]] = call ptr @memchr(ptr @str, i32 49, i64 5)
 ; CHECK-NEXT:    ret ptr [[MEMCHR]]
@@ -138,9 +144,10 @@ entry:
   ret ptr %memchr
 }
 
-define ptr @test_memchr_dynamic_n(i32 %x, i32 %y) {
+define ptr @test_memchr_dynamic_n(i32 %x, i32 %y) !prof !0 {
 ; CHECK-LABEL: define ptr @test_memchr_dynamic_n(
-; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) 
+; CHECK: !prof [[PROF_0]] {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[MEMCHR:%.*]] = call ptr @memchr(ptr @str, i32 [[X]], i32 [[Y]])
 ; CHECK-NEXT:    ret ptr [[MEMCHR]]
@@ -150,9 +157,10 @@ entry:
   ret ptr %memchr
 }
 
-define ptr @test_memchr_long(i32 %x) {
+define ptr @test_memchr_long(i32 %x) !prof !0 {
 ; CHECK-LABEL: define ptr @test_memchr_long(
-; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-SAME: i32 [[X:%.*]])
+; CHECK: !prof [[PROF_0]] {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[MEMCHR:%.*]] = call ptr @memchr(ptr @str_long, i32 [[X]], i64 8)
 ; CHECK-NEXT:    ret ptr [[MEMCHR]]
@@ -163,9 +171,10 @@ entry:
 }
 
 ; We want to check that the compiler still calls memchr if the length is non-constant:
-define ptr @test_memchr_non_constant_length2(i32 %x, i64 %len) {
+define ptr @test_memchr_non_constant_length2(i32 %x, i64 %len) !prof !0 {
 ; CHECK-LABEL: define ptr @test_memchr_non_constant_length2(
-; CHECK-SAME: i32 [[X:%.*]], i64 [[LEN:%.*]]) {
+; CHECK-SAME: i32 [[X:%.*]], i64 [[LEN:%.*]]) 
+; CHECK: !prof [[PROF_0]] {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[MEMCHR:%.*]] = call ptr @memchr(ptr @str, i32 [[X]], i64 [[LEN]])
 ; CHECK-NEXT:    ret ptr [[MEMCHR]]
@@ -174,3 +183,7 @@ entry:
   %memchr = call ptr @memchr(ptr @str, i32 %x, i64 %len)
   ret ptr %memchr
 }
+
+!0 = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF_0]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF_1]] = !{!"unknown", !"aggressive-instcombine"}
\ No newline at end of file

Copy link
Member

@mtrofin mtrofin left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM but let's wait for @alanzhao1 to land #158743.

I'd be fine with decoupling and coming afterwards to patch this file with the new API introduced there, too.

@@ -1350,6 +1350,11 @@ static bool foldMemChr(CallInst *Call, DomTreeUpdater *DTU,
BB->getTerminator()->eraseFromParent();
SwitchInst *SI = IRB.CreateSwitch(
IRB.CreateTrunc(Call->getArgOperand(1), ByteTy), BBNext, N);
Function *F = Call->getFunction();
assert(F && "Instruction does not belong to a function!");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can use what @alanzhao1 did here (once he lands it)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, can you add a comment saying that we can't know the precise weights here, because that would depend on knowing the value distribution of Call->getArgOperand(1)? (If I got that right; this is just for readability.)

Thanks.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

@mtrofin mtrofin requested a review from alanzhao1 September 29, 2025 17:16
@tmsri tmsri self-requested a review September 29, 2025 20:36
@jinhuang1102 jinhuang1102 force-pushed the fixup/no-prof-AggressiveInstCombine-memchr branch from db64543 to 447b1d6 Compare September 29, 2025 22:41
Copy link

github-actions bot commented Sep 29, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@jinhuang1102 jinhuang1102 force-pushed the fixup/no-prof-AggressiveInstCombine-memchr branch from 447b1d6 to bd8514e Compare September 29, 2025 22:46
@jinhuang1102 jinhuang1102 force-pushed the fixup/no-prof-AggressiveInstCombine-memchr branch from bd8514e to 3aa7f6f Compare September 29, 2025 23:35
@mtrofin mtrofin enabled auto-merge (squash) September 29, 2025 23:54

!0 = !{!"function_entry_count", i64 1000}
; CHECK: [[PROF_0]] = !{!"function_entry_count", i64 1000}
; CHECK: [[PROF_1]] = !{!"unknown", !"aggressive-instcombine"}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: missing newline at end of file.

@mtrofin mtrofin merged commit 39f292f into llvm:main Sep 30, 2025
9 checks passed
mahesh-attarde pushed a commit to mahesh-attarde/llvm-project that referenced this pull request Oct 3, 2025
…#160964)

The memchr inliner creates new switch branches but was failing to add
profile metadata. This patch fixes the issue by explicitly adding unknown
branch weights to these branches.

Issue [llvm#147390](https://github.com/llvm/llvm-project/issues/147390)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

5 participants