-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[MergeICmp][profcheck] Propagate profile info #167594
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[MergeICmp][profcheck] Propagate profile info #167594
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
|
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-llvm-ir Author: Mircea Trofin (mtrofin) ChangesFull diff: https://github.com/llvm/llvm-project/pull/167594.diff 5 Files Affected:
diff --git a/llvm/include/llvm/IR/ProfDataUtils.h b/llvm/include/llvm/IR/ProfDataUtils.h
index fade7a2dbac2b..50c16a5791e70 100644
--- a/llvm/include/llvm/IR/ProfDataUtils.h
+++ b/llvm/include/llvm/IR/ProfDataUtils.h
@@ -148,6 +148,9 @@ LLVM_ABI bool extractProfTotalWeight(const Instruction &I,
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights,
bool IsExpected, bool ElideAllZero = false);
+/// Push the weights right to fit in uint32_t.
+LLVM_ABI SmallVector<uint32_t> fitWeights(ArrayRef<uint64_t> Weights);
+
/// Variant of `setBranchWeights` where the `Weights` will be fit first to
/// uint32_t by shifting right.
LLVM_ABI void setFittedBranchWeights(Instruction &I, ArrayRef<uint64_t> Weights,
diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp
index 94dbe1f3988b8..d1ada00fb1f64 100644
--- a/llvm/lib/IR/ProfDataUtils.cpp
+++ b/llvm/lib/IR/ProfDataUtils.cpp
@@ -86,7 +86,7 @@ static void extractFromBranchWeightMD(const MDNode *ProfileData,
}
/// Push the weights right to fit in uint32_t.
-static SmallVector<uint32_t> fitWeights(ArrayRef<uint64_t> Weights) {
+SmallVector<uint32_t> llvm::fitWeights(ArrayRef<uint64_t> Weights) {
SmallVector<uint32_t> Ret;
Ret.reserve(Weights.size());
uint64_t Max = *llvm::max_element(Weights);
diff --git a/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/llvm/lib/Transforms/Scalar/MergeICmps.cpp
index f273e9d90e71e..083aeb188620f 100644
--- a/llvm/lib/Transforms/Scalar/MergeICmps.cpp
+++ b/llvm/lib/Transforms/Scalar/MergeICmps.cpp
@@ -50,8 +50,9 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Instruction.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
@@ -66,6 +67,9 @@ using namespace llvm;
#define DEBUG_TYPE "mergeicmps"
+namespace llvm {
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+} // namespace llvm
namespace {
// A BCE atom "Binary Compare Expression Atom" represents an integer load
@@ -647,6 +651,8 @@ static BasicBlock *mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
ToSplit->split(BB, AA);
}
+ SmallVector<uint32_t, 2> BranchWeights;
+ bool HasBranchWeights = false;
if (Comparisons.size() == 1) {
LLVM_DEBUG(dbgs() << "Only one comparison, updating branches\n");
// Use clone to keep the metadata
@@ -656,6 +662,8 @@ static BasicBlock *mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
RhsLoad->replaceUsesOfWith(RhsLoad->getOperand(0), Rhs);
// There are no blocks to merge, just do the comparison.
IsEqual = Builder.CreateICmpEQ(LhsLoad, RhsLoad);
+ HasBranchWeights =
+ extractBranchWeights(*FirstCmp.BB->getTerminator(), BranchWeights);
} else {
const unsigned TotalSizeBits = std::accumulate(
Comparisons.begin(), Comparisons.end(), 0u,
@@ -673,6 +681,20 @@ static BasicBlock *mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
Builder, DL, &TLI);
IsEqual = Builder.CreateICmpEQ(
MemCmpCall, ConstantInt::get(Builder.getIntNTy(IntBits), 0));
+ SmallVector<uint64_t, 2> Weights(2);
+ Weights[1] = 1;
+ for (const auto &C : Comparisons) {
+ SmallVector<uint32_t, 2> W;
+ if (!extractBranchWeights(*C.BB->getTerminator(), W)) {
+ HasBranchWeights = false;
+ break;
+ }
+ std::swap(W[0], W[1]);
+ Weights = getDisjunctionWeights(Weights, W);
+ }
+ std::swap(Weights[0], Weights[1]);
+ BranchWeights = fitWeights(Weights);
+ HasBranchWeights = true;
}
BasicBlock *const PhiBB = Phi.getParent();
@@ -684,7 +706,9 @@ static BasicBlock *mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
DTU.applyUpdates({{DominatorTree::Insert, BB, PhiBB}});
} else {
// Continue to next block if equal, exit to phi else.
- Builder.CreateCondBr(IsEqual, NextCmpBlock, PhiBB);
+ auto *BI = Builder.CreateCondBr(IsEqual, NextCmpBlock, PhiBB);
+ if (!ProfcheckDisableMetadataFixes && HasBranchWeights)
+ setBranchWeights(*BI, BranchWeights, /*IsExpected=*/false);
Phi.addIncoming(ConstantInt::getFalse(Context), BB);
DTU.applyUpdates({{DominatorTree::Insert, BB, NextCmpBlock},
{DominatorTree::Insert, BB, PhiBB}});
diff --git a/llvm/test/Transforms/MergeICmps/X86/alias-merge-blocks.ll b/llvm/test/Transforms/MergeICmps/X86/alias-merge-blocks.ll
index f4bc96db86146..f55dbe678582f 100644
--- a/llvm/test/Transforms/MergeICmps/X86/alias-merge-blocks.ll
+++ b/llvm/test/Transforms/MergeICmps/X86/alias-merge-blocks.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt < %s -mtriple=x86_64-unknown-unknown -passes=mergeicmps -verify-dom-info -S | FileCheck %s --check-prefix=X86
%S = type { i32, i32, i32, i32, i32}
@@ -15,7 +15,7 @@ define zeroext i1 @opeq1(
; X86-NEXT: ret i1 [[TMP2]]
;
ptr nocapture readonly dereferenceable(16) %a,
- ptr nocapture readonly dereferenceable(16) %b) local_unnamed_addr nofree nosync {
+ ptr nocapture readonly dereferenceable(16) %b) local_unnamed_addr nofree nosync !prof !2 {
entry:
%ptr = alloca i32
@@ -24,7 +24,7 @@ entry:
; Does other work, has no interference, merge block
store i32 42, ptr %ptr
%cmp.i = icmp eq i32 %0, %1
- br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
+ br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit, !prof !3
land.rhs.i:
%second.i = getelementptr inbounds %S, ptr %a, i64 0, i32 1
@@ -32,7 +32,7 @@ land.rhs.i:
%second2.i = getelementptr inbounds %S, ptr %b, i64 0, i32 1
%3 = load i32, ptr %second2.i, align 4
%cmp2.i = icmp eq i32 %2, %3
- br i1 %cmp2.i, label %land.rhs.i.2, label %opeq1.exit
+ br i1 %cmp2.i, label %land.rhs.i.2, label %opeq1.exit, !prof !4
land.rhs.i.2:
%third.i = getelementptr inbounds %S, ptr %a, i64 0, i32 2
@@ -40,7 +40,7 @@ land.rhs.i.2:
%third2.i = getelementptr inbounds %S, ptr %b, i64 0, i32 2
%5 = load i32, ptr %third2.i, align 4
%cmp3.i = icmp eq i32 %4, %5
- br i1 %cmp3.i, label %land.rhs.i.3, label %opeq1.exit
+ br i1 %cmp3.i, label %land.rhs.i.3, label %opeq1.exit, !prof !5
land.rhs.i.3:
%fourth.i = getelementptr inbounds %S, ptr %a, i64 0, i32 3
@@ -55,15 +55,15 @@ opeq1.exit:
ret i1 %8
}
-define zeroext i1 @part_sequent_eq_with_metadata() {
+define zeroext i1 @part_sequent_eq_with_metadata() !prof !2 {
; X86-LABEL: @part_sequent_eq_with_metadata(
; X86-NEXT: bb01:
; X86-NEXT: [[A:%.*]] = alloca [[S:%.*]], align 8
; X86-NEXT: [[B:%.*]] = alloca [[S]], align 8
-; X86-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4, !range [[RNG0:![0-9]+]], !noundef !1
-; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[B]], align 4, !range [[RNG0]], !noundef !1
+; X86-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4, !range [[RNG1:![0-9]+]], !noundef [[META2:![0-9]+]]
+; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[B]], align 4, !range [[RNG1]], !noundef [[META2]]
; X86-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
-; X86-NEXT: br i1 [[TMP2]], label %"bb1+bb2+bb3", label [[EXIT:%.*]]
+; X86-NEXT: br i1 [[TMP2]], label %"bb1+bb2+bb3", label [[EXIT:%.*]], !prof [[PROF3:![0-9]+]]
; X86: "bb1+bb2+bb3":
; X86-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[S]], ptr [[A]], i64 0, i32 2
; X86-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[S]], ptr [[B]], i64 0, i32 2
@@ -80,7 +80,7 @@ bb0:
%value0 = load i32, ptr %a, align 4, !range !0, !noundef !1
%value1 = load i32, ptr %b, align 4, !range !0, !noundef !1
%cmp.i = icmp eq i32 %value0, %value1
- br i1 %cmp.i, label %bb1, label %exit
+ br i1 %cmp.i, label %bb1, label %exit, !prof !3
bb1:
%second.i = getelementptr inbounds %S, ptr %a, i64 0, i32 2
@@ -88,7 +88,7 @@ bb1:
%second2.i = getelementptr inbounds %S, ptr %b, i64 0, i32 2
%value3 = load i32, ptr %second2.i, align 4
%cmp2.i = icmp eq i32 %value2, %value3
- br i1 %cmp2.i, label %bb2, label %exit
+ br i1 %cmp2.i, label %bb2, label %exit, !prof !4
bb2:
%third.i = getelementptr inbounds %S, ptr %a, i64 0, i32 3
@@ -96,7 +96,7 @@ bb2:
%third2.i = getelementptr inbounds %S, ptr %b, i64 0, i32 3
%value5 = load i32, ptr %third2.i, align 4
%cmp3.i = icmp eq i32 %value4, %value5
- br i1 %cmp3.i, label %bb3, label %exit
+ br i1 %cmp3.i, label %bb3, label %exit, !prof !5
bb3:
%fourth.i = getelementptr inbounds %S, ptr %a, i64 0, i32 4
@@ -113,3 +113,16 @@ exit:
!0 = !{i32 0, i32 2}
!1 = !{}
+!2 = !{!"function_entry_count", i32 100}
+!3 = !{!"branch_weights", i32 2, i32 3}
+!4 = !{!"branch_weights", i32 5, i32 7}
+!5 = !{!"branch_weights", i32 11, i32 13}
+;.
+; X86: attributes #[[ATTR0:[0-9]+]] = { nofree nosync }
+; X86: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: read) }
+;.
+; X86: [[META0:![0-9]+]] = !{!"function_entry_count", i32 100}
+; X86: [[RNG1]] = !{i32 0, i32 2}
+; X86: [[META2]] = !{}
+; X86: [[PROF3]] = !{!"branch_weights", i32 2, i32 3}
+;.
diff --git a/llvm/test/Transforms/MergeICmps/X86/entry-block-shuffled.ll b/llvm/test/Transforms/MergeICmps/X86/entry-block-shuffled.ll
index bc6beefb2caee..720cbc459a284 100644
--- a/llvm/test/Transforms/MergeICmps/X86/entry-block-shuffled.ll
+++ b/llvm/test/Transforms/MergeICmps/X86/entry-block-shuffled.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt < %s -passes=mergeicmps -verify-dom-info -mtriple=x86_64-unknown-unknown -S | FileCheck %s
%S = type { i32, i32, i32, i32 }
@@ -15,11 +15,11 @@ define zeroext i1 @opeq1(
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]]
-; CHECK-NEXT: br i1 [[TMP4]], label %"land.rhs.i+land.rhs.i.2", label [[OPEQ1_EXIT:%.*]]
+; CHECK-NEXT: br i1 [[TMP4]], label %"land.rhs.i+land.rhs.i.2", label [[OPEQ1_EXIT:%.*]], !prof [[PROF1:![0-9]+]]
; CHECK: "land.rhs.i+land.rhs.i.2":
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(ptr [[A]], ptr [[B]], i64 8)
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[MEMCMP]], 0
-; CHECK-NEXT: br i1 [[TMP5]], label [[LAND_RHS_I_31:%.*]], label [[OPEQ1_EXIT]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[LAND_RHS_I_31:%.*]], label [[OPEQ1_EXIT]], !prof [[PROF2:![0-9]+]]
; CHECK: land.rhs.i.31:
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[S]], ptr [[A]], i64 0, i32 3
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[S]], ptr [[B]], i64 0, i32 3
@@ -32,20 +32,20 @@ define zeroext i1 @opeq1(
; CHECK-NEXT: ret i1 [[TMP11]]
;
ptr nocapture readonly dereferenceable(16) %a,
- ptr nocapture readonly dereferenceable(16) %b) local_unnamed_addr nofree nosync {
+ ptr nocapture readonly dereferenceable(16) %b) local_unnamed_addr nofree nosync !prof !0 {
entry:
%first.i = getelementptr inbounds %S, ptr %a, i64 0, i32 3
%0 = load i32, ptr %first.i, align 4
%first1.i = getelementptr inbounds %S, ptr %b, i64 0, i32 2
%1 = load i32, ptr %first1.i, align 4
%cmp.i = icmp eq i32 %0, %1
- br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
+ br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit, !prof !1
land.rhs.i:
%2 = load i32, ptr %a, align 4
%3 = load i32, ptr %b, align 4
%cmp3.i = icmp eq i32 %2, %3
- br i1 %cmp3.i, label %land.rhs.i.2, label %opeq1.exit
+ br i1 %cmp3.i, label %land.rhs.i.2, label %opeq1.exit, !prof !2
land.rhs.i.2:
%third.i = getelementptr inbounds %S, ptr %a, i64 0, i32 1
@@ -53,7 +53,7 @@ land.rhs.i.2:
%third2.i = getelementptr inbounds %S, ptr %b, i64 0, i32 1
%5 = load i32, ptr %third2.i, align 4
%cmp4.i = icmp eq i32 %4, %5
- br i1 %cmp4.i, label %land.rhs.i.3, label %opeq1.exit
+ br i1 %cmp4.i, label %land.rhs.i.3, label %opeq1.exit, !prof !3
land.rhs.i.3:
%fourth.i = getelementptr inbounds %S, ptr %a, i64 0, i32 3
@@ -67,3 +67,17 @@ opeq1.exit:
%8 = phi i1 [ false, %entry ], [ false, %land.rhs.i], [ false, %land.rhs.i.2 ], [ %cmp5.i, %land.rhs.i.3 ]
ret i1 %8
}
+
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 2, i32 3}
+!2 = !{!"branch_weights", i32 5, i32 7}
+!3 = !{!"branch_weights", i32 11, i32 13}
+!4 = !{!"branch_weights", i32 17, i32 19}
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nofree nosync }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: read) }
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 2, i32 3}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 55, i32 233}
+;.
|
3affc72 to
2026110
Compare
2026110 to
e5f8a76
Compare
49b9823 to
967055b
Compare
c1e4550 to
7e5054e
Compare
| ; X86-NEXT: [[B:%.*]] = alloca [[S]], align 8 | ||
| ; X86-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4, !range [[RNG0:![0-9]+]], !noundef !1 | ||
| ; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[B]], align 4, !range [[RNG0]], !noundef !1 | ||
| ; X86-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4, !range [[RNG1:![0-9]+]], !noundef [[META2:![0-9]+]] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why the rename RNG0 -> RNG1 ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
UTC did that. Probably because we now pass --check-globals and some metadata is now apparent to UTC, and occurs earlier.
| !1 = !{!"branch_weights", i32 2, i32 3} | ||
| !2 = !{!"branch_weights", i32 5, i32 7} | ||
| !3 = !{!"branch_weights", i32 11, i32 13} | ||
| !4 = !{!"branch_weights", i32 17, i32 19} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unused ?
7e5054e to
1f12bc6
Compare
1f12bc6 to
22d3e3b
Compare

Propagate branch weights in
mergeComparisons : the probability of reaching the common "exit" BB (bb_phi in the description inprocessPhi)doesn't change, and is a disjunction over the probabilities of doing that from the blocks performing comparisons which are now being mergedIssue #147390