Skip to content

Commit

Permalink
[PPC] Correctly adjust branch probability in PPCReduceCRLogicals
Browse files Browse the repository at this point in the history
After PPCReduceCRLogicals splits the original MBB into two, the two affected branches still use the original branch probability, which is unreasonable. Suppose we have the following code, where the probability of each successor is 50%.

    condc = conda || condb
    br condc, label %target, label %fallthrough

It can be transformed into the following:

    br conda, label %target, label %newbb
  newbb:
    br condb, label %target, label %fallthrough

Since each branch has a probability of 50% to each successor, the total probability of reaching %fallthrough is now 25% (0.5 × 0.5), and the total probability of reaching %target is 75%. This changes the original profiling data. A more reasonable choice is to give the false side of each branch a probability of about 70%, so that the total probability of reaching %fallthrough is 0.7 × 0.7 ≈ 49%, which is close to the original 50%.

This patch assumes that the two incoming edges of the branch target have the same edge frequency, computes a new probability for each target, and keeps the total probability to the original targets unchanged.

Differential Revision: https://reviews.llvm.org/D62430

llvm-svn: 362237
  • Loading branch information
weiguozhi committed May 31, 2019
1 parent 24016eb commit c3a24e9
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 7 deletions.
13 changes: 13 additions & 0 deletions llvm/include/llvm/Support/BranchProbability.h
Expand Up @@ -118,6 +118,13 @@ class BranchProbability {
return *this;
}

/// Divides this probability by \p RHS in place and returns *this.
///
/// The quotient is computed in 64-bit arithmetic as N * D / RHS.N, with
/// RHS.N / 2 added to the dividend first so the integer division rounds to
/// nearest instead of truncating.
///
/// Preconditions (checked by assertion):
///  - neither operand is the unknown probability;
///  - \p RHS is non-zero, otherwise the division below is undefined;
///  - this probability does not exceed \p RHS, otherwise the quotient would
///    be larger than D and would be silently truncated when stored back to N.
BranchProbability &operator/=(BranchProbability RHS) {
  assert(N != UnknownN && RHS.N != UnknownN &&
         "Unknown probability cannot participate in arithmetics.");
  assert(RHS.N != 0 && "Cannot divide by a zero probability.");
  assert(N <= RHS.N && "Quotient of two probabilities cannot exceed one.");
  // Widen before multiplying so that N * D cannot overflow.
  N = (static_cast<uint64_t>(N) * D + RHS.N / 2) / RHS.N;
  return *this;
}

BranchProbability &operator/=(uint32_t RHS) {
assert(N != UnknownN &&
"Unknown probability cannot participate in arithmetics.");
Expand Down Expand Up @@ -150,6 +157,12 @@ class BranchProbability {
return Prob;
}

/// Returns the result of dividing this probability by \p RHS, leaving *this
/// unmodified. All arithmetic is delegated to the compound operator/=.
BranchProbability operator/(BranchProbability RHS) const {
  BranchProbability Quotient = *this;
  return Quotient /= RHS;
}

BranchProbability operator/(uint32_t RHS) const {
BranchProbability Prob(*this);
Prob /= RHS;
Expand Down
41 changes: 35 additions & 6 deletions llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
Expand Up @@ -166,9 +166,33 @@ static bool splitMBB(BlockSplitInfo &BSI) {
: *ThisMBB->succ_begin();
MachineBasicBlock *NewBRTarget =
BSI.BranchToFallThrough ? OrigFallThrough : OrigTarget;
BranchProbability ProbToNewTarget =
!BSI.MBPI ? BranchProbability::getUnknown()
: BSI.MBPI->getEdgeProbability(ThisMBB, NewBRTarget);

// It's impossible to know the precise branch probability after the split.
// But it still needs to be reasonable, the whole probability to original
// targets should not be changed.
// After split NewBRTarget will get two incoming edges. Assume P0 is the
// original branch probability to NewBRTarget, P1 and P2 are new branch
// probabilities to NewBRTarget after split. If the two edge frequencies are
// same, then
// F * P1 = F * P0 / 2 ==> P1 = P0 / 2
// F * (1 - P1) * P2 = F * P1 ==> P2 = P1 / (1 - P1)
BranchProbability ProbToNewTarget, ProbFallThrough; // Prob for new Br.
BranchProbability ProbOrigTarget, ProbOrigFallThrough; // Prob for orig Br.
ProbToNewTarget = ProbFallThrough = BranchProbability::getUnknown();
ProbOrigTarget = ProbOrigFallThrough = BranchProbability::getUnknown();
if (BSI.MBPI) {
if (BSI.BranchToFallThrough) {
ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigFallThrough) / 2;
ProbFallThrough = ProbToNewTarget.getCompl();
ProbOrigFallThrough = ProbToNewTarget / ProbToNewTarget.getCompl();
ProbOrigTarget = ProbOrigFallThrough.getCompl();
} else {
ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigTarget) / 2;
ProbFallThrough = ProbToNewTarget.getCompl();
ProbOrigTarget = ProbToNewTarget / ProbToNewTarget.getCompl();
ProbOrigFallThrough = ProbOrigTarget.getCompl();
}
}

// Create a new basic block.
MachineBasicBlock::iterator InsertPoint = BSI.SplitBefore;
Expand All @@ -180,11 +204,16 @@ static bool splitMBB(BlockSplitInfo &BSI) {
// Move everything after SplitBefore into the new block.
NewMBB->splice(NewMBB->end(), ThisMBB, InsertPoint, ThisMBB->end());
NewMBB->transferSuccessors(ThisMBB);
if (!ProbOrigTarget.isUnknown()) {
auto MBBI = std::find(NewMBB->succ_begin(), NewMBB->succ_end(), OrigTarget);
NewMBB->setSuccProbability(MBBI, ProbOrigTarget);
MBBI = std::find(NewMBB->succ_begin(), NewMBB->succ_end(), OrigFallThrough);
NewMBB->setSuccProbability(MBBI, ProbOrigFallThrough);
}

// Add the two successors to ThisMBB. The probabilities come from the
// existing blocks if available.
// Add the two successors to ThisMBB.
ThisMBB->addSuccessor(NewBRTarget, ProbToNewTarget);
ThisMBB->addSuccessor(NewMBB, ProbToNewTarget.getCompl());
ThisMBB->addSuccessor(NewMBB, ProbFallThrough);

// Add the branches to ThisMBB.
BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(),
Expand Down
88 changes: 88 additions & 0 deletions llvm/test/CodeGen/PowerPC/reduce_cr.ll
@@ -0,0 +1,88 @@
; RUN: llc -O2 -ppc-reduce-cr-logicals -print-machine-bfi -o - %s 2>&1 | FileCheck %s
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-grtev4-linux-gnu"

; First block frequency info
;CHECK: block-frequency-info: loop_test
;CHECK-NEXT: - BB0[entry]: float = 1.0, int = 12
;CHECK-NEXT: - BB1[for.check]: float = 2.6667, int = 34
;CHECK-NEXT: - BB2[test1]: float = 1.6667, int = 21
;CHECK-NEXT: - BB3[optional1]: float = 0.625, int = 8

;CHECK: block-frequency-info: loop_test
;CHECK: block-frequency-info: loop_test
;CHECK: block-frequency-info: loop_test

; Last block frequency info
;CHECK: block-frequency-info: loop_test
;CHECK-NEXT: - BB0[entry]: float = 1.0, int = 12
;CHECK-NEXT: - BB1[for.check]: float = 2.6667, int = 34
;CHECK-NEXT: - BB2[for.check]: float = 2.1667, int = 27
;CHECK-NEXT: - BB3[test1]: float = 1.6667, int = 21
;CHECK-NEXT: - BB4[optional1]: float = 0.625, int = 8


; Test input: a loop whose header branch condition is the `and` of two i1
; compares. The loop body tests four individual bits of the loaded tag; for
; each bit it either skips ahead or first runs a block of four calls.
; Every conditional branch carries the !1 branch weights.
define void @loop_test(i32* %tags, i32 %count) {
entry:
br label %for.check
for.check:
; Loop counter: starts at %count and is decremented in for.latch.
%count.loop = phi i32 [%count, %entry], [%count.sub, %for.latch]
%done.count = icmp ugt i32 %count.loop, 0
; Note: the tag address is indexed by %count, not by the loop counter.
%tag_ptr = getelementptr inbounds i32, i32* %tags, i32 %count
%tag = load i32, i32* %tag_ptr
%done.tag = icmp eq i32 %tag, 0
; Combined i1 condition feeding the branch; presumably this is the logical
; operation the pass under test splits into two branches (confirm against
; the pass output).
%done = and i1 %done.count, %done.tag
br i1 %done, label %test1, label %exit, !prof !1
test1:
; Bit 0 of the tag: go straight to test2 when clear, else run optional1.
%tagbit1 = and i32 %tag, 1
%tagbit1eq0 = icmp eq i32 %tagbit1, 0
br i1 %tagbit1eq0, label %test2, label %optional1, !prof !1
optional1:
call void @a()
call void @a()
call void @a()
call void @a()
br label %test2
test2:
; Bit 1 of the tag (mask 2).
%tagbit2 = and i32 %tag, 2
%tagbit2eq0 = icmp eq i32 %tagbit2, 0
br i1 %tagbit2eq0, label %test3, label %optional2, !prof !1
optional2:
call void @b()
call void @b()
call void @b()
call void @b()
br label %test3
test3:
; Bit 2 of the tag (mask 4).
%tagbit3 = and i32 %tag, 4
%tagbit3eq0 = icmp eq i32 %tagbit3, 0
br i1 %tagbit3eq0, label %test4, label %optional3, !prof !1
optional3:
call void @c()
call void @c()
call void @c()
call void @c()
br label %test4
test4:
; Bit 3 of the tag (mask 8).
%tagbit4 = and i32 %tag, 8
%tagbit4eq0 = icmp eq i32 %tagbit4, 0
br i1 %tagbit4eq0, label %for.latch, label %optional4, !prof !1
optional4:
call void @d()
call void @d()
call void @d()
call void @d()
br label %for.latch
for.latch:
; Decrement the loop counter and return to the loop header.
%count.sub = sub i32 %count.loop, 1
br label %for.check
exit:
ret void
}

declare void @a()
declare void @b()
declare void @c()
declare void @d()

!1 = !{!"branch_weights", i32 5, i32 3}
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/PowerPC/select-i1-vs-i1.ll
@@ -1,4 +1,4 @@
; RUN: llc -ppc-reduce-cr-logicals -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -ppc-reduce-cr-logicals -verify-machineinstrs -tail-dup-placement=false < %s | FileCheck %s
; RUN: llc -ppc-reduce-cr-logicals -verify-machineinstrs \
; RUN: -ppc-gen-isel=false < %s | FileCheck --check-prefix=CHECK-NO-ISEL %s
target datalayout = "E-m:e-i64:64-n32:64"
Expand Down

0 comments on commit c3a24e9

Please sign in to comment.