diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index e9c75f0753f89..a356d314c7af3 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -153,6 +153,37 @@ static cl::opt<unsigned> MisfetchCost(
 static cl::opt<unsigned> JumpInstCost("jump-inst-cost",
                                       cl::desc("Cost of jump instructions."),
                                       cl::init(1), cl::Hidden);
+
+// This enum controls how to optimize two-way branches (a conditional branch
+// immediately followed by an unconditional one). The goal is to optimize for
+// branch prediction and instruction cache efficiency.
+enum class TwoWayBranchOptStrategy {
+  // Do not reverse the condition. Leave the branch code as is.
+  None,
+  // For a two-way branch, make the hot path the fallthrough path. This is more
+  // friendly to static branch prediction (predict not-taken).
+  HotPathFallthrough,
+  // For a two-way branch, make the cold path the fallthrough path. This
+  // improves i-cache efficiency as the unconditional branch is fetched less
+  // often.
+  ColdPathFallthrough
+};
+
+static cl::opt<TwoWayBranchOptStrategy> TwoWayBranchOpt(
+    "two-way-branch-opt", cl::Hidden,
+    cl::desc(
+        "Select the optimization strategy for two-way conditional branches:"),
+    cl::values(
+        clEnumValN(TwoWayBranchOptStrategy::None, "none",
+                   "Avoid optimizing the two-way branches."),
+        clEnumValN(
+            TwoWayBranchOptStrategy::HotPathFallthrough, "hot-fallthrough",
+            "Make the hot path the fallthrough path for two-way branches"),
+        clEnumValN(
+            TwoWayBranchOptStrategy::ColdPathFallthrough, "cold-fallthrough",
+            "Make the cold path the fallthrough path for two-way branches")),
+    cl::init(TwoWayBranchOptStrategy::ColdPathFallthrough));
+
 static cl::opt<bool>
     TailDupPlacement("tail-dup-placement",
                      cl::desc("Perform tail duplication during placement. "
@@ -2979,10 +3010,16 @@ void MachineBlockPlacement::optimizeBranches() {
     // instructions which will benefit ICF.
     if (llvm::shouldOptimizeForSize(ChainBB, PSI, MBFI.get()))
       continue;
-    // If ChainBB has a two-way branch, try to re-order the branches
-    // such that we branch to the successor with higher probability first.
-    if (MBPI->getEdgeProbability(ChainBB, TBB) >=
-        MBPI->getEdgeProbability(ChainBB, FBB))
+    // ChainBB has a two-way branch. Reorder the branch based on
+    // `-two-way-branch-opt`.
+    auto TBBProb = MBPI->getEdgeProbability(ChainBB, TBB);
+    auto FBBProb = MBPI->getEdgeProbability(ChainBB, FBB);
+    bool ReverseBranch =
+        (TwoWayBranchOpt == TwoWayBranchOptStrategy::ColdPathFallthrough &&
+         (FBBProb > TBBProb)) ||
+        (TwoWayBranchOpt == TwoWayBranchOptStrategy::HotPathFallthrough &&
+         (TBBProb > FBBProb));
+    if (!ReverseBranch)
       continue;
     if (TII->reverseBranchCondition(Cond))
       continue;
diff --git a/llvm/test/CodeGen/X86/code_placement_2_way_branch.ll b/llvm/test/CodeGen/X86/code_placement_2_way_branch.ll
new file mode 100644
index 0000000000000..3afa793e71ec7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/code_placement_2_way_branch.ll
@@ -0,0 +1,70 @@
+; RUN: llc -mtriple=x86_64-linux -verify-machineinstrs -two-way-branch-opt=cold-fallthrough < %s | FileCheck %s --check-prefixes=CHECK,COLD-FT
+; RUN: llc -mtriple=x86_64-linux -verify-machineinstrs -two-way-branch-opt=none < %s | FileCheck %s --check-prefixes=CHECK,COLD-FT
+; RUN: llc -mtriple=x86_64-linux -verify-machineinstrs -two-way-branch-opt=hot-fallthrough < %s | FileCheck %s --check-prefixes=CHECK,HOT-FT
+
+define void @foo() !prof !1 {
+; Test that two-way branches are optimized based on `-two-way-branch-opt`.
+;
+; +--------+   5    +--------+
+; | if.then| <----  | entry  |
+; +--------+        +--------+
+;   |   |               |
+;   |   |               | 10
+;   |   |               v
+;   |   |           +--------+
+;   |   |           | if.else|
+;   |   |           +--------+
+;   |   |               |
+;   |   |               | 10
+;   |   |               v
+;   |   |      4    +--------+
+;   |   +---------> | if.end |
+;   |               +--------+
+;   |                   |
+;   |                   | 14
+;   |                   v
+;   |          1    +--------+
+;   +------------>  |  end   |
+;                   +--------+
+;
+; CHECK-LABEL: foo:
+; CHECK: if.else
+; CHECK: .LBB0_3: # %if.end
+; CHECK: .LBB0_4: # %end
+; CHECK: if.then
+; COLD-FT: jne .LBB0_3
+; HOT-FT: je .LBB0_4
+; COLD-FT: jmp .LBB0_4
+; HOT-FT: jmp .LBB0_3
+
+entry:
+  call void @e()
+  %call1 = call zeroext i1 @a()
+  br i1 %call1, label %if.then, label %if.else, !prof !2
+
+if.then:
+  call void @f()
+  %call2 = call zeroext i1 @a()
+  br i1 %call2, label %if.end, label %end, !prof !3
+
+if.else:
+  call void @g()
+  br label %if.end
+
+if.end:
+  call void @h()
+  br label %end
+
+end:
+  ret void
+}
+
+declare zeroext i1 @a()
+declare void @e()
+declare void @g()
+declare void @f()
+declare void @h()
+
+!1 = !{!"function_entry_count", i64 15}
+!2 = !{!"branch_weights", i32 5, i32 10}
+!3 = !{!"branch_weights", i32 4, i32 1}