diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 83c2895f91fbb3..bbc5ab13a0cd3e 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -130,6 +130,14 @@ namespace { // Remember which edges have been considered for breaking. SmallSet, 8> CEBCandidates; + // Memorize the register that also wanted to sink into the same block along + // a different critical edge. + // {register to sink, sink-to block} -> the first sink-from block. + // We're recording the first sink-from block because that (critical) edge + // was deferred until we see another register that's going to sink into the + // same block. + DenseMap, MachineBasicBlock *> + CEMergeCandidates; // Remember which edges we are about to split. // This is different from CEBCandidates since those edges // will be split. @@ -197,14 +205,17 @@ namespace { void releaseMemory() override { CEBCandidates.clear(); + CEMergeCandidates.clear(); } private: bool ProcessBlock(MachineBasicBlock &MBB); void ProcessDbgInst(MachineInstr &MI); - bool isWorthBreakingCriticalEdge(MachineInstr &MI, - MachineBasicBlock *From, - MachineBasicBlock *To); + bool isLegalToBreakCriticalEdge(MachineInstr &MI, MachineBasicBlock *From, + MachineBasicBlock *To, bool BreakPHIEdge); + bool isWorthBreakingCriticalEdge(MachineInstr &MI, MachineBasicBlock *From, + MachineBasicBlock *To, + MachineBasicBlock *&DeferredFromBlock); bool hasStoreBetween(MachineBasicBlock *From, MachineBasicBlock *To, MachineInstr &MI); @@ -725,6 +736,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { // Process all basic blocks. CEBCandidates.clear(); + CEMergeCandidates.clear(); ToSplit.clear(); for (auto &MBB: MF) MadeChange |= ProcessBlock(MBB); @@ -873,9 +885,9 @@ void MachineSinking::ProcessDbgInst(MachineInstr &MI) { SeenDbgVars.insert(Var); } -bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI, - MachineBasicBlock *From, - MachineBasicBlock *To) { +bool MachineSinking::isWorthBreakingCriticalEdge( + MachineInstr &MI, MachineBasicBlock *From, MachineBasicBlock *To, + MachineBasicBlock *&DeferredFromBlock) { // FIXME: Need much better heuristics. // If the pass has already considered breaking this edge (during this pass @@ -887,6 +899,27 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI, if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI)) return true; + // Check and record the register and the destination block we want to sink + // into. Note that we want to do the following before the next check on branch + // probability. Because we want to record the initial candidate even if it's + // on hot edge, so that other candidates that might not on hot edges can be + // sinked as well. + for (const auto &MO : MI.all_defs()) { + Register Reg = MO.getReg(); + if (!Reg) + continue; + Register SrcReg = Reg.isVirtual() ? TRI->lookThruCopyLike(Reg, MRI) : Reg; + auto Key = std::make_pair(SrcReg, To); + auto Res = CEMergeCandidates.try_emplace(Key, From); + // We wanted to sink the same register into the same block, consider it to + // be profitable. + if (!Res.second) { + // Return the source block that was previously held off. + DeferredFromBlock = Res.first->second; + return true; + } + } + if (From->isSuccessor(To) && MBPI->getEdgeProbability(From, To) <= BranchProbability(SplitEdgeProbabilityThreshold, 100)) return true; @@ -921,13 +954,10 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI, return false; } -bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI, - MachineBasicBlock *FromBB, - MachineBasicBlock *ToBB, - bool BreakPHIEdge) { - if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB)) - return false; - +bool MachineSinking::isLegalToBreakCriticalEdge(MachineInstr &MI, + MachineBasicBlock *FromBB, + MachineBasicBlock *ToBB, + bool BreakPHIEdge) { // Avoid breaking back edge. From == To means backedge for single BB cycle. if (!SplitEdges || FromBB == ToBB) return false; @@ -985,11 +1015,32 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI, return false; } - ToSplit.insert(std::make_pair(FromBB, ToBB)); - return true; } +bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI, + MachineBasicBlock *FromBB, + MachineBasicBlock *ToBB, + bool BreakPHIEdge) { + bool Status = false; + MachineBasicBlock *DeferredFromBB = nullptr; + if (isWorthBreakingCriticalEdge(MI, FromBB, ToBB, DeferredFromBB)) { + // If there is a DeferredFromBB, we consider FromBB only if _both_ + // of them are legal to split. + if ((!DeferredFromBB || + ToSplit.count(std::make_pair(DeferredFromBB, ToBB)) || + isLegalToBreakCriticalEdge(MI, DeferredFromBB, ToBB, BreakPHIEdge)) && + isLegalToBreakCriticalEdge(MI, FromBB, ToBB, BreakPHIEdge)) { + ToSplit.insert(std::make_pair(FromBB, ToBB)); + if (DeferredFromBB) + ToSplit.insert(std::make_pair(DeferredFromBB, ToBB)); + Status = true; + } + } + + return Status; +} + std::vector & MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) { // Currently to save compiling time, MBB's register pressure will not change diff --git a/llvm/test/CodeGen/AArch64/and-sink.ll b/llvm/test/CodeGen/AArch64/and-sink.ll index f298a55dab721e..c84310629e5fda 100644 --- a/llvm/test/CodeGen/AArch64/and-sink.ll +++ b/llvm/test/CodeGen/AArch64/and-sink.ll @@ -46,9 +46,8 @@ bb2: define dso_local i32 @and_sink2(i32 %a, i1 %c, i1 %c2) { ; CHECK-LABEL: and_sink2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: adrp x9, A -; CHECK-NEXT: str wzr, [x9, :lo12:A] +; CHECK-NEXT: adrp x8, A +; CHECK-NEXT: str wzr, [x8, :lo12:A] ; CHECK-NEXT: tbz w1, #0, .LBB1_5 ; CHECK-NEXT: // %bb.1: // %bb0.preheader ; CHECK-NEXT: adrp x8, B @@ -56,17 +55,15 @@ define dso_local i32 @and_sink2(i32 %a, i1 %c, i1 %c2) { ; CHECK-NEXT: .LBB1_2: // %bb0 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: str wzr, [x8, :lo12:B] -; CHECK-NEXT: tbz w2, #0, .LBB1_6 +; CHECK-NEXT: tbz w2, #0, .LBB1_5 ; CHECK-NEXT: // %bb.3: // %bb1 ; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1 ; CHECK-NEXT: str wzr, [x9, :lo12:C] ; CHECK-NEXT: tbnz w0, #2, .LBB1_2 ; CHECK-NEXT: // %bb.4: -; CHECK-NEXT: mov w8, #1 // =0x1 -; CHECK-NEXT: .LBB1_5: // %common.ret -; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB1_6: +; CHECK-NEXT: .LBB1_5: ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll b/llvm/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll index d92bbfd7a21d66..49e31447c1c0d1 100644 --- a/llvm/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll +++ b/llvm/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll @@ -4,13 +4,11 @@ define i64 @test_or(i32 %a, i32 %b) { ; CHECK-LABEL: test_or: ; CHECK: ; %bb.0: ; %bb1 -; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: cbnz w0, LBB0_2 +; CHECK-NEXT: LBB0_1: ; CHECK-NEXT: mov x0, xzr -; CHECK-NEXT: cbnz w8, LBB0_2 -; CHECK-NEXT: LBB0_1: ; %common.ret ; CHECK-NEXT: ret ; CHECK-NEXT: LBB0_2: ; %bb1.cond.split -; CHECK-NEXT: mov x0, xzr ; CHECK-NEXT: cbz w1, LBB0_1 ; CHECK-NEXT: ; %bb.3: ; %bb4 ; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill @@ -37,13 +35,11 @@ bb4: define i64 @test_or_select(i32 %a, i32 %b) { ; CHECK-LABEL: test_or_select: ; CHECK: ; %bb.0: ; %bb1 -; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: cbnz w0, LBB1_2 +; CHECK-NEXT: LBB1_1: ; CHECK-NEXT: mov x0, xzr -; CHECK-NEXT: cbnz w8, LBB1_2 -; CHECK-NEXT: LBB1_1: ; %common.ret ; CHECK-NEXT: ret ; CHECK-NEXT: LBB1_2: ; %bb1.cond.split -; CHECK-NEXT: mov x0, xzr ; CHECK-NEXT: cbz w1, LBB1_1 ; CHECK-NEXT: ; %bb.3: ; %bb4 ; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill @@ -70,13 +66,11 @@ bb4: define i64 @test_and(i32 %a, i32 %b) { ; CHECK-LABEL: test_and: ; CHECK: ; %bb.0: ; %bb1 -; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: cbnz w0, LBB2_2 +; CHECK-NEXT: LBB2_1: ; CHECK-NEXT: mov x0, xzr -; CHECK-NEXT: cbnz w8, LBB2_2 -; CHECK-NEXT: LBB2_1: ; %common.ret ; CHECK-NEXT: ret ; CHECK-NEXT: LBB2_2: ; %bb1.cond.split -; CHECK-NEXT: mov x0, xzr ; CHECK-NEXT: cbz w1, LBB2_1 ; CHECK-NEXT: ; %bb.3: ; %bb4 ; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill @@ -103,13 +97,11 @@ bb4: define i64 @test_and_select(i32 %a, i32 %b) { ; CHECK-LABEL: test_and_select: ; CHECK: ; %bb.0: ; %bb1 -; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: cbnz w0, LBB3_2 +; CHECK-NEXT: LBB3_1: ; CHECK-NEXT: mov x0, xzr -; CHECK-NEXT: cbnz w8, LBB3_2 -; CHECK-NEXT: LBB3_1: ; %common.ret ; CHECK-NEXT: ret ; CHECK-NEXT: LBB3_2: ; %bb1.cond.split -; CHECK-NEXT: mov x0, xzr ; CHECK-NEXT: cbz w1, LBB3_1 ; CHECK-NEXT: ; %bb.3: ; %bb4 ; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill diff --git a/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll b/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll index 21690c439b2d96..6714991d091cfc 100644 --- a/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll +++ b/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll @@ -13,7 +13,7 @@ define i1 @sink_li(ptr %text, ptr %text.addr.0) nounwind { ; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: call toupper ; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: beqz s0, .LBB0_25 +; CHECK-NEXT: beqz s0, .LBB0_26 ; CHECK-NEXT: # %bb.1: # %while.body.preheader ; CHECK-NEXT: li a2, 1 ; CHECK-NEXT: li a3, 9 @@ -55,36 +55,34 @@ define i1 @sink_li(ptr %text, ptr %text.addr.0) nounwind { ; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 ; CHECK-NEXT: beq a2, a3, .LBB0_2 ; CHECK-NEXT: # %bb.14: # %while.body.6 -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: beqz a2, .LBB0_25 +; CHECK-NEXT: beqz a2, .LBB0_24 ; CHECK-NEXT: # %bb.15: # %strdup.exit.split.loop.exit126 ; CHECK-NEXT: addi s0, s1, 7 -; CHECK-NEXT: j .LBB0_24 -; CHECK-NEXT: .LBB0_16: # %while.body -; CHECK-NEXT: bnez a2, .LBB0_18 ; CHECK-NEXT: j .LBB0_25 +; CHECK-NEXT: .LBB0_16: # %while.body +; CHECK-NEXT: beqz a2, .LBB0_26 +; CHECK-NEXT: j .LBB0_18 ; CHECK-NEXT: .LBB0_17: # %while.body.1 -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: beqz a2, .LBB0_25 +; CHECK-NEXT: beqz a2, .LBB0_24 ; CHECK-NEXT: .LBB0_18: # %strdup.exit.loopexit ; CHECK-NEXT: li s0, 0 -; CHECK-NEXT: j .LBB0_24 +; CHECK-NEXT: j .LBB0_25 ; CHECK-NEXT: .LBB0_19: # %while.body.3 -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: beqz a2, .LBB0_25 +; CHECK-NEXT: beqz a2, .LBB0_24 ; CHECK-NEXT: # %bb.20: # %strdup.exit.split.loop.exit120 ; CHECK-NEXT: addi s0, s1, 4 -; CHECK-NEXT: j .LBB0_24 +; CHECK-NEXT: j .LBB0_25 ; CHECK-NEXT: .LBB0_21: # %while.body.4 -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: beqz a2, .LBB0_25 +; CHECK-NEXT: beqz a2, .LBB0_24 ; CHECK-NEXT: # %bb.22: # %strdup.exit.split.loop.exit122 ; CHECK-NEXT: addi s0, s1, 5 -; CHECK-NEXT: j .LBB0_24 +; CHECK-NEXT: j .LBB0_25 ; CHECK-NEXT: .LBB0_23: # %while.body.5 +; CHECK-NEXT: bnez a2, .LBB0_25 +; CHECK-NEXT: .LBB0_24: ; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: beqz a2, .LBB0_25 -; CHECK-NEXT: .LBB0_24: # %strdup.exit +; CHECK-NEXT: j .LBB0_26 +; CHECK-NEXT: .LBB0_25: # %strdup.exit ; CHECK-NEXT: li s1, 0 ; CHECK-NEXT: mv s2, a0 ; CHECK-NEXT: li a0, 0 @@ -95,7 +93,7 @@ define i1 @sink_li(ptr %text, ptr %text.addr.0) nounwind { ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: jalr s1 ; CHECK-NEXT: li a1, 1 -; CHECK-NEXT: .LBB0_25: # %return +; CHECK-NEXT: .LBB0_26: # %return ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload