Skip to content

Commit

Permalink
Reland "[AArch64][CodeGen] Avoid inverting hot branches during relaxa…
Browse files Browse the repository at this point in the history
…tion""

This is a reland of 46d2d75, which was
reverted because it appeared to break the build at
https://lab.llvm.org/buildbot/#/builders/21/builds/78779. However, that
buildbot is spuriously broken because Flang::underscoring.f90 is
nondeterministic.
  • Loading branch information
dhoekwater committed Aug 21, 2023
1 parent 5f771c9 commit e223e45
Show file tree
Hide file tree
Showing 2 changed files with 328 additions and 3 deletions.
63 changes: 61 additions & 2 deletions llvm/lib/CodeGen/BranchRelaxation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ class BranchRelaxation : public MachineFunctionPass {
};

SmallVector<BasicBlockInfo, 16> BlockInfo;

// The basic block after which trampolines are inserted. This is the last
// basic block that isn't in the cold section.
MachineBasicBlock *TrampolineInsertionPoint = nullptr;
std::unique_ptr<RegScavenger> RS;
LivePhysRegs LiveRegs;

Expand Down Expand Up @@ -166,16 +170,27 @@ LLVM_DUMP_METHOD void BranchRelaxation::dumpBBs() {
/// Rebuild the per-block bookkeeping for the current function.
///
/// Resets BlockInfo to one entry per block ID, records the size of every
/// basic block, selects the trampoline insertion point, and then recomputes
/// block offsets starting from the first block of the function.
void BranchRelaxation::scanFunction() {
  BlockInfo.clear();
  BlockInfo.resize(MF->getNumBlockIDs());
  TrampolineInsertionPoint = nullptr;

  // First thing, compute the size of all basic blocks, and see if the function
  // has any inline assembly in it. If so, we have to be conservative about
  // alignment assumptions, as we don't know for sure the size of any
  // instructions in the inline assembly. At the same time, place the
  // trampoline insertion point at the end of the hot portion of the function.
  for (MachineBasicBlock &MBB : *MF) {
    BlockInfo[MBB.getNumber()].Size = computeBlockSize(MBB);

    // Keep overwriting so that, once the walk finishes, this points at the
    // last block in iteration order that is not in the cold section.
    if (MBB.getSectionID() != MBBSectionID::ColdSectionID)
      TrampolineInsertionPoint = &MBB;
  }

  // Compute block offsets and known bits.
  adjustBlockOffsets(*MF->begin());

  // Every block was in the cold section; only report it, nothing to undo.
  if (TrampolineInsertionPoint == nullptr) {
    LLVM_DEBUG(dbgs() << " No suitable trampoline insertion point found in "
                      << MF->getName() << ".\n");
  }
}

/// computeBlockSize - Compute the size for MBB.
Expand Down Expand Up @@ -376,6 +391,50 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) {
assert(!Fail && "branches to be relaxed must be analyzable");
(void)Fail;

// Since cross-section conditional branches to the cold section are rarely
// taken, try to avoid inverting the condition. Instead, add a "trampoline
// branch", which unconditionally branches to the branch destination. Place
// the trampoline branch at the end of the function and retarget the
// conditional branch to the trampoline.
// tbz L1
// =>
// tbz L1Trampoline
// ...
// L1Trampoline: b L1
if (MBB->getSectionID() != TBB->getSectionID() &&
TBB->getSectionID() == MBBSectionID::ColdSectionID &&
TrampolineInsertionPoint != nullptr) {
// If the insertion point is out of range, we can't put a trampoline there.
NewBB =
createNewBlockAfter(*TrampolineInsertionPoint, MBB->getBasicBlock());

if (isBlockInRange(MI, *NewBB)) {
LLVM_DEBUG(dbgs() << " Retarget destination to trampoline at "
<< NewBB->back());

insertUncondBranch(NewBB, TBB);

// Update the successor lists to include the trampoline.
MBB->replaceSuccessor(TBB, NewBB);
NewBB->addSuccessor(TBB);

// Replace branch in the current (MBB) block.
removeBranch(MBB);
insertBranch(MBB, NewBB, FBB, Cond);

TrampolineInsertionPoint = NewBB;
finalizeBlockChanges(MBB, NewBB);
return true;
}

LLVM_DEBUG(
dbgs() << " Trampoline insertion point out of range for Bcc from "
<< printMBBReference(*MBB) << " to " << printMBBReference(*TBB)
<< ".\n");
TrampolineInsertionPoint->setIsEndSection(NewBB->isEndSection());
MF->erase(NewBB);
}

// Add an unconditional branch to the destination and invert the branch
// condition to jump over it:
// tbz L1
Expand Down
268 changes: 267 additions & 1 deletion llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass branch-relaxation -aarch64-b-offset-bits=64 %s -o - | FileCheck %s
# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass branch-relaxation -aarch64-b-offset-bits=64 -aarch64-tbz-offset-bits=9 -aarch64-cbz-offset-bits=9 %s -o - | FileCheck %s
# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass branch-relaxation -aarch64-tbz-offset-bits=9 -aarch64-cbz-offset-bits=9 %s -o - | FileCheck --check-prefix=INDIRECT %s

--- |
declare i32 @bar()
Expand All @@ -21,6 +22,73 @@
br label %end
}

; IR counterpart of the tbz_hot_to_cold machine function in this file.
; The entry block branches on %0 to %hot_block or %cold_block; each of those
; makes one call and then rejoins at %end, which tail-calls @qux.
define void @tbz_hot_to_cold(i1 zeroext %0) {
br i1 %0, label %hot_block, label %cold_block

hot_block: ; preds = %1
%2 = call i32 @baz()
br label %end

end: ; preds = %cold_block, %hot_block
%3 = tail call i32 @qux()
ret void

cold_block: ; preds = %1
%4 = call i32 @bar()
br label %end
}

; IR counterpart of the tbz_no_valid_tramp machine function in this file.
; Same diamond shape as the other functions here, except that %hot also
; contains a ".space 1024" inline-asm directive between the entry branch and
; the end of the non-cold code.
define void @tbz_no_valid_tramp(i1 zeroext %0) {
br i1 %0, label %hot, label %cold

hot: ; preds = %1
%2 = call i32 @baz()
call void asm sideeffect ".space 1024", ""()
br label %end

end: ; preds = %cold, %hot
%3 = tail call i32 @qux()
ret void

cold: ; preds = %1
%4 = call i32 @bar()
br label %end
}

; IR counterpart of the tbz_cold_to_hot machine function in this file.
; The entry block branches on %0 to %cold_block or %hot_block; both rejoin at
; %end. NOTE(review): attribute group #0 is not defined in the visible part of
; the file -- confirm what it carries.
define void @tbz_cold_to_hot(i1 zeroext %0) #0 {
br i1 %0, label %cold_block, label %hot_block

cold_block: ; preds = %1
%2 = call i32 @baz()
br label %end

end: ; preds = %hot_block, %cold_block
%3 = tail call i32 @qux()
ret void

hot_block: ; preds = %1
%4 = call i32 @bar()
br label %end
}

; IR counterpart of the tbz_tramp_pushed_oob machine function in this file.
; %entry defines x16 via inline asm and branches on %0 to %unrelaxable or
; %cold; %unrelaxable branches on %1 to %end or %cold. %end contains a
; ".space 996" inline-asm directive, and both %end and %cold read x16.
define void @tbz_tramp_pushed_oob(i1 zeroext %0, i1 zeroext %1) {
entry:
%x16 = call i64 asm sideeffect "mov x16, 1", "={x16}"()
br i1 %0, label %unrelaxable, label %cold

unrelaxable: ; preds = %entry
br i1 %1, label %end, label %cold

end: ; preds = %unrelaxable
call void asm sideeffect ".space 996", ""()
call void asm sideeffect "# reg use $0", "{x16}"(i64 %x16)
ret void

cold: ; preds = %entry, %unrelaxable
call void asm sideeffect "# reg use $0", "{x16}"(i64 %x16)
ret void
}

...
---
name: relax_tbz
Expand Down Expand Up @@ -69,3 +137,201 @@ body: |
early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0)
TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
...
---
name: tbz_hot_to_cold
tracksRegLiveness: true
liveins:
- { reg: '$w0', virtual-reg: '' }
stack:
- { id: 0, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16,
stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
body: |
; CHECK-LABEL: name: tbz_hot_to_cold
; COM: Check that branch relaxation relaxes cross-section conditional
; COM: branches by creating trampolines after all other hot basic blocks.
; CHECK: bb.0 (%ir-block.1):
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
; CHECK: TBZW
; CHECK-SAME: %bb.3
; CHECK: bb.1.hot_block:
; CHECK: TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
; CHECK: bb.3 (%ir-block.1):
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: B %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.cold_block (bbsections Cold):
; CHECK: TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
bb.0 (%ir-block.1):
successors: %bb.1(0x40000000), %bb.2(0x40000000)
liveins: $w0, $lr
early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0)
TBZW killed renamable $w0, 0, %bb.2
bb.1.hot_block:
BL @baz, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0)
TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
bb.2.cold_block (bbsections Cold):
BL @bar, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0)
TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
...
---
name: tbz_no_valid_tramp
tracksRegLiveness: true
liveins:
- { reg: '$w0', virtual-reg: '' }
stack:
- { id: 0, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16,
stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
machineFunctionInfo:
hasRedZone: false
body: |
; CHECK-LABEL: name: tbz_no_valid_tramp
; COM: Check that branch relaxation doesn't insert a trampoline if there is no
; COM: viable insertion location.
; CHECK: bb.0 (%ir-block.1):
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
; CHECK: CBNZW
; CHECK-SAME: %bb.1
; CHECK-NEXT: B
; CHECK-SAME: %bb.3
; CHECK: bb.1.hot:
; CHECK: TCRETURNdi
; CHECK: bb.2.cold (bbsections Cold):
; CHECK: TCRETURNdi
bb.0 (%ir-block.1):
successors: %bb.1(0x40000000), %bb.2(0x40000000)
liveins: $w0, $lr
early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0)
CBZW killed renamable $w0, %bb.2
bb.1.hot:
BL @baz, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
INLINEASM &".space 1024", 1 /* sideeffect attdialect */
early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0)
TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
bb.2.cold (bbsections Cold):
BL @bar, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0)
TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
...
---
name: tbz_cold_to_hot
tracksRegLiveness: true
liveins:
- { reg: '$w0', virtual-reg: '' }
stack:
- { id: 0, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16,
stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
machineFunctionInfo:
hasRedZone: false
body: |
; CHECK-LABEL: name: tbz_cold_to_hot
; COM: Check that relaxation of conditional branches from the Cold section to
; COM: the Hot section doesn't modify the Hot section.
; CHECK: bb.0 (%ir-block.1, bbsections Cold):
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK: CBNZW
; CHECK-SAME: %bb.1
; CHECK-NEXT: B %bb.2
; CHECK: bb.1.cold_block (bbsections Cold):
; CHECK: TCRETURNdi
; CHECK: bb.2.hot_block:
; CHECK: TCRETURNdi
bb.0 (%ir-block.1, bbsections Cold):
successors: %bb.1(0x40000000), %bb.2(0x40000000)
liveins: $w0, $lr
early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0)
CBZW killed renamable $w0, %bb.2
bb.1.cold_block (bbsections Cold):
BL @baz, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0)
TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
bb.2.hot_block:
BL @bar, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0)
TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
...
---
name: tbz_tramp_pushed_oob
tracksRegLiveness: true
liveins:
- { reg: '$w0', virtual-reg: '' }
- { reg: '$w1', virtual-reg: '' }
stack:
- { id: 0, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16,
stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
machineFunctionInfo:
hasRedZone: false
body: |
; INDIRECT-LABEL: name: tbz_tramp_pushed_oob
; COM: Check that a conditional branch to a trampoline is properly relaxed
; COM: if the trampoline is pushed out of range.
; INDIRECT: bb.0.entry:
; INDIRECT-NEXT: successors: %bb.1(0x40000000), %[[TRAMP1:bb.[0-9]+]](0x40000000)
; INDIRECT: TBNZW
; INDIRECT-SAME: %bb.1
; INDIRECT-NEXT: B{{ }}
; INDIRECT-SAME: %[[TRAMP1]]
; INDIRECT: bb.1.unrelaxable:
; INDIRECT-NEXT: successors: %bb.2(0x40000000), %[[TRAMP2:bb.[0-9]+]](0x40000000)
; INDIRECT: TBNZW
; INDIRECT-SAME: %bb.2
; INDIRECT: [[TRAMP2]]
; INDIRECT-NEXT: successors: %bb.3(0x80000000)
; INDIRECT: bb.2.end:
; INDIRECT: TCRETURNdi
; INDIRECT: [[TRAMP1]].entry:
; INDIRECT: successors: %bb.3(0x80000000)
; INDIRECT-NOT: bbsections Cold
; INDIRECT: bb.3.cold (bbsections Cold):
; INDIRECT: TCRETURNdi
bb.0.entry (%ir-block.entry):
successors: %bb.1(0x40000000), %bb.3(0x40000000)
liveins: $w0, $w1, $lr
early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0)
INLINEASM &"mov x16, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x16
TBZW killed renamable $w0, 0, %bb.3
bb.1.unrelaxable:
successors: %bb.2(0x40000000), %bb.3(0x40000000)
liveins: $w1, $x16
TBNZW killed renamable $w1, 0, %bb.2
B %bb.3
bb.2.end:
liveins: $x16
INLINEASM &".space 996", 1 /* sideeffect attdialect */
INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x16
early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0)
TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
bb.3.cold (bbsections Cold):
liveins: $x16
INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x16
early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0)
TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
...

0 comments on commit e223e45

Please sign in to comment.