Skip to content

Commit ca927e5

Browse files
authored
[AArch64] Run optimizeTerminators earlier too. (#170907)
Running optimizeTerminators before other optimizations such as branch layout can lead to more folding and better codegen, but on its own it cannot capture all cases, so there is benefit to running it in both places. This takes the existing code from #161508 and also runs it from the AArch64RedundantCopyElimination pass, which seems like a sensible enough place for it.
1 parent 9f7fff1 commit ca927e5

File tree

12 files changed

+144
-199
lines changed

12 files changed

+144
-199
lines changed

llvm/lib/CodeGen/ShrinkWrap.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -618,6 +618,8 @@ bool ShrinkWrapImpl::postShrinkWrapping(bool HasCandidate, MachineFunction &MF,
618618

619619
DenseSet<const MachineBasicBlock *> DirtyBBs;
620620
for (MachineBasicBlock &MBB : MF) {
621+
if (!MDT->isReachableFromEntry(&MBB))
622+
continue;
621623
if (MBB.isEHPad()) {
622624
DirtyBBs.insert(&MBB);
623625
continue;

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 47 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -708,6 +708,53 @@ unsigned AArch64InstrInfo::insertBranch(
708708
return 2;
709709
}
710710

711+
/// Simplifies a compare/test-and-branch terminator of \p MBB whose register
/// operand (operand 0) is WZR or XZR.
///
/// - CBZ/TBZ: the successor list is reduced to the branch destination, the
///   branch and every instruction after it are erased, and an unconditional
///   B to that destination is appended.
/// - CBNZ/TBNZ: the branch destination is removed from the successor list and
///   the branch is erased.
///
/// \param MBB block whose terminators are scanned and rewritten in place.
/// \param TII used to look up the branch destination and the B opcode.
/// \returns true if a terminator was rewritten (at most one per call, because
/// the function returns right after the first rewrite); false otherwise.
bool llvm::optimizeTerminators(MachineBasicBlock *MBB,
712+
const TargetInstrInfo &TII) {
713+
// Plain iteration is sufficient: every rewrite below returns immediately
// after erasing MI, so the loop never advances past an erased instruction.
for (MachineInstr &MI : MBB->terminators()) {
714+
unsigned Opc = MI.getOpcode();
715+
switch (Opc) {
716+
case AArch64::CBZW:
717+
case AArch64::CBZX:
718+
case AArch64::TBZW:
719+
case AArch64::TBZX:
720+
// CBZ/TBZ with WZR/XZR -> unconditional B
721+
if (MI.getOperand(0).getReg() == AArch64::WZR ||
722+
MI.getOperand(0).getReg() == AArch64::XZR) {
723+
DEBUG_WITH_TYPE("optimizeTerminators",
724+
dbgs() << "Removing always taken branch: " << MI);
725+
MachineBasicBlock *Target = TII.getBranchDestBlock(MI);
726+
// Walk a copy of the successor list, since removeSuccessor() is called on
// MBB while iterating.
SmallVector<MachineBasicBlock *> Succs(MBB->successors());
727+
for (auto *S : Succs)
728+
if (S != Target)
729+
MBB->removeSuccessor(S);
730+
// Capture the debug location before MI is erased so the new B can reuse it.
DebugLoc DL = MI.getDebugLoc();
731+
// Erase everything after MI, back to front, then MI itself.
while (MBB->rbegin() != &MI)
732+
MBB->rbegin()->eraseFromParent();
733+
MI.eraseFromParent();
734+
BuildMI(MBB, DL, TII.get(AArch64::B)).addMBB(Target);
735+
return true;
736+
}
737+
break;
738+
case AArch64::CBNZW:
739+
case AArch64::CBNZX:
740+
case AArch64::TBNZW:
741+
case AArch64::TBNZX:
742+
// CBNZ/TBNZ with WZR/XZR -> never taken, remove branch and successor
743+
if (MI.getOperand(0).getReg() == AArch64::WZR ||
744+
MI.getOperand(0).getReg() == AArch64::XZR) {
745+
DEBUG_WITH_TYPE("optimizeTerminators",
746+
dbgs() << "Removing never taken branch: " << MI);
747+
MachineBasicBlock *Target = TII.getBranchDestBlock(MI);
748+
// NOTE(review): this assumes no remaining terminator of the block still
// branches to Target; if one does, dropping the successor edge would leave
// the CFG inconsistent -- confirm that case cannot reach this point.
MI.getParent()->removeSuccessor(Target);
749+
MI.eraseFromParent();
750+
return true;
751+
}
752+
break;
753+
}
754+
}
755+
// No terminator matched; the block was left unchanged.
return false;
756+
}
757+
711758
// Find the original register that VReg is copied from.
712759
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
713760
while (Register::isVirtualRegister(VReg)) {

llvm/lib/Target/AArch64/AArch64InstrInfo.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -705,6 +705,8 @@ int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset,
705705
unsigned *OutUnscaledOp = nullptr,
706706
int64_t *EmittableOffset = nullptr);
707707

708+
bool optimizeTerminators(MachineBasicBlock *MBB, const TargetInstrInfo &TII);
709+
708710
static inline bool isUncondBranchOpcode(int Opc) { return Opc == AArch64::B; }
709711

710712
static inline bool isCondBranchOpcode(int Opc) {

llvm/lib/Target/AArch64/AArch64RedundantCondBranchPass.cpp

Lines changed: 1 addition & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
//===----------------------------------------------------------------------===//
1515

1616
#include "AArch64.h"
17+
#include "AArch64InstrInfo.h"
1718
#include "llvm/CodeGen/MachineFunctionPass.h"
1819
#include "llvm/CodeGen/MachineInstrBuilder.h"
1920
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -45,51 +46,6 @@ INITIALIZE_PASS(AArch64RedundantCondBranch, "aarch64-redundantcondbranch",
4546
"AArch64 Redundant Conditional Branch Elimination pass", false,
4647
false)
4748

48-
static bool optimizeTerminators(MachineBasicBlock *MBB,
49-
const TargetInstrInfo &TII) {
50-
for (MachineInstr &MI : make_early_inc_range(MBB->terminators())) {
51-
unsigned Opc = MI.getOpcode();
52-
switch (Opc) {
53-
case AArch64::CBZW:
54-
case AArch64::CBZX:
55-
case AArch64::TBZW:
56-
case AArch64::TBZX:
57-
// CBZ/TBZ with WZR/XZR -> unconditional B
58-
if (MI.getOperand(0).getReg() == AArch64::WZR ||
59-
MI.getOperand(0).getReg() == AArch64::XZR) {
60-
LLVM_DEBUG(dbgs() << "Removing redundant branch: " << MI);
61-
MachineBasicBlock *Target = TII.getBranchDestBlock(MI);
62-
SmallVector<MachineBasicBlock *> Succs(MBB->successors());
63-
for (auto *S : Succs)
64-
if (S != Target)
65-
MBB->removeSuccessor(S);
66-
DebugLoc DL = MI.getDebugLoc();
67-
while (MBB->rbegin() != &MI)
68-
MBB->rbegin()->eraseFromParent();
69-
MI.eraseFromParent();
70-
BuildMI(MBB, DL, TII.get(AArch64::B)).addMBB(Target);
71-
return true;
72-
}
73-
break;
74-
case AArch64::CBNZW:
75-
case AArch64::CBNZX:
76-
case AArch64::TBNZW:
77-
case AArch64::TBNZX:
78-
// CBNZ/TBNZ with WZR/XZR -> never taken, remove branch and successor
79-
if (MI.getOperand(0).getReg() == AArch64::WZR ||
80-
MI.getOperand(0).getReg() == AArch64::XZR) {
81-
LLVM_DEBUG(dbgs() << "Removing redundant branch: " << MI);
82-
MachineBasicBlock *Target = TII.getBranchDestBlock(MI);
83-
MI.getParent()->removeSuccessor(Target);
84-
MI.eraseFromParent();
85-
return true;
86-
}
87-
break;
88-
}
89-
}
90-
return false;
91-
}
92-
9349
bool AArch64RedundantCondBranch::runOnMachineFunction(MachineFunction &MF) {
9450
if (skipFunction(MF.getFunction()))
9551
return false;

llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,7 @@
5050
// to use WZR/XZR directly in some cases.
5151
//===----------------------------------------------------------------------===//
5252
#include "AArch64.h"
53+
#include "AArch64InstrInfo.h"
5354
#include "llvm/ADT/SetVector.h"
5455
#include "llvm/ADT/Statistic.h"
5556
#include "llvm/ADT/iterator_range.h"
@@ -475,6 +476,7 @@ bool AArch64RedundantCopyElimination::runOnMachineFunction(
475476
return false;
476477
TRI = MF.getSubtarget().getRegisterInfo();
477478
MRI = &MF.getRegInfo();
479+
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
478480

479481
// Resize the clobbered and used register unit trackers. We do this once per
480482
// function.
@@ -484,8 +486,10 @@ bool AArch64RedundantCopyElimination::runOnMachineFunction(
484486
OptBBUsedRegs.init(*TRI);
485487

486488
bool Changed = false;
487-
for (MachineBasicBlock &MBB : MF)
489+
for (MachineBasicBlock &MBB : MF) {
490+
Changed |= optimizeTerminators(&MBB, TII);
488491
Changed |= optimizeBlock(&MBB);
492+
}
489493
return Changed;
490494
}
491495

llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll

Lines changed: 22 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -735,21 +735,15 @@ define void @infiniteloop() {
735735
; ENABLE-NEXT: .cfi_offset w29, -16
736736
; ENABLE-NEXT: .cfi_offset w19, -24
737737
; ENABLE-NEXT: .cfi_offset w20, -32
738-
; ENABLE-NEXT: ; %bb.1: ; %if.then
739738
; ENABLE-NEXT: sub x19, sp, #16
740739
; ENABLE-NEXT: mov sp, x19
741740
; ENABLE-NEXT: mov w20, wzr
742-
; ENABLE-NEXT: LBB10_2: ; %for.body
741+
; ENABLE-NEXT: LBB10_1: ; %for.body
743742
; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1
744743
; ENABLE-NEXT: bl _something
745744
; ENABLE-NEXT: add w20, w0, w20
746745
; ENABLE-NEXT: str w20, [x19]
747-
; ENABLE-NEXT: b LBB10_2
748-
; ENABLE-NEXT: ; %bb.3: ; %if.end
749-
; ENABLE-NEXT: sub sp, x29, #16
750-
; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
751-
; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
752-
; ENABLE-NEXT: ret
746+
; ENABLE-NEXT: b LBB10_1
753747
;
754748
; DISABLE-LABEL: infiniteloop:
755749
; DISABLE: ; %bb.0: ; %entry
@@ -761,21 +755,15 @@ define void @infiniteloop() {
761755
; DISABLE-NEXT: .cfi_offset w29, -16
762756
; DISABLE-NEXT: .cfi_offset w19, -24
763757
; DISABLE-NEXT: .cfi_offset w20, -32
764-
; DISABLE-NEXT: ; %bb.1: ; %if.then
765758
; DISABLE-NEXT: sub x19, sp, #16
766759
; DISABLE-NEXT: mov sp, x19
767760
; DISABLE-NEXT: mov w20, wzr
768-
; DISABLE-NEXT: LBB10_2: ; %for.body
761+
; DISABLE-NEXT: LBB10_1: ; %for.body
769762
; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1
770763
; DISABLE-NEXT: bl _something
771764
; DISABLE-NEXT: add w20, w0, w20
772765
; DISABLE-NEXT: str w20, [x19]
773-
; DISABLE-NEXT: b LBB10_2
774-
; DISABLE-NEXT: ; %bb.3: ; %if.end
775-
; DISABLE-NEXT: sub sp, x29, #16
776-
; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
777-
; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
778-
; DISABLE-NEXT: ret
766+
; DISABLE-NEXT: b LBB10_1
779767
entry:
780768
br i1 undef, label %if.then, label %if.end
781769

@@ -806,11 +794,10 @@ define void @infiniteloop2() {
806794
; ENABLE-NEXT: .cfi_offset w29, -16
807795
; ENABLE-NEXT: .cfi_offset w19, -24
808796
; ENABLE-NEXT: .cfi_offset w20, -32
809-
; ENABLE-NEXT: ; %bb.1: ; %if.then
810797
; ENABLE-NEXT: sub x8, sp, #16
811798
; ENABLE-NEXT: mov sp, x8
812799
; ENABLE-NEXT: mov w9, wzr
813-
; ENABLE-NEXT: LBB11_2: ; %for.body
800+
; ENABLE-NEXT: LBB11_1: ; %for.body
814801
; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1
815802
; ENABLE-NEXT: ; InlineAsm Start
816803
; ENABLE-NEXT: mov x10, #0 ; =0x0
@@ -821,12 +808,7 @@ define void @infiniteloop2() {
821808
; ENABLE-NEXT: ; InlineAsm Start
822809
; ENABLE-NEXT: nop
823810
; ENABLE-NEXT: ; InlineAsm End
824-
; ENABLE-NEXT: b LBB11_2
825-
; ENABLE-NEXT: ; %bb.3: ; %if.end
826-
; ENABLE-NEXT: sub sp, x29, #16
827-
; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
828-
; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
829-
; ENABLE-NEXT: ret
811+
; ENABLE-NEXT: b LBB11_1
830812
;
831813
; DISABLE-LABEL: infiniteloop2:
832814
; DISABLE: ; %bb.0: ; %entry
@@ -838,11 +820,10 @@ define void @infiniteloop2() {
838820
; DISABLE-NEXT: .cfi_offset w29, -16
839821
; DISABLE-NEXT: .cfi_offset w19, -24
840822
; DISABLE-NEXT: .cfi_offset w20, -32
841-
; DISABLE-NEXT: ; %bb.1: ; %if.then
842823
; DISABLE-NEXT: sub x8, sp, #16
843824
; DISABLE-NEXT: mov sp, x8
844825
; DISABLE-NEXT: mov w9, wzr
845-
; DISABLE-NEXT: LBB11_2: ; %for.body
826+
; DISABLE-NEXT: LBB11_1: ; %for.body
846827
; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1
847828
; DISABLE-NEXT: ; InlineAsm Start
848829
; DISABLE-NEXT: mov x10, #0 ; =0x0
@@ -853,12 +834,7 @@ define void @infiniteloop2() {
853834
; DISABLE-NEXT: ; InlineAsm Start
854835
; DISABLE-NEXT: nop
855836
; DISABLE-NEXT: ; InlineAsm End
856-
; DISABLE-NEXT: b LBB11_2
857-
; DISABLE-NEXT: ; %bb.3: ; %if.end
858-
; DISABLE-NEXT: sub sp, x29, #16
859-
; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
860-
; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
861-
; DISABLE-NEXT: ret
837+
; DISABLE-NEXT: b LBB11_1
862838
entry:
863839
br i1 undef, label %if.then, label %if.end
864840

@@ -889,49 +865,43 @@ if.end:
889865
define void @infiniteloop3() {
890866
; ENABLE-LABEL: infiniteloop3:
891867
; ENABLE: ; %bb.0: ; %entry
892-
; ENABLE-NEXT: ; %bb.1: ; %loop2a.preheader
893868
; ENABLE-NEXT: mov x8, xzr
894869
; ENABLE-NEXT: mov x9, xzr
895870
; ENABLE-NEXT: mov x11, xzr
896-
; ENABLE-NEXT: b LBB12_3
897-
; ENABLE-NEXT: LBB12_2: ; %loop2b
898-
; ENABLE-NEXT: ; in Loop: Header=BB12_3 Depth=1
871+
; ENABLE-NEXT: b LBB12_2
872+
; ENABLE-NEXT: LBB12_1: ; %loop2b
873+
; ENABLE-NEXT: ; in Loop: Header=BB12_2 Depth=1
899874
; ENABLE-NEXT: str x10, [x11]
900875
; ENABLE-NEXT: mov x11, x10
901-
; ENABLE-NEXT: LBB12_3: ; %loop1
876+
; ENABLE-NEXT: LBB12_2: ; %loop1
902877
; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1
903878
; ENABLE-NEXT: mov x10, x9
904879
; ENABLE-NEXT: ldr x9, [x8]
905-
; ENABLE-NEXT: cbnz x8, LBB12_2
906-
; ENABLE-NEXT: ; %bb.4: ; in Loop: Header=BB12_3 Depth=1
880+
; ENABLE-NEXT: cbnz x8, LBB12_1
881+
; ENABLE-NEXT: ; %bb.3: ; in Loop: Header=BB12_2 Depth=1
907882
; ENABLE-NEXT: mov x8, x10
908883
; ENABLE-NEXT: mov x11, x10
909-
; ENABLE-NEXT: b LBB12_3
910-
; ENABLE-NEXT: ; %bb.5: ; %end
911-
; ENABLE-NEXT: ret
884+
; ENABLE-NEXT: b LBB12_2
912885
;
913886
; DISABLE-LABEL: infiniteloop3:
914887
; DISABLE: ; %bb.0: ; %entry
915-
; DISABLE-NEXT: ; %bb.1: ; %loop2a.preheader
916888
; DISABLE-NEXT: mov x8, xzr
917889
; DISABLE-NEXT: mov x9, xzr
918890
; DISABLE-NEXT: mov x11, xzr
919-
; DISABLE-NEXT: b LBB12_3
920-
; DISABLE-NEXT: LBB12_2: ; %loop2b
921-
; DISABLE-NEXT: ; in Loop: Header=BB12_3 Depth=1
891+
; DISABLE-NEXT: b LBB12_2
892+
; DISABLE-NEXT: LBB12_1: ; %loop2b
893+
; DISABLE-NEXT: ; in Loop: Header=BB12_2 Depth=1
922894
; DISABLE-NEXT: str x10, [x11]
923895
; DISABLE-NEXT: mov x11, x10
924-
; DISABLE-NEXT: LBB12_3: ; %loop1
896+
; DISABLE-NEXT: LBB12_2: ; %loop1
925897
; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1
926898
; DISABLE-NEXT: mov x10, x9
927899
; DISABLE-NEXT: ldr x9, [x8]
928-
; DISABLE-NEXT: cbnz x8, LBB12_2
929-
; DISABLE-NEXT: ; %bb.4: ; in Loop: Header=BB12_3 Depth=1
900+
; DISABLE-NEXT: cbnz x8, LBB12_1
901+
; DISABLE-NEXT: ; %bb.3: ; in Loop: Header=BB12_2 Depth=1
930902
; DISABLE-NEXT: mov x8, x10
931903
; DISABLE-NEXT: mov x11, x10
932-
; DISABLE-NEXT: b LBB12_3
933-
; DISABLE-NEXT: ; %bb.5: ; %end
934-
; DISABLE-NEXT: ret
904+
; DISABLE-NEXT: b LBB12_2
935905
entry:
936906
br i1 undef, label %loop2a, label %body
937907

llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll

Lines changed: 10 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -8,20 +8,14 @@
88
define i8 @foo_optsize(i32 %v4) optsize {
99
; CHECK-LABEL: foo_optsize:
1010
; CHECK: // %bb.0: // %entry
11-
; CHECK-NEXT: b .LBB0_2
12-
; CHECK-NEXT: .LBB0_1:
13-
; CHECK-NEXT: mov w0, wzr
14-
; CHECK-NEXT: ret
15-
; CHECK-NEXT: .LBB0_2: // %b1
16-
; CHECK-NEXT: cbnz w0, .LBB0_4
17-
; CHECK-NEXT: // %bb.3: // %b2
11+
; CHECK-NEXT: cbnz w0, .LBB0_2
12+
; CHECK-NEXT: // %bb.1: // %b2
1813
; CHECK-NEXT: mov w0, #1 // =0x1
1914
; CHECK-NEXT: ret
20-
; CHECK-NEXT: .LBB0_4: // %b1
15+
; CHECK-NEXT: .LBB0_2: // %b1
2116
; CHECK-NEXT: cmp w0, #1
22-
; CHECK-NEXT: b.ne .LBB0_1
23-
; CHECK-NEXT: // %bb.5: // %b3
24-
; CHECK-NEXT: b .LBB0_1
17+
; CHECK-NEXT: mov w0, wzr
18+
; CHECK-NEXT: ret
2519
entry:
2620
%v2 = icmp eq i32 0, 0
2721
br i1 %v2, label %b1, label %b4
@@ -47,20 +41,14 @@ b4:
4741
define i8 @foo_optspeed(i32 %v4) {
4842
; CHECK-LABEL: foo_optspeed:
4943
; CHECK: // %bb.0: // %entry
50-
; CHECK-NEXT: b .LBB1_2
51-
; CHECK-NEXT: .LBB1_1:
52-
; CHECK-NEXT: mov w0, wzr
53-
; CHECK-NEXT: ret
54-
; CHECK-NEXT: .LBB1_2: // %b1
55-
; CHECK-NEXT: cbnz w0, .LBB1_4
56-
; CHECK-NEXT: // %bb.3: // %b2
44+
; CHECK-NEXT: cbnz w0, .LBB1_2
45+
; CHECK-NEXT: // %bb.1: // %b2
5746
; CHECK-NEXT: mov w0, #1 // =0x1
5847
; CHECK-NEXT: ret
59-
; CHECK-NEXT: .LBB1_4: // %b1
48+
; CHECK-NEXT: .LBB1_2: // %b1
6049
; CHECK-NEXT: cmp w0, #1
61-
; CHECK-NEXT: b.ne .LBB1_1
62-
; CHECK-NEXT: // %bb.5: // %b3
63-
; CHECK-NEXT: b .LBB1_1
50+
; CHECK-NEXT: mov w0, wzr
51+
; CHECK-NEXT: ret
6452
entry:
6553
%v2 = icmp eq i32 0, 0
6654
br i1 %v2, label %b1, label %b4

llvm/test/CodeGen/AArch64/lr-reserved-for-ra-live-in.ll

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,10 +21,8 @@ define i32 @check_lr_liveness(ptr %arg) #1 {
2121
; CHECK-NEXT: B %bb.3
2222
; CHECK-NEXT: {{ $}}
2323
; CHECK-NEXT: bb.1.bb:
24-
; CHECK-NEXT: successors: %bb.3(0x2aaaaaab), %bb.2(0x55555555)
2524
; CHECK-NEXT: liveins: $w0, $lr
2625
; CHECK-NEXT: {{ $}}
27-
; CHECK-NEXT: CBNZW $wzr, %bb.3
2826
; CHECK-NEXT: B %bb.2
2927
; CHECK-NEXT: {{ $}}
3028
; CHECK-NEXT: bb.2.bb1:

0 commit comments

Comments
 (0)