[AArch64][GlobalISel] Fold G_AND into G_BRCOND
When the G_BRCOND is fed by an eq or ne G_ICMP, it may be possible to fold a
G_AND into the branch by producing a tbnz/tbz instead.

This happens when

  1. We have a ne/eq G_ICMP feeding into the G_BRCOND
  2. The G_ICMP is a comparison against 0
  3. One of the operands of the G_AND is a power of 2 constant

This is very similar to the code in AArch64TargetLowering::LowerBR_CC.
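For illustration, a source-level shape that should be caught by this fold when
compiled for AArch64 through GlobalISel (a sketch, not part of the commit; the
names are assumptions):

  extern void callee();
  void test_bit_3(long x) {
    // 8 == 0b1000 is a power of 2, so "x & 8" tests bit 3 (Log2_64(8) == 3).
    if (x & 8)
      callee(); // expect a tbnz/tbz on bit 3 rather than an and + cbnz/cbz
  }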

Add opt-and-tbnz-tbz.mir to test this.

Differential Revision: https://reviews.llvm.org/D73573
Jessica Paquette committed Jan 28, 2020
1 parent 4e799ad commit dba29f7
Showing 2 changed files with 330 additions and 3 deletions.
76 changes: 73 additions & 3 deletions llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -89,6 +89,11 @@ class AArch64InstructionSelector : public InstructionSelector {
  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool tryOptAndIntoCompareBranch(MachineInstr *LHS,
                                  int64_t CmpConstant,
                                  const CmpInst::Predicate &Pred,
                                  MachineBasicBlock *DstMBB,
                                  MachineIRBuilder &MIB) const;
  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

@@ -983,6 +988,64 @@ static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
  }
}

bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
    MachineInstr *AndInst, int64_t CmpConstant, const CmpInst::Predicate &Pred,
    MachineBasicBlock *DstMBB, MachineIRBuilder &MIB) const {
  // Given something like this:
  //
  // %x = ...Something...
  // %one = G_CONSTANT i64 1
  // %zero = G_CONSTANT i64 0
  // %and = G_AND %x, %one
  // %cmp = G_ICMP intpred(ne), %and, %zero
  // %cmp_trunc = G_TRUNC %cmp
  // G_BRCOND %cmp_trunc, %bb.3
  //
  // We want to try to fold the AND into the G_BRCOND and produce either a
  // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
  //
  // In this case, we'd get
  //
  // TBNZ %x, 0, %bb.3
  //
  if (!AndInst || AndInst->getOpcode() != TargetOpcode::G_AND)
    return false;

  // Need to be comparing against 0 to fold.
  if (CmpConstant != 0)
    return false;

  MachineRegisterInfo &MRI = *MIB.getMRI();
  unsigned Opc = 0;
  Register TestReg = AndInst->getOperand(1).getReg();
  unsigned TestSize = MRI.getType(TestReg).getSizeInBits();

  // Only support EQ and NE. If we have LT, then it *is* possible to fold, but
  // we don't want to do this. When we have an AND and LT, we need a TST/ANDS,
  // so folding would be redundant.
  if (Pred == CmpInst::Predicate::ICMP_EQ)
    Opc = TestSize == 32 ? AArch64::TBZW : AArch64::TBZX;
  else if (Pred == CmpInst::Predicate::ICMP_NE)
    Opc = TestSize == 32 ? AArch64::TBNZW : AArch64::TBNZX;
  else
    return false;

  // Check if the AND has a constant on its RHS which we can use as a mask.
  // If it's a power of 2, then it's the same as checking a specific bit.
  // (e.g., ANDing with 8 == ANDing with 000...1000 == testing if bit 3 is set)
  auto MaybeBit =
      getConstantVRegValWithLookThrough(AndInst->getOperand(2).getReg(), MRI);
  if (!MaybeBit || !isPowerOf2_64(MaybeBit->Value))
    return false;
  uint64_t Bit = Log2_64(static_cast<uint64_t>(MaybeBit->Value));

  // Construct the branch.
  auto BranchMI =
      MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
  constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
  return true;
}

bool AArch64InstructionSelector::selectCompareBranch(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {

@@ -1000,9 +1063,9 @@ bool AArch64InstructionSelector::selectCompareBranch(
  if (!VRegAndVal)
    std::swap(RHS, LHS);

  MachineIRBuilder MIB(I);
  VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
  if (!VRegAndVal || VRegAndVal->Value != 0) {
    // If we can't select a CBZ then emit a cmp + Bcc.
    if (!emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
                            CCMI->getOperand(1), MIB))
@@ -1014,11 +1077,18 @@
    return true;
  }

  // Try to fold things into the branch.
  const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
  MachineInstr *LHSMI = getDefIgnoringCopies(LHS, MRI);
  if (tryOptAndIntoCompareBranch(LHSMI, VRegAndVal->Value, Pred, DestMBB,
                                 MIB)) {
    I.eraseFromParent();
    return true;
  }

  const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
  if (RB.getID() != AArch64::GPRRegBankID)
    return false;

  if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
    return false;

257 changes: 257 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir
@@ -0,0 +1,257 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
#
# Verify that we can fold G_AND into G_BRCOND when all of the following hold:
# 1. We have a ne/eq G_ICMP feeding into the G_BRCOND
# 2. The G_ICMP is being compared against 0
# 3. One of the operands of the G_AND is a power of 2
#
# If all of these hold, we should produce a tbnz or a tbz.
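#
# For example, in tbnz_and_s64 below the G_AND mask is 8 (0b1000), so we expect
# "TBNZX %copy, 3, %bb.1": Log2_64(8) == 3 is the bit being tested.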
...
---
name: tbnz_and_s64
alignment: 4
legalized: true
regBankSelected: true
body: |
; CHECK-LABEL: name: tbnz_and_s64
; CHECK: bb.0:
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
; CHECK: TBNZX [[COPY]], 3, %bb.1
; CHECK: B %bb.0
; CHECK: bb.1:
; CHECK: RET_ReallyLR
bb.0:
successors: %bb.0, %bb.1
liveins: $x0
%0:gpr(s64) = COPY $x0
%1:gpr(s64) = G_CONSTANT i64 8 ; Power of 2 => TBNZ tests bit 3
%3:gpr(s64) = G_CONSTANT i64 0
%2:gpr(s64) = G_AND %0, %1
%5:gpr(s32) = G_ICMP intpred(ne), %2(s64), %3
%4:gpr(s1) = G_TRUNC %5(s32)
G_BRCOND %4(s1), %bb.1
G_BR %bb.0
bb.1:
RET_ReallyLR
...
---
name: tbz_and_s64
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: tbz_and_s64
; CHECK: bb.0:
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
; CHECK: liveins: $x0
; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
; CHECK: TBZX [[COPY]], 4, %bb.1
; CHECK: B %bb.0
; CHECK: bb.1:
; CHECK: RET_ReallyLR
bb.0:
successors: %bb.0, %bb.1
liveins: $x0
%0:gpr(s64) = COPY $x0
%1:gpr(s64) = G_CONSTANT i64 16 ; Power of 2 => TBZ tests bit 4
%3:gpr(s64) = G_CONSTANT i64 0
%2:gpr(s64) = G_AND %0, %1
%5:gpr(s32) = G_ICMP intpred(eq), %2(s64), %3
%4:gpr(s1) = G_TRUNC %5(s32)
G_BRCOND %4(s1), %bb.1
G_BR %bb.0
bb.1:
RET_ReallyLR
...
---
name: tbnz_and_s32
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: tbnz_and_s32
; CHECK: bb.0:
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
; CHECK: liveins: $w0
; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
; CHECK: TBNZW [[COPY]], 0, %bb.1
; CHECK: B %bb.0
; CHECK: bb.1:
; CHECK: RET_ReallyLR
bb.0:
successors: %bb.0, %bb.1
liveins: $w0
%0:gpr(s32) = COPY $w0
%1:gpr(s32) = G_CONSTANT i32 1 ; Power of 2 => TBNZ tests bit 0
%3:gpr(s32) = G_CONSTANT i32 0
%2:gpr(s32) = G_AND %0, %1
%5:gpr(s32) = G_ICMP intpred(ne), %2(s32), %3
%4:gpr(s1) = G_TRUNC %5(s32)
G_BRCOND %4(s1), %bb.1
G_BR %bb.0
bb.1:
RET_ReallyLR
...
---
name: tbz_and_s32
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: tbz_and_s32
; CHECK: bb.0:
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
; CHECK: liveins: $w0
; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
; CHECK: TBZW [[COPY]], 0, %bb.1
; CHECK: B %bb.0
; CHECK: bb.1:
; CHECK: RET_ReallyLR
bb.0:
successors: %bb.0, %bb.1
liveins: $w0
%0:gpr(s32) = COPY $w0
%1:gpr(s32) = G_CONSTANT i32 1 ; Power of 2 => TBZ tests bit 0
%3:gpr(s32) = G_CONSTANT i32 0
%2:gpr(s32) = G_AND %0, %1
%5:gpr(s32) = G_ICMP intpred(eq), %2(s32), %3
%4:gpr(s1) = G_TRUNC %5(s32)
G_BRCOND %4(s1), %bb.1
G_BR %bb.0
bb.1:
RET_ReallyLR
...
---
name: dont_fold_and_lt
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: dont_fold_and_lt
; CHECK: bb.0:
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
; CHECK: liveins: $w0
; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
; CHECK: $wzr = ANDSWri [[COPY]], 0, implicit-def $nzcv
; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
; CHECK: TBNZW [[CSINCWr]], 0, %bb.1
; CHECK: B %bb.0
; CHECK: bb.1:
; CHECK: RET_ReallyLR
bb.0:
successors: %bb.0, %bb.1
liveins: $w0
%0:gpr(s32) = COPY $w0
%1:gpr(s32) = G_CONSTANT i32 1
%3:gpr(s32) = G_CONSTANT i32 0
%2:gpr(s32) = G_AND %0, %1
%5:gpr(s32) = G_ICMP intpred(slt), %2(s32), %3
%4:gpr(s1) = G_TRUNC %5(s32)
G_BRCOND %4(s1), %bb.1
G_BR %bb.0
bb.1:
RET_ReallyLR
...
---
name: dont_fold_and_gt
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: dont_fold_and_gt
; CHECK: bb.0:
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
; CHECK: liveins: $w0
; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
; CHECK: $wzr = ANDSWri [[COPY]], 0, implicit-def $nzcv
; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv
; CHECK: TBNZW [[CSINCWr]], 0, %bb.1
; CHECK: B %bb.0
; CHECK: bb.1:
; CHECK: RET_ReallyLR
bb.0:
successors: %bb.0, %bb.1
liveins: $w0
%0:gpr(s32) = COPY $w0
%1:gpr(s32) = G_CONSTANT i32 1
%3:gpr(s32) = G_CONSTANT i32 0
%2:gpr(s32) = G_AND %0, %1
%5:gpr(s32) = G_ICMP intpred(sgt), %2(s32), %3
%4:gpr(s1) = G_TRUNC %5(s32)
G_BRCOND %4(s1), %bb.1
G_BR %bb.0
bb.1:
RET_ReallyLR
...
---
name: dont_fold_and_not_power_of_2
alignment: 4
legalized: true
regBankSelected: true
body: |
; CHECK-LABEL: name: dont_fold_and_not_power_of_2
; CHECK: bb.0:
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
; CHECK: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri [[COPY]], 4098
; CHECK: CBNZX [[ANDXri]], %bb.1
; CHECK: B %bb.0
; CHECK: bb.1:
; CHECK: RET_ReallyLR
bb.0:
successors: %bb.0, %bb.1
liveins: $x0
%0:gpr(s64) = COPY $x0
%1:gpr(s64) = G_CONSTANT i64 7
%3:gpr(s64) = G_CONSTANT i64 0
%2:gpr(s64) = G_AND %0, %1
%5:gpr(s32) = G_ICMP intpred(ne), %2(s64), %3
%4:gpr(s1) = G_TRUNC %5(s32)
G_BRCOND %4(s1), %bb.1
G_BR %bb.0
bb.1:
RET_ReallyLR
...
---
name: dont_fold_cmp_not_0
alignment: 4
legalized: true
regBankSelected: true
body: |
; CHECK-LABEL: name: dont_fold_cmp_not_0
; CHECK: bb.0:
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
; CHECK: [[ANDXri:%[0-9]+]]:gpr64sp = ANDXri [[COPY]], 8064
; CHECK: $xzr = SUBSXri [[ANDXri]], 4, 0, implicit-def $nzcv
; CHECK: Bcc 1, %bb.1, implicit $nzcv
; CHECK: B %bb.0
; CHECK: bb.1:
; CHECK: RET_ReallyLR
bb.0:
successors: %bb.0, %bb.1
liveins: $x0
%0:gpr(s64) = COPY $x0
%1:gpr(s64) = G_CONSTANT i64 4
%3:gpr(s64) = G_CONSTANT i64 4
%2:gpr(s64) = G_AND %0, %1
%5:gpr(s32) = G_ICMP intpred(ne), %2(s64), %3
%4:gpr(s1) = G_TRUNC %5(s32)
G_BRCOND %4(s1), %bb.1
G_BR %bb.0
bb.1:
RET_ReallyLR
