Skip to content

Commit

Permalink
X86InstrInfo: Optimize more combinations of SUB+CMP
Browse files Browse the repository at this point in the history
`X86InstrInfo::optimizeCompareInstr` would only optimize a `SUB`
followed by a `CMP` in `isRedundantFlagInstr`. This extends the code to
also look for other combinations like `CMP`+`CMP`, `TEST`+`TEST`, `SUB
x,0`+`TEST`.

- Change `isRedundantFlagInstr` to run `analyzeCompareInstr` on the
  candidate instruction and compare the results. This normalizes things
  and gives consistent results for various comparisons (`CMP x, y`,
  `SUB x, y`) and immediate cases (`TEST x, x`, `SUB x, 0`,
  `CMP x, 0`...).
- Turn `isRedundantFlagInstr` into a member function so it can call
  `analyzeCompare`.  - We now also check `isRedundantFlagInstr` even if
  `IsCmpZero` is true, since we now have cases like `TEST`+`TEST`.

Differential Revision: https://reviews.llvm.org/D110865
  • Loading branch information
MatzeB committed Oct 28, 2021
1 parent e50f02b commit 97a1570
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 58 deletions.
124 changes: 78 additions & 46 deletions llvm/lib/Target/X86/X86InstrInfo.cpp
Expand Up @@ -4011,42 +4011,72 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
return false;
}

/// Check whether the first instruction, whose only
/// purpose is to update flags, can be made redundant.
/// CMPrr can be made redundant by SUBrr if the operands are the same.
/// This function can be extended later on.
/// SrcReg, SrcRegs: register operands for FlagI.
/// ImmValue: immediate for FlagI if it takes an immediate.
inline static bool isRedundantFlagInstr(const MachineInstr &FlagI,
bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI,
Register SrcReg, Register SrcReg2,
int64_t ImmMask, int64_t ImmValue,
const MachineInstr &OI) {
if (((FlagI.getOpcode() == X86::CMP64rr && OI.getOpcode() == X86::SUB64rr) ||
(FlagI.getOpcode() == X86::CMP32rr && OI.getOpcode() == X86::SUB32rr) ||
(FlagI.getOpcode() == X86::CMP16rr && OI.getOpcode() == X86::SUB16rr) ||
(FlagI.getOpcode() == X86::CMP8rr && OI.getOpcode() == X86::SUB8rr)) &&
((OI.getOperand(1).getReg() == SrcReg &&
OI.getOperand(2).getReg() == SrcReg2) ||
(OI.getOperand(1).getReg() == SrcReg2 &&
OI.getOperand(2).getReg() == SrcReg)))
return true;

if (ImmMask != 0 &&
((FlagI.getOpcode() == X86::CMP64ri32 &&
OI.getOpcode() == X86::SUB64ri32) ||
(FlagI.getOpcode() == X86::CMP64ri8 &&
OI.getOpcode() == X86::SUB64ri8) ||
(FlagI.getOpcode() == X86::CMP32ri && OI.getOpcode() == X86::SUB32ri) ||
(FlagI.getOpcode() == X86::CMP32ri8 &&
OI.getOpcode() == X86::SUB32ri8) ||
(FlagI.getOpcode() == X86::CMP16ri && OI.getOpcode() == X86::SUB16ri) ||
(FlagI.getOpcode() == X86::CMP16ri8 &&
OI.getOpcode() == X86::SUB16ri8) ||
(FlagI.getOpcode() == X86::CMP8ri && OI.getOpcode() == X86::SUB8ri)) &&
OI.getOperand(1).getReg() == SrcReg &&
OI.getOperand(2).getImm() == ImmValue)
return true;
return false;
const MachineInstr &OI,
bool *IsSwapped) const {
switch (OI.getOpcode()) {
case X86::CMP64rr:
case X86::CMP32rr:
case X86::CMP16rr:
case X86::CMP8rr:
case X86::SUB64rr:
case X86::SUB32rr:
case X86::SUB16rr:
case X86::SUB8rr: {
Register OISrcReg;
Register OISrcReg2;
int64_t OIMask;
int64_t OIValue;
if (!analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) ||
OIMask != ImmMask || OIValue != ImmValue)
return false;
if (SrcReg == OISrcReg && SrcReg2 == OISrcReg2) {
*IsSwapped = false;
return true;
}
if (SrcReg == OISrcReg2 && SrcReg2 == OISrcReg) {
*IsSwapped = true;
return true;
}
return false;
}
case X86::CMP64ri32:
case X86::CMP64ri8:
case X86::CMP32ri:
case X86::CMP32ri8:
case X86::CMP16ri:
case X86::CMP16ri8:
case X86::CMP8ri:
case X86::SUB64ri32:
case X86::SUB64ri8:
case X86::SUB32ri:
case X86::SUB32ri8:
case X86::SUB16ri:
case X86::SUB16ri8:
case X86::SUB8ri:
case X86::TEST64rr:
case X86::TEST32rr:
case X86::TEST16rr:
case X86::TEST8rr: {
if (ImmMask != 0) {
Register OISrcReg;
Register OISrcReg2;
int64_t OIMask;
int64_t OIValue;
if (analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) &&
SrcReg == OISrcReg && ImmMask == OIMask && OIValue == ImmValue) {
assert(SrcReg2 == X86::NoRegister && OISrcReg2 == X86::NoRegister &&
"should not have 2nd register");
return true;
}
}
return FlagI.isIdenticalTo(OI);
}
default:
return false;
}
}

/// Check whether the definition can be converted
Expand Down Expand Up @@ -4275,21 +4305,26 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,

bool IsCmpZero = (CmpMask != 0 && CmpValue == 0);

// Transformation currently requires SSA values.
if (SrcReg2.isPhysical())
return false;
MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg);
assert(SrcRegDef && "Must have a definition (SSA)");

MachineInstr *MI = nullptr;
MachineInstr *Sub = nullptr;
MachineInstr *Movr0Inst = nullptr;
bool NoSignFlag = false;
bool ClearsOverflowFlag = false;
bool ShouldUpdateCC = false;
bool IsSwapped = false;
X86::CondCode NewCC = X86::COND_INVALID;

// Search backward from CmpInstr for the next instruction defining EFLAGS.
const TargetRegisterInfo *TRI = &getRegisterInfo();
MachineBasicBlock &CmpMBB = *CmpInstr.getParent();
MachineBasicBlock::reverse_iterator From =
std::next(MachineBasicBlock::reverse_iterator(CmpInstr));
MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg);
assert(SrcRegDef && "Must have a definition (SSA)");
for (MachineBasicBlock *MBB = &CmpMBB;;) {
for (MachineInstr &Inst : make_range(From, MBB->rend())) {
// Try to use EFLAGS from the instruction defining %SrcReg. Example:
Expand Down Expand Up @@ -4329,8 +4364,8 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
// sub x, y or cmp x, y
// ... // EFLAGS not changed
// cmp x, y // <-- can be removed
if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2,
CmpMask, CmpValue, Inst)) {
if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask, CmpValue,
Inst, &IsSwapped)) {
Sub = &Inst;
break;
}
Expand Down Expand Up @@ -4360,10 +4395,6 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
From = MBB->rbegin();
}

bool IsSwapped =
(SrcReg2 != 0 && Sub && Sub->getOperand(1).getReg() == SrcReg2 &&
Sub->getOperand(2).getReg() == SrcReg);

// Scan forward from the instruction after CmpInstr for uses of EFLAGS.
// It is safe to remove CmpInstr if EFLAGS is redefined or killed.
// If we are done with the basic block, we need to check whether EFLAGS is
Expand All @@ -4386,7 +4417,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,

// EFLAGS is used by this instruction.
X86::CondCode OldCC = X86::COND_INVALID;
if (IsCmpZero || IsSwapped) {
if (MI || IsSwapped) {
// We decode the condition code from opcode.
if (Instr.isBranch())
OldCC = X86::getCondFromBranch(Instr);
Expand All @@ -4398,7 +4429,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
if (OldCC == X86::COND_INVALID) return false;
}
X86::CondCode ReplacementCC = X86::COND_INVALID;
if (IsCmpZero) {
if (MI) {
switch (OldCC) {
default: break;
case X86::COND_A: case X86::COND_AE:
Expand Down Expand Up @@ -4456,14 +4487,15 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,

// If EFLAGS is not killed nor re-defined, we should check whether it is
// live-out. If it is live-out, do not optimize.
if ((IsCmpZero || IsSwapped) && !IsSafe) {
if ((MI || IsSwapped) && !IsSafe) {
for (MachineBasicBlock *Successor : CmpMBB.successors())
if (Successor->isLiveIn(X86::EFLAGS))
return false;
}

// The instruction to be updated is either Sub or MI.
Sub = IsCmpZero ? MI : Sub;
assert((MI == nullptr || Sub == nullptr) && "Should not have Sub and MI set");
Sub = MI != nullptr ? MI : Sub;
MachineBasicBlock *SubBB = Sub->getParent();
// Move Movr0Inst to the appropriate place before Sub.
if (Movr0Inst) {
Expand Down
15 changes: 15 additions & 0 deletions llvm/lib/Target/X86/X86InstrInfo.h
Expand Up @@ -626,6 +626,21 @@ class X86InstrInfo final : public X86GenInstrInfo {
unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2,
bool IsIntrinsic = false) const;

/// Returns true when instruction \p FlagI produces the same flags as \p OI.
/// The caller should pass in the results of calling analyzeCompare on \p OI:
/// \p SrcReg, \p SrcReg2, \p ImmMask, \p ImmValue.
/// If the flags match \p OI as if it had the input operands swapped then the
/// function succeeds and sets \p IsSwapped to true.
///
/// Examples of OI, FlagI pairs returning true:
/// CMP %1, 42 and CMP %1, 42
/// CMP %1, %2 and %3 = SUB %1, %2
/// TEST %1, %1 and %2 = SUB %1, 0
/// CMP %1, %2 and %3 = SUB %2, %1 ; IsSwapped=true
bool isRedundantFlagInstr(const MachineInstr &FlagI, Register SrcReg,
Register SrcReg2, int64_t ImmMask, int64_t ImmValue,
const MachineInstr &OI, bool *IsSwapped) const;
};

} // namespace llvm
Expand Down
1 change: 0 additions & 1 deletion llvm/test/CodeGen/X86/2007-02-16-BranchFold.ll
Expand Up @@ -67,7 +67,6 @@ define i16 @main_bb_2E_i9_2E_i_2E_i932_2E_ce(%struct.list* %l_addr.01.0.i2.i.i92
; CHECK-NEXT: LBB0_6: ## %NodeBlock
; CHECK-NEXT: js LBB0_9
; CHECK-NEXT: ## %bb.7: ## %LeafBlock1
; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: jne LBB0_3
; CHECK-NEXT: ## %bb.8: ## %bb12.i.i935.exitStub
; CHECK-NEXT: movl %edi, (%esi)
Expand Down
12 changes: 2 additions & 10 deletions llvm/test/CodeGen/X86/optimize-compare.mir
Expand Up @@ -212,8 +212,7 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $edi
; CHECK-NEXT: CMP32rr [[COPY]], [[COPY1]], implicit-def $eflags
; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags
; CHECK-NEXT: CMP32rr [[COPY1]], [[COPY]], implicit-def $eflags
; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
; CHECK-NEXT: $bl = SETCCr 7, implicit $eflags
%0:gr32 = COPY $esi
%1:gr32 = COPY $edi
CMP32rr %0, %1, implicit-def $eflags
Expand All @@ -231,7 +230,6 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
; CHECK-NEXT: CMP64ri8 [[COPY]], 15, implicit-def $eflags
; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags
; CHECK-NEXT: CMP64ri8 [[COPY]], 15, implicit-def $eflags
; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
%0:gr64 = COPY $rsi
%1:gr64 = COPY $rdi
Expand All @@ -249,7 +247,6 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:gr16 = COPY $ax
; CHECK-NEXT: TEST16rr [[COPY]], [[COPY]], implicit-def $eflags
; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags
; CHECK-NEXT: TEST16rr [[COPY]], [[COPY]], implicit-def $eflags
; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
%0:gr16 = COPY $ax
TEST16rr %0, %0, implicit-def $eflags
Expand All @@ -267,8 +264,7 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $edi
; CHECK-NEXT: CMP32rr [[COPY]], [[COPY1]], implicit-def $eflags
; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags
; CHECK-NEXT: CMP32rr [[COPY1]], [[COPY]], implicit-def $eflags
; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
; CHECK-NEXT: $bl = SETCCr 7, implicit $eflags
%0:gr32 = COPY $esi
%1:gr32 = COPY $edi
CMP32rr %0, %1, implicit-def $eflags
Expand All @@ -285,7 +281,6 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi
; CHECK-NEXT: CMP32ri [[COPY]], -12345, implicit-def $eflags
; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags
; CHECK-NEXT: CMP32ri [[COPY]], -12345, implicit-def $eflags
; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
%0:gr32 = COPY $esi
CMP32ri %0, -12345, implicit-def $eflags
Expand Down Expand Up @@ -323,7 +318,6 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi
; CHECK-NEXT: CMP32ri8 [[COPY]], 0, implicit-def $eflags
; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags
; CHECK-NEXT: TEST32rr [[COPY]], [[COPY]], implicit-def $eflags
; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
%0:gr32 = COPY $esi
CMP32ri8 %0, 0, implicit-def $eflags
Expand All @@ -340,7 +334,6 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi
; CHECK-NEXT: TEST32rr [[COPY]], [[COPY]], implicit-def $eflags
; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags
; CHECK-NEXT: CMP32ri8 [[COPY]], 0, implicit-def $eflags
; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
%0:gr32 = COPY $esi
TEST32rr %0, %0, implicit-def $eflags
Expand All @@ -359,7 +352,6 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
; CHECK-NEXT: CMP64ri32 [[COPY]], @opt_redundant_flags_cmp_addr + 4, implicit-def $eflags
; CHECK-NEXT: $cl = SETCCr 7, implicit $eflags
; CHECK-NEXT: CMP64ri32 [[COPY]], @opt_redundant_flags_cmp_addr + 4, implicit-def $eflags
; CHECK-NEXT: $cl = SETCCr 3, implicit $eflags
%0:gr64 = COPY $rsi
CMP64ri32 %0, @opt_redundant_flags_cmp_addr + 4, implicit-def $eflags
Expand Down
1 change: 0 additions & 1 deletion llvm/test/CodeGen/X86/postalloc-coalescing.ll
Expand Up @@ -15,7 +15,6 @@ define fastcc i32 @_Z18yy_get_next_bufferv() nounwind {
; CHECK-NEXT: jne .LBB0_1
; CHECK-NEXT: .LBB0_3: # %bb158
; CHECK-NEXT: movb %al, 0
; CHECK-NEXT: cmpl $-1, %eax
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: retl
entry:
Expand Down

0 comments on commit 97a1570

Please sign in to comment.