Skip to content

Commit

Permalink
[X86]Remove TEST in AND32ri+TEST16rr in peephole-opt
Browse files Browse the repository at this point in the history
Previously we remove a pattern like:
  %reg = and32ri %in_reg, 5
  ...                         // EFLAGS not changed.
  %src_reg = subreg_to_reg 0, %reg, %subreg.sub_index
  test64rr %src_reg, %src_reg, implicit-def $eflags
We can remove test64rr since it has same functionality as and subreg_to_reg avoid the opt in previous code, so we handle this case specially.
And this case is also can be opted for the same reason, like:
  %reg = and32ri %in_reg, 5
  ...                         // EFLAGS not changed.
  %src_reg = copy %reg.sub_16bit:gr32
  test16rr %src_reg, %src_reg, implicit-def $eflags
The COPY from gr32 to gr16 prevent the opt in previous code too, just handle it specially as what we did for test64rr.

Reviewed By: skan

Differential Revision: https://reviews.llvm.org/D154193
  • Loading branch information
XinWang10 committed Jul 10, 2023
1 parent 8542d8f commit 2c64226
Show file tree
Hide file tree
Showing 2 changed files with 212 additions and 45 deletions.
74 changes: 50 additions & 24 deletions llvm/lib/Target/X86/X86InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -971,39 +971,55 @@ static bool findRedundantFlagInstr(MachineInstr &CmpInstr,
MachineInstr **AndInstr,
const TargetRegisterInfo *TRI,
bool &NoSignFlag, bool &ClearsOverflowFlag) {
if (CmpValDefInstr.getOpcode() != X86::SUBREG_TO_REG)
if (!(CmpValDefInstr.getOpcode() == X86::SUBREG_TO_REG &&
CmpInstr.getOpcode() == X86::TEST64rr) &&
!(CmpValDefInstr.getOpcode() == X86::COPY &&
CmpInstr.getOpcode() == X86::TEST16rr))
return false;

if (CmpInstr.getOpcode() != X86::TEST64rr)
return false;

// CmpInstr is a TEST64rr instruction, and `X86InstrInfo::analyzeCompare`
// guarantees that it's analyzable only if two registers are identical.
assert(
(CmpInstr.getOperand(0).getReg() == CmpInstr.getOperand(1).getReg()) &&
"CmpInstr is an analyzable TEST64rr, and `X86InstrInfo::analyzeCompare` "
"requires two reg operands are the same.");
// CmpInstr is a TEST16rr/TEST64rr instruction, and
// `X86InstrInfo::analyzeCompare` guarantees that it's analyzable only if two
// registers are identical.
assert((CmpInstr.getOperand(0).getReg() == CmpInstr.getOperand(1).getReg()) &&
"CmpInstr is an analyzable TEST16rr/TEST64rr, and "
"`X86InstrInfo::analyzeCompare` requires two reg operands are the"
"same.");

// Caller (`X86InstrInfo::optimizeCompareInstr`) guarantees that
// `CmpValDefInstr` defines the value that's used by `CmpInstr`; in this case
// if `CmpValDefInstr` sets the EFLAGS, it is likely that `CmpInstr` is
// redundant.
assert(
(MRI->getVRegDef(CmpInstr.getOperand(0).getReg()) == &CmpValDefInstr) &&
"Caller guarantees that TEST64rr is a user of SUBREG_TO_REG.");
"Caller guarantees that TEST64rr is a user of SUBREG_TO_REG or TEST16rr "
"is a user of COPY sub16bit.");
MachineInstr *VregDefInstr = nullptr;
if (CmpInstr.getOpcode() == X86::TEST16rr) {
VregDefInstr = MRI->getVRegDef(CmpValDefInstr.getOperand(1).getReg());
if (!VregDefInstr)
return false;
// We can only remove test when AND32ri or AND64ri32 whose imm can fit 16bit
// size, others 32/64 bit ops would test higher bits which test16rr don't
// want to.
if (!((VregDefInstr->getOpcode() == X86::AND32ri ||
VregDefInstr->getOpcode() == X86::AND64ri32) &&
isUInt<16>(VregDefInstr->getOperand(2).getImm())))
return false;
}

// As seen in X86 td files, CmpValDefInstr.getOperand(1).getImm() is typically
// 0.
if (CmpValDefInstr.getOperand(1).getImm() != 0)
return false;
if (CmpInstr.getOpcode() == X86::TEST64rr) {
// As seen in X86 td files, CmpValDefInstr.getOperand(1).getImm() is
// typically 0.
if (CmpValDefInstr.getOperand(1).getImm() != 0)
return false;

// As seen in X86 td files, CmpValDefInstr.getOperand(3) is typically
// sub_32bit or sub_xmm.
if (CmpValDefInstr.getOperand(3).getImm() != X86::sub_32bit)
return false;
// As seen in X86 td files, CmpValDefInstr.getOperand(3) is typically
// sub_32bit or sub_xmm.
if (CmpValDefInstr.getOperand(3).getImm() != X86::sub_32bit)
return false;

MachineInstr *VregDefInstr =
MRI->getVRegDef(CmpValDefInstr.getOperand(2).getReg());
VregDefInstr = MRI->getVRegDef(CmpValDefInstr.getOperand(2).getReg());
}

assert(VregDefInstr && "Must have a definition (SSA)");

Expand All @@ -1021,6 +1037,11 @@ static bool findRedundantFlagInstr(MachineInstr &CmpInstr,
// ... // EFLAGS not changed
// %extended_reg = subreg_to_reg 0, %reg, %subreg.sub_32bit
// test64rr %extended_reg, %extended_reg, implicit-def $eflags
// or
// %reg = and32* ...
// ... // EFLAGS not changed.
// %src_reg = copy %reg.sub_16bit:gr32
// test16rr %src_reg, %src_reg, implicit-def $eflags
//
// If subsequent readers use a subset of bits that don't change
// after `and*` instructions, it's likely that the test64rr could
Expand Down Expand Up @@ -4421,10 +4442,15 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
break;
}

// Look back for the following pattern, in which case the test64rr
// instruction could be erased.
// Look back for the following pattern, in which case the
// test16rr/test64rr instruction could be erased.
//
// Example:
// Example for test16rr:
// %reg = and32ri %in_reg, 5
// ... // EFLAGS not changed.
// %src_reg = copy %reg.sub_16bit:gr32
// test16rr %src_reg, %src_reg, implicit-def $eflags
// Example for test64rr:
// %reg = and32ri %in_reg, 5
// ... // EFLAGS not changed.
// %src_reg = subreg_to_reg 0, %reg, %subreg.sub_index
Expand Down
183 changes: 162 additions & 21 deletions llvm/test/CodeGen/X86/peephole-test-after-add.mir
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,29 @@
entry:
%3 = icmp ne i16 %0, 0
%4 = and i32 %1, 123456
%truc = trunc i32 %4 to i16
%5 = icmp eq i16 %truc, 0
%trunc = trunc i32 %4 to i16
%5 = icmp eq i16 %trunc, 0
%6 = select i1 %3, i1 %5, i1 false
br i1 %6, label %if.then, label %if.end

if.then: ; preds = %entry
store i16 %0, ptr %2, align 4
store i32 %4, ptr %2, align 4
br label %if.end

if.end: ; preds = %if.then, %entry
ret i16 0
}

define i16 @erase_test16_sf(i16 %0, i16 %1, ptr nocapture %2) {
entry:
%3 = icmp ne i16 %0, 0
%4 = and i16 %1, 1234
%5 = icmp slt i16 %4, 0
%6 = select i1 %3, i1 %5, i1 false
br i1 %6, label %if.then, label %if.end

if.then: ; preds = %entry
store i16 %4, ptr %2, align 4
br label %if.end

if.end: ; preds = %if.then, %entry
Expand Down Expand Up @@ -303,9 +319,8 @@ body: |
; CHECK-NEXT: bb.1.entry:
; CHECK-NEXT: successors: %bb.2(0x55555555), %bb.3(0x2aaaaaab)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[COPY1]], 123, implicit-def dead $eflags
; CHECK-NEXT: [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[COPY1]], 123, implicit-def $eflags
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr16 = COPY [[AND32ri]].sub_16bit
; CHECK-NEXT: TEST16rr [[COPY4]], [[COPY4]], implicit-def $eflags
; CHECK-NEXT: JCC_1 %bb.3, 5, implicit $eflags
; CHECK-NEXT: JMP_1 %bb.2
; CHECK-NEXT: {{ $}}
Expand Down Expand Up @@ -374,16 +389,16 @@ tracksDebugUserValues: false
registers:
- { id: 0, class: gr32, preferred-register: '' }
- { id: 1, class: gr32, preferred-register: '' }
- { id: 2, class: gr64, preferred-register: '' }
- { id: 3, class: gr16, preferred-register: '' }
- { id: 2, class: gr32, preferred-register: '' }
- { id: 3, class: gr64, preferred-register: '' }
- { id: 4, class: gr16, preferred-register: '' }
- { id: 5, class: gr32, preferred-register: '' }
- { id: 5, class: gr16, preferred-register: '' }
- { id: 6, class: gr32, preferred-register: '' }
- { id: 7, class: gr16, preferred-register: '' }
liveins:
- { reg: '$edi', virtual-reg: '%0' }
- { reg: '$esi', virtual-reg: '%1' }
- { reg: '$rdx', virtual-reg: '%2' }
- { reg: '$edi', virtual-reg: '%1' }
- { reg: '$esi', virtual-reg: '%2' }
- { reg: '$rdx', virtual-reg: '%3' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
Expand Down Expand Up @@ -429,7 +444,7 @@ body: |
; CHECK-NEXT: bb.1.entry:
; CHECK-NEXT: successors: %bb.2(0x55555555), %bb.3(0x2aaaaaab)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[COPY1]], 57920, implicit-def dead $eflags
; CHECK-NEXT: [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[COPY1]], 123456, implicit-def dead $eflags
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr16 = COPY [[AND32ri]].sub_16bit
; CHECK-NEXT: TEST16rr [[COPY4]], [[COPY4]], implicit-def $eflags
; CHECK-NEXT: JCC_1 %bb.3, 5, implicit $eflags
Expand All @@ -438,7 +453,7 @@ body: |
; CHECK-NEXT: bb.2.if.then:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: MOV16mr [[COPY]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store (s16) into %ir.2, align 4)
; CHECK-NEXT: MOV32mr [[COPY]], 1, $noreg, 0, $noreg, [[AND32ri]] :: (store (s32) into %ir.2)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3.if.end:
; CHECK-NEXT: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def dead $eflags
Expand All @@ -449,27 +464,153 @@ body: |
successors: %bb.3(0x60000000), %bb.2(0x20000000)
liveins: $edi, $esi, $rdx
%2:gr64 = COPY $rdx
%1:gr32 = COPY $esi
%0:gr32 = COPY $edi
%3:gr16 = COPY %0.sub_16bit
TEST16rr %3, %3, implicit-def $eflags
%3:gr64 = COPY $rdx
%2:gr32 = COPY $esi
%1:gr32 = COPY $edi
%5:gr16 = COPY %1.sub_16bit
TEST16rr %5, %5, implicit-def $eflags
JCC_1 %bb.2, 4, implicit $eflags
JMP_1 %bb.3
bb.3.entry:
successors: %bb.1(0x55555555), %bb.2(0x2aaaaaab)
%5:gr32 = AND32ri %1, 57920, implicit-def dead $eflags
%4:gr16 = COPY %5.sub_16bit
%0:gr32 = AND32ri %2, 123456, implicit-def dead $eflags
%4:gr16 = COPY %0.sub_16bit
TEST16rr %4, %4, implicit-def $eflags
JCC_1 %bb.2, 5, implicit $eflags
JMP_1 %bb.1
bb.1.if.then:
successors: %bb.2(0x80000000)
MOV16mr %2, 1, $noreg, 0, $noreg, %3 :: (store (s16) into %ir.2, align 4)
MOV32mr %3, 1, $noreg, 0, $noreg, %0 :: (store (s32) into %ir.2)
bb.2.if.end:
%6:gr32 = MOV32r0 implicit-def dead $eflags
%7:gr16 = COPY %6.sub_16bit
$ax = COPY %7
RET 0, $ax
...
---
name: erase_test16_sf
alignment: 16
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
callsEHReturn: false
callsUnwindInit: false
hasEHCatchret: false
hasEHScopes: false
hasEHFunclets: false
isOutlined: false
debugInstrRef: true
failsVerification: false
tracksDebugUserValues: false
registers:
- { id: 0, class: gr16, preferred-register: '' }
- { id: 1, class: gr32, preferred-register: '' }
- { id: 2, class: gr32, preferred-register: '' }
- { id: 3, class: gr64, preferred-register: '' }
- { id: 4, class: gr16, preferred-register: '' }
- { id: 5, class: gr32, preferred-register: '' }
- { id: 6, class: gr32, preferred-register: '' }
- { id: 7, class: gr16, preferred-register: '' }
liveins:
- { reg: '$edi', virtual-reg: '%1' }
- { reg: '$esi', virtual-reg: '%2' }
- { reg: '$rdx', virtual-reg: '%3' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 1
adjustsStack: false
hasCalls: false
stackProtector: ''
functionContext: ''
maxCallFrameSize: 4294967295
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
hasTailCall: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack: []
entry_values: []
callSites: []
debugValueSubstitutions: []
constants: []
machineFunctionInfo: {}
body: |
; CHECK-LABEL: name: erase_test16_sf
; CHECK: bb.0.entry:
; CHECK-NEXT: successors: %bb.1(0x60000000), %bb.3(0x20000000)
; CHECK-NEXT: liveins: $edi, $esi, $rdx
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rdx
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $esi
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY $edi
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr16 = COPY [[COPY2]].sub_16bit
; CHECK-NEXT: TEST16rr [[COPY3]], [[COPY3]], implicit-def $eflags
; CHECK-NEXT: JCC_1 %bb.3, 4, implicit $eflags
; CHECK-NEXT: JMP_1 %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.entry:
; CHECK-NEXT: successors: %bb.2(0x55555555), %bb.3(0x2aaaaaab)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[COPY1]], 1234, implicit-def dead $eflags
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr16 = COPY [[AND32ri]].sub_16bit
; CHECK-NEXT: TEST16rr [[COPY4]], [[COPY4]], implicit-def $eflags
; CHECK-NEXT: JCC_1 %bb.3, 9, implicit $eflags
; CHECK-NEXT: JMP_1 %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.if.then:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: MOV16mr [[COPY]], 1, $noreg, 0, $noreg, [[COPY4]] :: (store (s16) into %ir.2, align 4)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3.if.end:
; CHECK-NEXT: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def dead $eflags
; CHECK-NEXT: [[COPY5:%[0-9]+]]:gr16 = COPY [[MOV32r0_]].sub_16bit
; CHECK-NEXT: $ax = COPY [[COPY5]]
; CHECK-NEXT: RET 0, $ax
bb.0.entry:
successors: %bb.3(0x60000000), %bb.2(0x20000000)
liveins: $edi, $esi, $rdx
%3:gr64 = COPY $rdx
%2:gr32 = COPY $esi
%1:gr32 = COPY $edi
%4:gr16 = COPY %1.sub_16bit
TEST16rr %4, %4, implicit-def $eflags
JCC_1 %bb.2, 4, implicit $eflags
JMP_1 %bb.3
bb.3.entry:
successors: %bb.1(0x55555555), %bb.2(0x2aaaaaab)
%5:gr32 = AND32ri %2, 1234, implicit-def dead $eflags
%0:gr16 = COPY %5.sub_16bit
TEST16rr %0, %0, implicit-def $eflags
JCC_1 %bb.2, 9, implicit $eflags
JMP_1 %bb.1
bb.1.if.then:
successors: %bb.2(0x80000000)
MOV16mr %3, 1, $noreg, 0, $noreg, %0 :: (store (s16) into %ir.2, align 4)
bb.2.if.end:
%6:gr32 = MOV32r0 implicit-def dead $eflags
Expand Down

0 comments on commit 2c64226

Please sign in to comment.