-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[X86][FixupSetCC] Substitute setcc + zext pair with setzucc if possible #96594
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-x86 Author: Shengchen Kan (KanRobert) ChangesFull diff: https://github.com/llvm/llvm-project/pull/96594.diff 2 Files Affected:
diff --git a/llvm/lib/Target/X86/X86FixupSetCC.cpp b/llvm/lib/Target/X86/X86FixupSetCC.cpp
index 5c7105988070c..812d3864d970b 100644
--- a/llvm/lib/Target/X86/X86FixupSetCC.cpp
+++ b/llvm/lib/Target/X86/X86FixupSetCC.cpp
@@ -17,6 +17,11 @@
// performed by the setcc. Instead, we can use:
// xor %eax, %eax; seta %al
// This both avoids the stall, and encodes shorter.
+//
+// Furthurmore, we can use:
+// setzua %al
+// if feature zero-upper is available. It's faster than the xor+setcc sequence.
+// When r16-31 is used, it even encodes shorter.
//===----------------------------------------------------------------------===//
#include "X86.h"
@@ -46,6 +51,7 @@ class X86FixupSetCCPass : public MachineFunctionPass {
private:
MachineRegisterInfo *MRI = nullptr;
+ const X86Subtarget *ST = nullptr;
const X86InstrInfo *TII = nullptr;
enum { SearchBound = 16 };
@@ -61,7 +67,8 @@ FunctionPass *llvm::createX86FixupSetCC() { return new X86FixupSetCCPass(); }
bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
MRI = &MF.getRegInfo();
- TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
+ ST = &MF.getSubtarget<X86Subtarget>();
+ TII = ST->getInstrInfo();
SmallVector<MachineInstr*, 4> ToErase;
@@ -79,7 +86,8 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
continue;
MachineInstr *ZExt = nullptr;
- for (auto &Use : MRI->use_instructions(MI.getOperand(0).getReg()))
+ Register Reg0 = MI.getOperand(0).getReg();
+ for (auto &Use : MRI->use_instructions(Reg0))
if (Use.getOpcode() == X86::MOVZX32rr8)
ZExt = &Use;
@@ -98,9 +106,8 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
continue;
// On 32-bit, we need to be careful to force an ABCD register.
- const TargetRegisterClass *RC = MF.getSubtarget<X86Subtarget>().is64Bit()
- ? &X86::GR32RegClass
- : &X86::GR32_ABCDRegClass;
+ const TargetRegisterClass *RC =
+ ST->is64Bit() ? &X86::GR32RegClass : &X86::GR32_ABCDRegClass;
if (!MRI->constrainRegClass(ZExt->getOperand(0).getReg(), RC)) {
// If we cannot constrain the register, we would need an additional copy
// and are better off keeping the MOVZX32rr8 we have now.
@@ -110,17 +117,23 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
++NumSubstZexts;
Changed = true;
- // Initialize a register with 0. This must go before the eflags def
+ // X86 setcc/setzucc only takes an output GR8, so fake a GR32 input by
+ // inserting the setcc/setzucc result into the low byte of the zeroed
+ // register.
Register ZeroReg = MRI->createVirtualRegister(RC);
- BuildMI(MBB, FlagsDefMI, MI.getDebugLoc(), TII->get(X86::MOV32r0),
- ZeroReg);
+ if (ST->hasZU()) {
+ MI.setDesc(TII->get(X86::SETZUCCr));
+ BuildMI(*ZExt->getParent(), ZExt, ZExt->getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), ZeroReg);
+ } else
+ // Initialize a register with 0. This must go before the eflags def
+ BuildMI(MBB, FlagsDefMI, MI.getDebugLoc(), TII->get(X86::MOV32r0),
+ ZeroReg);
- // X86 setcc only takes an output GR8, so fake a GR32 input by inserting
- // the setcc result into the low byte of the zeroed register.
BuildMI(*ZExt->getParent(), ZExt, ZExt->getDebugLoc(),
TII->get(X86::INSERT_SUBREG), ZExt->getOperand(0).getReg())
.addReg(ZeroReg)
- .addReg(MI.getOperand(0).getReg())
+ .addReg(Reg0)
.addImm(X86::sub_8bit);
ToErase.push_back(ZExt);
}
diff --git a/llvm/test/CodeGen/X86/apx/setzucc.ll b/llvm/test/CodeGen/X86/apx/setzucc.ll
new file mode 100644
index 0000000000000..0cd560576739a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/setzucc.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64 -mattr=+zu | FileCheck %s
+
+define i16 @i8(i8 %x) nounwind {
+; CHECK-LABEL: i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cmpb $3, %dil
+; CHECK-NEXT: setzuae %al
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: retq
+ %t0 = icmp ugt i8 %x, 2
+ %if = select i1 %t0, i8 1, i8 0
+ %zext = zext i8 %if to i16
+ ret i16 %zext
+}
+
+define i16 @i16(i16 %x) nounwind {
+; CHECK-LABEL: i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cmpw $2, %di
+; CHECK-NEXT: setzub %al
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: retq
+ %t0 = icmp ult i16 %x, 2
+ %if = select i1 %t0, i16 1, i16 0
+ ret i16 %if
+}
+
+define i32 @i32(i32 %x) nounwind {
+; CHECK-LABEL: i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cmpl $1, %edi
+; CHECK-NEXT: setzue %al
+; CHECK-NEXT: retq
+ %t0 = icmp eq i32 %x, 1
+ %if = select i1 %t0, i32 1, i32 0
+ ret i32 %if
+}
+
+define i64 @i64(i64 %x) nounwind {
+; CHECK-LABEL: i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cmpq $1, %rdi
+; CHECK-NEXT: setzune %al
+; CHECK-NEXT: retq
+ %t0 = icmp ne i64 %x, 1
+ %if = select i1 %t0, i64 1, i64 0
+ ret i64 %if
+}
|
…ure zu is available
llvm/test/CodeGen/X86/apx/setzucc.ll
Outdated
; CHECK-NEXT: retq | ||
%t0 = icmp ugt i8 %x, 2 | ||
%if = select i1 %t0, i8 1, i8 0 | ||
%zext = zext i8 %if to i16 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How about zext i1 %t0
to i16?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
MI.setDesc(TII->get(X86::SETZUCCr)); | ||
BuildMI(*ZExt->getParent(), ZExt, ZExt->getDebugLoc(), | ||
TII->get(TargetOpcode::IMPLICIT_DEF), ZeroReg); | ||
} else |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add parentheses for else block.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
TII->get(TargetOpcode::IMPLICIT_DEF), ZeroReg); | ||
} else | ||
// Initialize a register with 0. This must go before the eflags def | ||
BuildMI(MBB, FlagsDefMI, MI.getDebugLoc(), TII->get(X86::MOV32r0), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Was MBB incorrect here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's correct. MOV32r0
is in the same bb as FlagsDefineMI
instead of INSERT_SUBREG
.
const TargetRegisterClass *RC = MF.getSubtarget<X86Subtarget>().is64Bit() | ||
? &X86::GR32RegClass | ||
: &X86::GR32_ABCDRegClass; | ||
const TargetRegisterClass *RC = |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems unrelated.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's kind of related. There are 2 uses of X86Subtarget
after this PR, so I define the variable ST
for it. Unless we'd like to define the variable even for 1 use, I can not extract this into a separate NFC change.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
Patch llvm#96594 substitutes setcc + zext pair with setzucc, but it results in redundant test because X86FlagsCopyLowering doesn't recognize it. This patch removes redundant test by reverting setzucc to setcc (optimized) + zext.
Patch #96594 substitutes setcc + zext pair with setzucc, but it results in redundant test because X86FlagsCopyLowering doesn't recognize it. This patch removes redundant test by reverting setzucc to setcc (optimized) + zext.
No description provided.