Skip to content

Conversation

KanRobert
Copy link
Contributor

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Jun 25, 2024

@llvm/pr-subscribers-backend-x86

Author: Shengchen Kan (KanRobert)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/96594.diff

2 Files Affected:

  • (modified) llvm/lib/Target/X86/X86FixupSetCC.cpp (+24-11)
  • (added) llvm/test/CodeGen/X86/apx/setzucc.ll (+49)
diff --git a/llvm/lib/Target/X86/X86FixupSetCC.cpp b/llvm/lib/Target/X86/X86FixupSetCC.cpp
index 5c7105988070c..812d3864d970b 100644
--- a/llvm/lib/Target/X86/X86FixupSetCC.cpp
+++ b/llvm/lib/Target/X86/X86FixupSetCC.cpp
@@ -17,6 +17,11 @@
 // performed by the setcc. Instead, we can use:
 // xor %eax, %eax; seta %al
 // This both avoids the stall, and encodes shorter.
+//
+// Furthurmore, we can use:
+// setzua %al
+// if feature zero-upper is available. It's faster than the xor+setcc sequence.
+// When r16-31 is used, it even encodes shorter.
 //===----------------------------------------------------------------------===//
 
 #include "X86.h"
@@ -46,6 +51,7 @@ class X86FixupSetCCPass : public MachineFunctionPass {
 
 private:
   MachineRegisterInfo *MRI = nullptr;
+  const X86Subtarget *ST = nullptr;
   const X86InstrInfo *TII = nullptr;
 
   enum { SearchBound = 16 };
@@ -61,7 +67,8 @@ FunctionPass *llvm::createX86FixupSetCC() { return new X86FixupSetCCPass(); }
 bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
   bool Changed = false;
   MRI = &MF.getRegInfo();
-  TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
+  ST = &MF.getSubtarget<X86Subtarget>();
+  TII = ST->getInstrInfo();
 
   SmallVector<MachineInstr*, 4> ToErase;
 
@@ -79,7 +86,8 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
         continue;
 
       MachineInstr *ZExt = nullptr;
-      for (auto &Use : MRI->use_instructions(MI.getOperand(0).getReg()))
+      Register Reg0 = MI.getOperand(0).getReg();
+      for (auto &Use : MRI->use_instructions(Reg0))
         if (Use.getOpcode() == X86::MOVZX32rr8)
           ZExt = &Use;
 
@@ -98,9 +106,8 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
         continue;
 
       // On 32-bit, we need to be careful to force an ABCD register.
-      const TargetRegisterClass *RC = MF.getSubtarget<X86Subtarget>().is64Bit()
-                                          ? &X86::GR32RegClass
-                                          : &X86::GR32_ABCDRegClass;
+      const TargetRegisterClass *RC =
+          ST->is64Bit() ? &X86::GR32RegClass : &X86::GR32_ABCDRegClass;
       if (!MRI->constrainRegClass(ZExt->getOperand(0).getReg(), RC)) {
         // If we cannot constrain the register, we would need an additional copy
         // and are better off keeping the MOVZX32rr8 we have now.
@@ -110,17 +117,23 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
       ++NumSubstZexts;
       Changed = true;
 
-      // Initialize a register with 0. This must go before the eflags def
+      // X86 setcc/setzucc only takes an output GR8, so fake a GR32 input by
+      // inserting the setcc/setzucc result into the low byte of the zeroed
+      // register.
       Register ZeroReg = MRI->createVirtualRegister(RC);
-      BuildMI(MBB, FlagsDefMI, MI.getDebugLoc(), TII->get(X86::MOV32r0),
-              ZeroReg);
+      if (ST->hasZU()) {
+        MI.setDesc(TII->get(X86::SETZUCCr));
+        BuildMI(*ZExt->getParent(), ZExt, ZExt->getDebugLoc(),
+                TII->get(TargetOpcode::IMPLICIT_DEF), ZeroReg);
+      } else
+        // Initialize a register with 0. This must go before the eflags def
+        BuildMI(MBB, FlagsDefMI, MI.getDebugLoc(), TII->get(X86::MOV32r0),
+                ZeroReg);
 
-      // X86 setcc only takes an output GR8, so fake a GR32 input by inserting
-      // the setcc result into the low byte of the zeroed register.
       BuildMI(*ZExt->getParent(), ZExt, ZExt->getDebugLoc(),
               TII->get(X86::INSERT_SUBREG), ZExt->getOperand(0).getReg())
           .addReg(ZeroReg)
-          .addReg(MI.getOperand(0).getReg())
+          .addReg(Reg0)
           .addImm(X86::sub_8bit);
       ToErase.push_back(ZExt);
     }
diff --git a/llvm/test/CodeGen/X86/apx/setzucc.ll b/llvm/test/CodeGen/X86/apx/setzucc.ll
new file mode 100644
index 0000000000000..0cd560576739a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/setzucc.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64 -mattr=+zu | FileCheck %s
+
+define i16 @i8(i8 %x) nounwind {
+; CHECK-LABEL: i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cmpb $3, %dil
+; CHECK-NEXT:    setzuae %al
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT:    retq
+  %t0 = icmp ugt i8 %x, 2
+  %if = select i1 %t0, i8 1, i8 0
+  %zext = zext i8 %if to i16
+  ret i16 %zext
+}
+
+define i16 @i16(i16 %x) nounwind {
+; CHECK-LABEL: i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cmpw $2, %di
+; CHECK-NEXT:    setzub %al
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT:    retq
+  %t0 = icmp ult i16 %x, 2
+  %if = select i1 %t0, i16 1, i16 0
+  ret i16 %if
+}
+
+define i32 @i32(i32 %x) nounwind {
+; CHECK-LABEL: i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cmpl $1, %edi
+; CHECK-NEXT:    setzue %al
+; CHECK-NEXT:    retq
+  %t0 = icmp eq i32 %x, 1
+  %if = select i1 %t0, i32 1, i32 0
+  ret i32 %if
+}
+
+define i64 @i64(i64 %x) nounwind {
+; CHECK-LABEL: i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cmpq $1, %rdi
+; CHECK-NEXT:    setzune %al
+; CHECK-NEXT:    retq
+  %t0 = icmp ne i64 %x, 1
+  %if = select i1 %t0, i64 1, i64 0
+  ret i64 %if
+}

@KanRobert KanRobert requested review from RKSimon and phoebewang June 25, 2024 05:32
@KanRobert KanRobert changed the title [X86][FixupSetCC] Substitute setcc + zext pair with setzucc when feature zu is available [X86][FixupSetCC] Substitute setcc + zext pair with setzucc if possible Jun 25, 2024
; CHECK-NEXT: retq
%t0 = icmp ugt i8 %x, 2
%if = select i1 %t0, i8 1, i8 0
%zext = zext i8 %if to i16
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about zext i1 %t0 to i16?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

MI.setDesc(TII->get(X86::SETZUCCr));
BuildMI(*ZExt->getParent(), ZExt, ZExt->getDebugLoc(),
TII->get(TargetOpcode::IMPLICIT_DEF), ZeroReg);
} else
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add parentheses for else block.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

TII->get(TargetOpcode::IMPLICIT_DEF), ZeroReg);
} else
// Initialize a register with 0. This must go before the eflags def
BuildMI(MBB, FlagsDefMI, MI.getDebugLoc(), TII->get(X86::MOV32r0),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was MBB incorrect here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's correct. MOV32r0 is in the same bb as FlagsDefineMI instead of INSERT_SUBREG.

const TargetRegisterClass *RC = MF.getSubtarget<X86Subtarget>().is64Bit()
? &X86::GR32RegClass
: &X86::GR32_ABCDRegClass;
const TargetRegisterClass *RC =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems unrelated.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's kind of related. There are 2 uses of X86Subtarget after this PR, so I define the variable ST for it. Unless we'd like to define the variable even for 1 use, I can not extract this into a separate NFC change.

Copy link
Contributor

@phoebewang phoebewang left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

@KanRobert KanRobert merged commit a4fef26 into llvm:main Jun 26, 2024
AlexisPerry pushed a commit to llvm-project-tlp/llvm-project that referenced this pull request Jul 9, 2024
phoebewang added a commit to phoebewang/llvm-project that referenced this pull request Mar 3, 2025
Patch llvm#96594 substitutes setcc + zext pair with setzucc, but it results in
redundant test because X86FlagsCopyLowering doesn't recognize it.

This patch removes redundant test by reverting setzucc to setcc (optimized)
+ zext.
phoebewang added a commit that referenced this pull request Mar 3, 2025
Patch #96594 substitutes setcc + zext pair with setzucc, but it results
in redundant test because X86FlagsCopyLowering doesn't recognize it.

This patch removes redundant test by reverting setzucc to setcc
(optimized) + zext.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants