Skip to content

Commit

Permalink
[X86FixupSetCC] Remember the preceding eflags defining instruction while we're scanning the basic block instead of looking back for it.
Browse files Browse the repository at this point in the history

Summary:
We're already scanning forward through the basic block. Might as
well just remember eflags defs instead of doing a bounded search
backwards later.

Based on a comment in D71841.

Reviewers: RKSimon, spatel, uweigand

Reviewed By: uweigand

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D71865
  • Loading branch information
topperc committed Dec 25, 2019
1 parent caf460d commit 4af5b23
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 69 deletions.
32 changes: 5 additions & 27 deletions llvm/lib/Target/X86/X86FixupSetCC.cpp
Expand Up @@ -43,14 +43,6 @@ class X86FixupSetCCPass : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &MF) override;

private:
// Find the preceding instruction that imp-defs eflags.
MachineInstr *findFlagsImpDef(MachineBasicBlock *MBB,
MachineBasicBlock::reverse_iterator MI);

// Return true if this is the opcode of a SetCC instruction with a register
// output.
bool isSetCCr(unsigned Opode);

MachineRegisterInfo *MRI = nullptr;
const X86InstrInfo *TII = nullptr;

Expand All @@ -64,22 +56,6 @@ char X86FixupSetCCPass::ID = 0;

FunctionPass *llvm::createX86FixupSetCC() { return new X86FixupSetCCPass(); }

// We expect the instruction *immediately* before the setcc to imp-def
// EFLAGS (because of scheduling glue). To make this less brittle w.r.t
// scheduling, look backwards until we hit the beginning of the
// basic-block, or a small bound (to avoid quadratic behavior).
MachineInstr *
X86FixupSetCCPass::findFlagsImpDef(MachineBasicBlock *MBB,
MachineBasicBlock::reverse_iterator MI) {
// FIXME: Should this be instr_rend(), and MI be reverse_instr_iterator?
auto MBBStart = MBB->rend();
for (int i = 0; (i < SearchBound) && (MI != MBBStart); ++i, ++MI)
if (MI->definesRegister(X86::EFLAGS))
return &*MI;

return nullptr;
}

bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
MRI = &MF.getRegInfo();
Expand All @@ -88,7 +64,12 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
SmallVector<MachineInstr*, 4> ToErase;

for (auto &MBB : MF) {
MachineInstr *FlagsDefMI = nullptr;
for (auto &MI : MBB) {
// Remember the most recent preceding eflags defining instruction.
if (MI.definesRegister(X86::EFLAGS))
FlagsDefMI = &MI;

// Find a setcc that is used by a zext.
// This doesn't have to be the only use, the transformation is safe
// regardless.
Expand All @@ -103,9 +84,6 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
if (!ZExt)
continue;

// Find the preceding instruction that imp-defs eflags.
MachineInstr *FlagsDefMI = findFlagsImpDef(
MI.getParent(), MachineBasicBlock::reverse_iterator(&MI));
if (!FlagsDefMI)
continue;

Expand Down
86 changes: 44 additions & 42 deletions llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll
Expand Up @@ -194,7 +194,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX512VL-32-NEXT: movl %eax, %edi
; AVX512VL-32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; AVX512VL-32-NEXT: vextractf32x4 $2, %zmm0, %xmm3
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm4 = xmm3[1,0]
; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm4
Expand All @@ -210,8 +210,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX512VL-32-NEXT: movl %eax, %esi
; AVX512VL-32-NEXT: xorl %edx, %edx
; AVX512VL-32-NEXT: movl %eax, %edi
; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm3
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
Expand All @@ -221,12 +220,14 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: vmovsd %xmm3, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: setae %dl
; AVX512VL-32-NEXT: shll $31, %edx
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
; AVX512VL-32-NEXT: movl $0, %eax
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX512VL-32-NEXT: movl %eax, %esi
; AVX512VL-32-NEXT: vextractf32x4 $3, %zmm0, %xmm3
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm4 = xmm3[1,0]
; AVX512VL-32-NEXT: xorl %ecx, %ecx
; AVX512VL-32-NEXT: xorl %edx, %edx
; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm4
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
Expand All @@ -236,13 +237,13 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: vmovsd %xmm4, (%esp)
; AVX512VL-32-NEXT: fldl (%esp)
; AVX512VL-32-NEXT: fisttpll (%esp)
; AVX512VL-32-NEXT: setae %cl
; AVX512VL-32-NEXT: shll $31, %ecx
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX512VL-32-NEXT: setae %dl
; AVX512VL-32-NEXT: shll $31, %edx
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
; AVX512VL-32-NEXT: xorl %eax, %eax
; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm3
; AVX512VL-32-NEXT: setb %bl
; AVX512VL-32-NEXT: kmovw %ebx, %k1
; AVX512VL-32-NEXT: setb %cl
; AVX512VL-32-NEXT: kmovw %ecx, %k1
; AVX512VL-32-NEXT: vmovapd %xmm1, %xmm4
; AVX512VL-32-NEXT: vmovsd %xmm2, %xmm4, %xmm4 {%k1}
; AVX512VL-32-NEXT: vsubsd %xmm4, %xmm3, %xmm3
Expand All @@ -252,6 +253,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX512VL-32-NEXT: xorl %ecx, %ecx
; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm0
; AVX512VL-32-NEXT: setb %bl
; AVX512VL-32-NEXT: kmovw %ebx, %k1
Expand All @@ -263,21 +265,20 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0
; AVX512VL-32-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT: vpinsrd $1, %edx, %xmm1, %xmm1
; AVX512VL-32-NEXT: vpinsrd $1, %esi, %xmm1, %xmm1
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX512VL-32-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1
; AVX512VL-32-NEXT: vpinsrd $3, %edi, %xmm1, %xmm1
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT: vpinsrd $1, %edi, %xmm2, %xmm2
; AVX512VL-32-NEXT: vpinsrd $1, {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 4-byte Folded Reload
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm2, %xmm2
; AVX512VL-32-NEXT: vpinsrd $3, {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 4-byte Folded Reload
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: movzbl %al, %eax
; AVX512VL-32-NEXT: shll $31, %eax
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX512VL-32-NEXT: setae %cl
; AVX512VL-32-NEXT: shll $31, %ecx
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
; AVX512VL-32-NEXT: vpinsrd $1, %ecx, %xmm3, %xmm3
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm3, %xmm3
; AVX512VL-32-NEXT: vpinsrd $3, {{[-0-9]+}}(%e{{[sb]}}p), %xmm3, %xmm3 # 4-byte Folded Reload
; AVX512VL-32-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
Expand Down Expand Up @@ -498,7 +499,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX512VL-32-NEXT: movl %eax, %edi
; AVX512VL-32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; AVX512VL-32-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX512VL-32-NEXT: vmovshdup {{.*#+}} xmm4 = xmm3[1,1,3,3]
; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm4
Expand All @@ -514,8 +515,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX512VL-32-NEXT: movl %eax, %esi
; AVX512VL-32-NEXT: xorl %edx, %edx
; AVX512VL-32-NEXT: movl %eax, %edi
; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm3
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
Expand All @@ -525,11 +525,13 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: vmovss %xmm4, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: setae %dl
; AVX512VL-32-NEXT: shll $31, %edx
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
; AVX512VL-32-NEXT: movl $0, %eax
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX512VL-32-NEXT: movl %eax, %esi
; AVX512VL-32-NEXT: vpermilps {{.*#+}} xmm4 = xmm3[3,1,2,3]
; AVX512VL-32-NEXT: xorl %ecx, %ecx
; AVX512VL-32-NEXT: xorl %edx, %edx
; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm4
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
Expand All @@ -539,14 +541,14 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: vmovss %xmm4, (%esp)
; AVX512VL-32-NEXT: flds (%esp)
; AVX512VL-32-NEXT: fisttpll (%esp)
; AVX512VL-32-NEXT: setae %cl
; AVX512VL-32-NEXT: shll $31, %ecx
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX512VL-32-NEXT: setae %dl
; AVX512VL-32-NEXT: shll $31, %edx
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
; AVX512VL-32-NEXT: xorl %eax, %eax
; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm3
; AVX512VL-32-NEXT: setb %bl
; AVX512VL-32-NEXT: kmovw %ebx, %k1
; AVX512VL-32-NEXT: setb %cl
; AVX512VL-32-NEXT: kmovw %ecx, %k1
; AVX512VL-32-NEXT: vmovaps %xmm1, %xmm4
; AVX512VL-32-NEXT: vmovss %xmm2, %xmm4, %xmm4 {%k1}
; AVX512VL-32-NEXT: vsubss %xmm4, %xmm3, %xmm3
Expand All @@ -556,6 +558,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX512VL-32-NEXT: xorl %ecx, %ecx
; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm0
; AVX512VL-32-NEXT: setb %bl
; AVX512VL-32-NEXT: kmovw %ebx, %k1
Expand All @@ -567,21 +570,20 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0
; AVX512VL-32-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT: vpinsrd $1, %edx, %xmm1, %xmm1
; AVX512VL-32-NEXT: vpinsrd $1, %esi, %xmm1, %xmm1
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX512VL-32-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1
; AVX512VL-32-NEXT: vpinsrd $3, %edi, %xmm1, %xmm1
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT: vpinsrd $1, %edi, %xmm2, %xmm2
; AVX512VL-32-NEXT: vpinsrd $1, {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 4-byte Folded Reload
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm2, %xmm2
; AVX512VL-32-NEXT: vpinsrd $3, {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 4-byte Folded Reload
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: movzbl %al, %eax
; AVX512VL-32-NEXT: shll $31, %eax
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX512VL-32-NEXT: setae %cl
; AVX512VL-32-NEXT: shll $31, %ecx
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
; AVX512VL-32-NEXT: vpinsrd $1, %ecx, %xmm3, %xmm3
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm3, %xmm3
; AVX512VL-32-NEXT: vpinsrd $3, {{[-0-9]+}}(%e{{[sb]}}p), %xmm3, %xmm3 # 4-byte Folded Reload
; AVX512VL-32-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
Expand Down

0 comments on commit 4af5b23

Please sign in to comment.