Skip to content

Commit

Permalink
[X86] Teach X86FloatingPoint's handleCall to only erase the FP stack …
Browse files Browse the repository at this point in the history
…if there is a regmask operand that clobbers the FP stack.

There are some calls to functions like `__alloca` that are missing
a regmask operand. Lack of a regmask operand means that all
registers that aren't mentioned by def operands are preserved.
__alloca only updates EAX and ESP and has def operands for
them so this is ok. Because there is no regmask the register
allocator won't spill the FP registers across the call. Assuming
we want to keep the FP stack untouched across these calls, we
need to handle this in the FP stackifier.

We might want to add a proper regmask operand to the code that
creates these calls to indicate all registers are preserved, but we'd
still need this change to the FP stackifier to know to preserve the
FP stack for such a regmask.

The test is kind of long, but bugpoint wasn't able to reduce it
any further.

Fixes PR50782

Reviewed By: pengfei

Differential Revision: https://reviews.llvm.org/D105762
  • Loading branch information
topperc committed Jul 12, 2021
1 parent 46580d4 commit d5c97f4
Show file tree
Hide file tree
Showing 2 changed files with 145 additions and 0 deletions.
24 changes: 24 additions & 0 deletions llvm/lib/Target/X86/X86FloatingPoint.cpp
Expand Up @@ -982,8 +982,24 @@ void FPS::handleCall(MachineBasicBlock::iterator &I) {
MachineInstr &MI = *I;
unsigned STReturns = 0;

bool ClobbersFPStack = false;
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &Op = MI.getOperand(i);
// Check if this call clobbers the FP stack. Checking FP0 alone
// is sufficient; the debug-only assert verifies FP1-FP7 agree with it.
if (Op.isRegMask()) {
bool ClobbersFP0 = Op.clobbersPhysReg(X86::FP0);
#ifndef NDEBUG
static_assert(X86::FP7 - X86::FP0 == 7, "sequential FP regnumbers");
for (unsigned i = 1; i != 8; ++i)
assert(Op.clobbersPhysReg(X86::FP0 + i) == ClobbersFP0 &&
"Inconsistent FP register clobber");
#endif

if (ClobbersFP0)
ClobbersFPStack = true;
}

if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
continue;

Expand All @@ -998,6 +1014,14 @@ void FPS::handleCall(MachineBasicBlock::iterator &I) {
--e;
}

// Most calls should have a regmask that clobbers the FP registers. If it
// isn't present then the register allocator didn't spill the FP registers
// so they are still on the stack.
assert((ClobbersFPStack || STReturns == 0) &&
"ST returns without FP stack clobber");
if (!ClobbersFPStack)
return;

unsigned N = countTrailingOnes(STReturns);

// FP registers used for function return must be consecutive starting at
Expand Down
121 changes: 121 additions & 0 deletions llvm/test/CodeGen/X86/pr50782.ll
@@ -0,0 +1,121 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-w64-windows-gnu | FileCheck %s

; Globals loaded from or stored to by @h.
@a = global i32 0, align 4
@b = global float 0.000000e+00, align 4
@d = global float 0.000000e+00, align 4
@f = global i32 0, align 4
@g = global float 0.000000e+00, align 4
@e = global i32 0, align 4
@c = global float* null, align 4

; The FP stack should be preserved across the call to __alloca.
;
; Regression test for PR50782. The variable-sized alloca in the entry block
; shows up as `calll __alloca` in the expected assembly for this target. The
; argument %i is loaded onto the x87 stack before that call (`flds 8(%ebp)`)
; and is first consumed after it (`fsts 8(%esi)`, `fadds _b`), so the value
; has to remain on the FP stack across the call.
define void @h(float %i) {
; CHECK-LABEL: h:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushl %ebp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .cfi_offset %ebp, -8
; CHECK-NEXT: movl %esp, %ebp
; CHECK-NEXT: .cfi_def_cfa_register %ebp
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: andl $-16, %esp
; CHECK-NEXT: subl $32, %esp
; CHECK-NEXT: movl %esp, %esi
; CHECK-NEXT: .cfi_offset %esi, -12
; CHECK-NEXT: flds 8(%ebp)
; CHECK-NEXT: movl _a, %ecx
; CHECK-NEXT: leal 3(%ecx), %eax
; CHECK-NEXT: andl $-4, %eax
; CHECK-NEXT: calll __alloca
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: andl $-16, %eax
; CHECK-NEXT: movl %eax, %esp
; CHECK-NEXT: fsts 8(%esi) # 4-byte Folded Spill
; CHECK-NEXT: fadds _b
; CHECK-NEXT: fsts _d
; CHECK-NEXT: fld1
; CHECK-NEXT: fldz
; CHECK-NEXT: testl %ecx, %ecx
; CHECK-NEXT: fld %st(0)
; CHECK-NEXT: fld %st(2)
; CHECK-NEXT: je LBB0_2
; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: fstp %st(1)
; CHECK-NEXT: fstp %st(0)
; CHECK-NEXT: movl _f, %ecx
; CHECK-NEXT: flds (%eax,%ecx,4)
; CHECK-NEXT: fld %st(3)
; CHECK-NEXT: LBB0_2: # %for.cond1.preheader
; CHECK-NEXT: movl _e, %ecx
; CHECK-NEXT: movl %ecx, 12(%esi)
; CHECK-NEXT: fildl 12(%esi)
; CHECK-NEXT: movl _c, %edx
; CHECK-NEXT: jmp LBB0_3
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB0_5: # %for.inc
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: fxch %st(5)
; CHECK-NEXT: fadd %st(4), %st
; CHECK-NEXT: fxch %st(5)
; CHECK-NEXT: LBB0_3: # %for.cond1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: fld %st(5)
; CHECK-NEXT: fmul %st(4), %st
; CHECK-NEXT: fdiv %st(2), %st
; CHECK-NEXT: fadd %st(3), %st
; CHECK-NEXT: fsts _g
; CHECK-NEXT: fxch %st(1)
; CHECK-NEXT: fucom %st(1)
; CHECK-NEXT: fstp %st(1)
; CHECK-NEXT: fnstsw %ax
; CHECK-NEXT: # kill: def $ah killed $ah killed $ax
; CHECK-NEXT: sahf
; CHECK-NEXT: jbe LBB0_5
; CHECK-NEXT: # %bb.4: # %if.then
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: flds 8(%esi) # 4-byte Folded Reload
; CHECK-NEXT: fstps (%edx,%ecx,4)
; CHECK-NEXT: jmp LBB0_5
entry:
%0 = load i32, i32* @a, align 4
; Variable-sized alloca: the byte count %0 is only known at run time.
%1 = alloca i8, i32 %0, align 16
%2 = load float, float* @b, align 4
; First use of the FP argument %i; it comes after the alloca.
%add = fadd float %2, %i
store float %add, float* @d, align 4
%tobool.not = icmp eq i32 %0, 0
br i1 %tobool.not, label %for.cond1.preheader, label %for.body.preheader

for.body.preheader: ; preds = %entry
; Reads a float out of the dynamically allocated buffer.
%3 = bitcast i8* %1 to float*
%4 = load i32, i32* @f, align 4
%arrayidx.le = getelementptr inbounds float, float* %3, i32 %4
%5 = load float, float* %arrayidx.le, align 4
br label %for.cond1.preheader

for.cond1.preheader: ; preds = %for.body.preheader, %entry
%k.0.lcssa = phi float [ %5, %for.body.preheader ], [ undef, %entry ]
%l.0.lcssa = phi float [ %add, %for.body.preheader ], [ 1.000000e+00, %entry ]
%6 = load i32, i32* @e, align 4
%conv = sitofp i32 %6 to float
%7 = load float*, float** @c, align 4
%arrayidx4 = getelementptr inbounds float, float* %7, i32 %6
br label %for.cond1

; The loop below has no exit edge: every path branches back to %for.cond1.
for.cond1: ; preds = %for.inc, %for.cond1.preheader
%m.0 = phi float [ %add5, %for.inc ], [ %add, %for.cond1.preheader ]
%mul = fmul float %m.0, 0.000000e+00
%div = fdiv float %mul, %l.0.lcssa
%add2 = fadd float %k.0.lcssa, %div
store float %add2, float* @g, align 4
%cmp = fcmp olt float %add2, %conv
br i1 %cmp, label %if.then, label %for.inc

if.then: ; preds = %for.cond1
; %i is used again here, inside the loop; the expected assembly reloads it
; from the spill slot written right after the __alloca call.
store float %i, float* %arrayidx4, align 4
br label %for.inc

for.inc: ; preds = %if.then, %for.cond1
%add5 = fadd float %m.0, 1.000000e+00
br label %for.cond1
}

0 comments on commit d5c97f4

Please sign in to comment.