
release/18.x: [X86]: Add FPCW as a rounding control register #84058

Closed
wants to merge 1 commit into llvm:release/18.x from AtariDreams:18

Conversation

AtariDreams
Contributor

Backport 3e40c96

@AtariDreams changed the title from Backport ded5de1 to release/18.x: [X86]: Add FPCW as a rounding control register on Mar 5, 2024
@llvmbot
Collaborator

llvmbot commented Mar 5, 2024

@llvm/pr-subscribers-backend-x86

Author: AtariDreams (AtariDreams)

Changes

Backport 3e40c96


Full diff: https://github.com/llvm/llvm-project/pull/84058.diff

3 Files Affected:

  • (modified) llvm/lib/Target/X86/X86ISelLoweringCall.cpp (+1-3)
  • (modified) llvm/lib/Target/X86/X86InstrInfo.cpp (+5)
  • (modified) llvm/test/CodeGen/X86/pr59305.ll (+63-26)
diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
index d75bd4171fde9d..62a1af31de36d9 100644
--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -670,9 +670,7 @@ const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
 }
 
 ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const {
-  // FIXME: We should def X86::FPCW for x87 as well. But it affects a lot of lit
-  // tests at the moment, which is not what we expected.
-  static const MCPhysReg RCRegs[] = {X86::MXCSR};
+  static const MCPhysReg RCRegs[] = {X86::FPCW, X86::MXCSR};
   return RCRegs;
 }
 
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 9ac1f783b7f0a1..88e54e866fe9ed 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -3437,6 +3437,11 @@ static bool isX87Reg(unsigned Reg) {
 
 /// check if the instruction is X87 instruction
 bool X86::isX87Instruction(MachineInstr &MI) {
+  // Call defs X87 register, so we special case it here because
+  // otherwise calls are incorrectly flagged as x87 instructions
+  // as a result.
+  if (MI.isCall())
+    return false;
   for (const MachineOperand &MO : MI.operands()) {
     if (!MO.isReg())
       continue;
diff --git a/llvm/test/CodeGen/X86/pr59305.ll b/llvm/test/CodeGen/X86/pr59305.ll
index c2f6d21a41d4dc..4172aa6204def2 100644
--- a/llvm/test/CodeGen/X86/pr59305.ll
+++ b/llvm/test/CodeGen/X86/pr59305.ll
@@ -1,32 +1,69 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-pc-linux < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-pc-linux < %s | FileCheck %s --check-prefix=X86-64
+; RUN: llc -mtriple=i686-pc-linux < %s | FileCheck %s --check-prefix=X86
 
 define double @foo(double %0) #0 {
-; CHECK-LABEL: foo:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    subq $24, %rsp
-; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
-; CHECK-NEXT:    movl $1024, %edi # imm = 0x400
-; CHECK-NEXT:    callq fesetround@PLT
-; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0]
-; CHECK-NEXT:    divsd (%rsp), %xmm1 # 8-byte Folded Reload
-; CHECK-NEXT:    movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    movl $1024, %edi # imm = 0x400
-; CHECK-NEXT:    callq fesetround@PLT
-; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
-; CHECK-NEXT:    divsd (%rsp), %xmm0 # 8-byte Folded Reload
-; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    movl $1024, %edi # imm = 0x400
-; CHECK-NEXT:    callq fesetround@PLT
-; CHECK-NEXT:    movsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0]
-; CHECK-NEXT:    divsd (%rsp), %xmm2 # 8-byte Folded Reload
-; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
-; CHECK-NEXT:    # xmm0 = mem[0],zero
-; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
-; CHECK-NEXT:    # xmm1 = mem[0],zero
-; CHECK-NEXT:    callq fma@PLT
-; CHECK-NEXT:    addq $24, %rsp
-; CHECK-NEXT:    retq
+; X86-64-LABEL: foo:
+; X86-64:       # %bb.0:
+; X86-64-NEXT:    subq $24, %rsp
+; X86-64-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
+; X86-64-NEXT:    movl $1024, %edi # imm = 0x400
+; X86-64-NEXT:    callq fesetround@PLT
+; X86-64-NEXT:    movsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0]
+; X86-64-NEXT:    divsd (%rsp), %xmm1 # 8-byte Folded Reload
+; X86-64-NEXT:    movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X86-64-NEXT:    movl $1024, %edi # imm = 0x400
+; X86-64-NEXT:    callq fesetround@PLT
+; X86-64-NEXT:    movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
+; X86-64-NEXT:    divsd (%rsp), %xmm0 # 8-byte Folded Reload
+; X86-64-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X86-64-NEXT:    movl $1024, %edi # imm = 0x400
+; X86-64-NEXT:    callq fesetround@PLT
+; X86-64-NEXT:    movsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0]
+; X86-64-NEXT:    divsd (%rsp), %xmm2 # 8-byte Folded Reload
+; X86-64-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; X86-64-NEXT:    # xmm0 = mem[0],zero
+; X86-64-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
+; X86-64-NEXT:    # xmm1 = mem[0],zero
+; X86-64-NEXT:    callq fma@PLT
+; X86-64-NEXT:    addq $24, %rsp
+; X86-64-NEXT:    retq
+;
+; X86-LABEL: foo:
+; X86:       # %bb.0:
+; X86-NEXT:    subl $60, %esp
+; X86-NEXT:    fldl {{[0-9]+}}(%esp)
+; X86-NEXT:    fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
+; X86-NEXT:    wait
+; X86-NEXT:    movl $1024, (%esp) # imm = 0x400
+; X86-NEXT:    calll fesetround@PLT
+; X86-NEXT:    fld1
+; X86-NEXT:    fstl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
+; X86-NEXT:    fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
+; X86-NEXT:    fdivrp %st, %st(1)
+; X86-NEXT:    fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
+; X86-NEXT:    wait
+; X86-NEXT:    movl $1024, (%esp) # imm = 0x400
+; X86-NEXT:    calll fesetround@PLT
+; X86-NEXT:    fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
+; X86-NEXT:    fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
+; X86-NEXT:    fdivp %st, %st(1)
+; X86-NEXT:    fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
+; X86-NEXT:    wait
+; X86-NEXT:    movl $1024, (%esp) # imm = 0x400
+; X86-NEXT:    calll fesetround@PLT
+; X86-NEXT:    fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
+; X86-NEXT:    fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
+; X86-NEXT:    fdivp %st, %st(1)
+; X86-NEXT:    fstpl {{[0-9]+}}(%esp)
+; X86-NEXT:    fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
+; X86-NEXT:    fstpl {{[0-9]+}}(%esp)
+; X86-NEXT:    fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
+; X86-NEXT:    fstpl (%esp)
+; X86-NEXT:    wait
+; X86-NEXT:    calll fma
+; X86-NEXT:    addl $60, %esp
+; X86-NEXT:    retl
     %2 = call i32 @fesetround(i32 noundef 1024)
     %3 = call double @llvm.experimental.constrained.fdiv.f64(double 1.000000e+00, double %0, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
     %4 = call i32 @fesetround(i32 noundef 1024)

[X86]: Add FPCW as a rounding control register

The reason adding FPCW broke tests is that LLVM no longer killed the register before a call; the call then carried an x87 register as an operand, so the call itself was treated as a waiting x87 instruction when it should not have been.

This patch now has LLVM add a wait to the end of an x87 instruction if a call immediately follows it.
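
For illustration, a minimal sketch of the misclassification this fixes, paraphrasing the diff above rather than quoting the exact LLVM sources (that calls carry an implicit def of X86::FPCW once it joins getRoundingControlRegisters() is an assumption drawn from the commit message):

```cpp
// Minimal sketch mirroring the X86InstrInfo.cpp hunk above.
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
using namespace llvm;

// Assumed helper, as in X86InstrInfo.cpp: matches FPCW, FPSW, ST0-ST7, FP0-FP7.
static bool isX87Reg(unsigned Reg);

bool isX87Instruction(MachineInstr &MI) {
  // With FPCW listed in getRoundingControlRegisters(), every call carries an
  // implicit def of X86::FPCW. Without this early return, the operand scan
  // below would match that def and flag the call itself as an x87 instruction.
  if (MI.isCall())
    return false;
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg())
      continue;
    if (isX87Reg(MO.getReg()))
      return true;
  }
  return false;
}
```

The backport itself only adds the isCall() early return; the operand scan is unchanged from the existing function.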
@phoebewang
Contributor

I don't think it's worth backporting. x87 usage is not very common nowadays, and the problem has existed for a while.
I'd suggest not doing the backport, considering the isCall change may have side effects.

@AtariDreams AtariDreams closed this Mar 6, 2024
@AtariDreams AtariDreams deleted the 18 branch March 6, 2024 15:23