
release/18.x: [X86]: Add FPCW as a rounding control register #84058

Closed
wants to merge 1 commit into llvm:release/18.x from AtariDreams:18

Conversation

AtariDreams
Contributor

Backport 3e40c96

@AtariDreams changed the title from Backport ded5de1 to release/18.x: [X86]: Add FPCW as a rounding control register on Mar 5, 2024
@llvmbot
Collaborator

llvmbot commented Mar 5, 2024

@llvm/pr-subscribers-backend-x86

Author: AtariDreams (AtariDreams)

Changes

Backport 3e40c96


Full diff: https://github.com/llvm/llvm-project/pull/84058.diff

3 Files Affected:

  • (modified) llvm/lib/Target/X86/X86ISelLoweringCall.cpp (+1-3)
  • (modified) llvm/lib/Target/X86/X86InstrInfo.cpp (+5)
  • (modified) llvm/test/CodeGen/X86/pr59305.ll (+63-26)
diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
index d75bd4171fde9d..62a1af31de36d9 100644
--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -670,9 +670,7 @@ const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
 }
 
 ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const {
-  // FIXME: We should def X86::FPCW for x87 as well. But it affects a lot of lit
-  // tests at the moment, which is not what we expected.
-  static const MCPhysReg RCRegs[] = {X86::MXCSR};
+  static const MCPhysReg RCRegs[] = {X86::FPCW, X86::MXCSR};
   return RCRegs;
 }
 
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 9ac1f783b7f0a1..88e54e866fe9ed 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -3437,6 +3437,11 @@ static bool isX87Reg(unsigned Reg) {
 
 /// check if the instruction is X87 instruction
 bool X86::isX87Instruction(MachineInstr &MI) {
+  // Call defs X87 register, so we special case it here because
+  // otherwise calls are incorrectly flagged as x87 instructions
+  // as a result.
+  if (MI.isCall())
+    return false;
   for (const MachineOperand &MO : MI.operands()) {
     if (!MO.isReg())
       continue;
diff --git a/llvm/test/CodeGen/X86/pr59305.ll b/llvm/test/CodeGen/X86/pr59305.ll
index c2f6d21a41d4dc..4172aa6204def2 100644
--- a/llvm/test/CodeGen/X86/pr59305.ll
+++ b/llvm/test/CodeGen/X86/pr59305.ll
@@ -1,32 +1,69 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-pc-linux < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-pc-linux < %s | FileCheck %s --check-prefix=X86-64
+; RUN: llc -mtriple=i686-pc-linux < %s | FileCheck %s --check-prefix=X86
 
 define double @foo(double %0) #0 {
-; CHECK-LABEL: foo:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    subq $24, %rsp
-; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
-; CHECK-NEXT:    movl $1024, %edi # imm = 0x400
-; CHECK-NEXT:    callq fesetround@PLT
-; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0]
-; CHECK-NEXT:    divsd (%rsp), %xmm1 # 8-byte Folded Reload
-; CHECK-NEXT:    movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    movl $1024, %edi # imm = 0x400
-; CHECK-NEXT:    callq fesetround@PLT
-; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
-; CHECK-NEXT:    divsd (%rsp), %xmm0 # 8-byte Folded Reload
-; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    movl $1024, %edi # imm = 0x400
-; CHECK-NEXT:    callq fesetround@PLT
-; CHECK-NEXT:    movsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0]
-; CHECK-NEXT:    divsd (%rsp), %xmm2 # 8-byte Folded Reload
-; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
-; CHECK-NEXT:    # xmm0 = mem[0],zero
-; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
-; CHECK-NEXT:    # xmm1 = mem[0],zero
-; CHECK-NEXT:    callq fma@PLT
-; CHECK-NEXT:    addq $24, %rsp
-; CHECK-NEXT:    retq
+; X86-64-LABEL: foo:
+; X86-64:       # %bb.0:
+; X86-64-NEXT:    subq $24, %rsp
+; X86-64-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
+; X86-64-NEXT:    movl $1024, %edi # imm = 0x400
+; X86-64-NEXT:    callq fesetround@PLT
+; X86-64-NEXT:    movsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0]
+; X86-64-NEXT:    divsd (%rsp), %xmm1 # 8-byte Folded Reload
+; X86-64-NEXT:    movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X86-64-NEXT:    movl $1024, %edi # imm = 0x400
+; X86-64-NEXT:    callq fesetround@PLT
+; X86-64-NEXT:    movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
+; X86-64-NEXT:    divsd (%rsp), %xmm0 # 8-byte Folded Reload
+; X86-64-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X86-64-NEXT:    movl $1024, %edi # imm = 0x400
+; X86-64-NEXT:    callq fesetround@PLT
+; X86-64-NEXT:    movsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0]
+; X86-64-NEXT:    divsd (%rsp), %xmm2 # 8-byte Folded Reload
+; X86-64-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; X86-64-NEXT:    # xmm0 = mem[0],zero
+; X86-64-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
+; X86-64-NEXT:    # xmm1 = mem[0],zero
+; X86-64-NEXT:    callq fma@PLT
+; X86-64-NEXT:    addq $24, %rsp
+; X86-64-NEXT:    retq
+;
+; X86-LABEL: foo:
+; X86:       # %bb.0:
+; X86-NEXT:    subl $60, %esp
+; X86-NEXT:    fldl {{[0-9]+}}(%esp)
+; X86-NEXT:    fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
+; X86-NEXT:    wait
+; X86-NEXT:    movl $1024, (%esp) # imm = 0x400
+; X86-NEXT:    calll fesetround@PLT
+; X86-NEXT:    fld1
+; X86-NEXT:    fstl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
+; X86-NEXT:    fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
+; X86-NEXT:    fdivrp %st, %st(1)
+; X86-NEXT:    fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
+; X86-NEXT:    wait
+; X86-NEXT:    movl $1024, (%esp) # imm = 0x400
+; X86-NEXT:    calll fesetround@PLT
+; X86-NEXT:    fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
+; X86-NEXT:    fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
+; X86-NEXT:    fdivp %st, %st(1)
+; X86-NEXT:    fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
+; X86-NEXT:    wait
+; X86-NEXT:    movl $1024, (%esp) # imm = 0x400
+; X86-NEXT:    calll fesetround@PLT
+; X86-NEXT:    fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
+; X86-NEXT:    fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
+; X86-NEXT:    fdivp %st, %st(1)
+; X86-NEXT:    fstpl {{[0-9]+}}(%esp)
+; X86-NEXT:    fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
+; X86-NEXT:    fstpl {{[0-9]+}}(%esp)
+; X86-NEXT:    fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
+; X86-NEXT:    fstpl (%esp)
+; X86-NEXT:    wait
+; X86-NEXT:    calll fma
+; X86-NEXT:    addl $60, %esp
+; X86-NEXT:    retl
     %2 = call i32 @fesetround(i32 noundef 1024)
     %3 = call double @llvm.experimental.constrained.fdiv.f64(double 1.000000e+00, double %0, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
     %4 = call i32 @fesetround(i32 noundef 1024)

[X86]: Add FPCW as a rounding control register

The reason adding FPCW broke tests is that LLVM no longer killed the register before a call; the call then carried an x87 register as an operand, so the call itself was treated as a waiting x87 instruction when it should not have been.

This patch now has LLVM add a wait to the end of an x87 instruction if a call immediately follows it.
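
For illustration, a minimal sketch of the misclassification this fixes, paraphrasing the diff above rather than quoting the exact LLVM sources (that calls carry an implicit def of X86::FPCW once it joins getRoundingControlRegisters() is an assumption drawn from the commit message):

```cpp
// Minimal sketch mirroring the X86InstrInfo.cpp hunk above.
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
using namespace llvm;

// Assumed helper, as in X86InstrInfo.cpp: matches FPCW, FPSW, ST0-ST7, FP0-FP7.
static bool isX87Reg(unsigned Reg);

bool isX87Instruction(MachineInstr &MI) {
  // With FPCW listed in getRoundingControlRegisters(), every call carries an
  // implicit def of X86::FPCW. Without this early return, the operand scan
  // below would match that def and flag the call itself as an x87 instruction.
  if (MI.isCall())
    return false;
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg())
      continue;
    if (isX87Reg(MO.getReg()))
      return true;
  }
  return false;
}
```

The backport itself only adds the isCall() early return; the operand scan is unchanged from the existing function.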
@phoebewang
Contributor

I don't think it's worth backporting. x87 usage is not very common nowadays, and the problem has existed for a while.
I'd suggest not doing the backport, considering the isCall change may have side effects.

@AtariDreams AtariDreams closed this Mar 6, 2024
@AtariDreams AtariDreams deleted the 18 branch March 6, 2024 15:23