Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[X86] Support ATOMIC_LOAD_FP_BINOP_MI for other binops #87524

Merged
merged 2 commits into from
Jun 8, 2024

Conversation

AtariDreams
Copy link
Contributor

Since we can bitcast and then do the same thing sub does in the table section above, I figured it was trivial to add fsub, fmul, and fdiv.

@llvmbot
Copy link
Collaborator

llvmbot commented Apr 3, 2024

@llvm/pr-subscribers-backend-x86

Author: AtariDreams (AtariDreams)

Changes

Since we can bitcast and then do the same thing sub does in the table section above, I figured it was trivial to add fsub, fmul, and fdiv.


Patch is 72.48 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/87524.diff

2 Files Affected:

  • (modified) llvm/lib/Target/X86/X86InstrCompiler.td (+4-1)
  • (modified) llvm/test/CodeGen/X86/atomic-fp.ll (+2050)
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index ce3b6af4cab47b..ba20954cb779b3 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1125,7 +1125,10 @@ multiclass ATOMIC_LOAD_FP_BINOP_MI<string Name, SDNode op> {
             Requires<[HasAVX512]>;
 }
 defm : ATOMIC_LOAD_FP_BINOP_MI<"ADD", fadd>;
-// FIXME: Add fsub, fmul, fdiv, ...
+defm : ATOMIC_LOAD_FP_BINOP_MI<"SUB", fsub>;
+defm : ATOMIC_LOAD_FP_BINOP_MI<"MUL", fmul>;
+defm : ATOMIC_LOAD_FP_BINOP_MI<"DIV", fdiv>;
+// FIXME: Add fcomi, fucomi, ...
 
 multiclass RELEASE_UNOP<string Name, dag dag8, dag dag16, dag dag32,
                         dag dag64> {
diff --git a/llvm/test/CodeGen/X86/atomic-fp.ll b/llvm/test/CodeGen/X86/atomic-fp.ll
index 1094edd19af438..125d19b75726bc 100644
--- a/llvm/test/CodeGen/X86/atomic-fp.ll
+++ b/llvm/test/CodeGen/X86/atomic-fp.ll
@@ -777,3 +777,2053 @@ bb:
   store atomic i64 %tmp9, ptr %tmp4 monotonic, align 8
   ret void
 }
+
+; ----- FSUB -----
+
+define dso_local void @fsub_32r(ptr %loc, float %val) nounwind {
+; X86-NOSSE-LABEL: fsub_32r:
+; X86-NOSSE:       # %bb.0:
+; X86-NOSSE-NEXT:    subl $8, %esp
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT:    movl (%eax), %ecx
+; X86-NOSSE-NEXT:    movl %ecx, (%esp)
+; X86-NOSSE-NEXT:    flds (%esp)
+; X86-NOSSE-NEXT:    fsubs {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    fstps {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT:    movl %ecx, (%eax)
+; X86-NOSSE-NEXT:    addl $8, %esp
+; X86-NOSSE-NEXT:    retl
+;
+; X86-SSE1-LABEL: fsub_32r:
+; X86-SSE1:       # %bb.0:
+; X86-SSE1-NEXT:    subl $8, %esp
+; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE1-NEXT:    movl (%eax), %ecx
+; X86-SSE1-NEXT:    movl %ecx, (%esp)
+; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE1-NEXT:    subss {{[0-9]+}}(%esp), %xmm0
+; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE1-NEXT:    movl %ecx, (%eax)
+; X86-SSE1-NEXT:    addl $8, %esp
+; X86-SSE1-NEXT:    retl
+;
+; X86-SSE2-LABEL: fsub_32r:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE2-NEXT:    subss {{[0-9]+}}(%esp), %xmm0
+; X86-SSE2-NEXT:    movss %xmm0, (%eax)
+; X86-SSE2-NEXT:    retl
+;
+; X86-AVX-LABEL: fsub_32r:
+; X86-AVX:       # %bb.0:
+; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX-NEXT:    vsubss {{[0-9]+}}(%esp), %xmm0, %xmm0
+; X86-AVX-NEXT:    vmovss %xmm0, (%eax)
+; X86-AVX-NEXT:    retl
+;
+; X64-SSE-LABEL: fsub_32r:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-SSE-NEXT:    subss %xmm0, %xmm1
+; X64-SSE-NEXT:    movss %xmm1, (%rdi)
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX-LABEL: fsub_32r:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
+; X64-AVX-NEXT:    vmovss %xmm0, (%rdi)
+; X64-AVX-NEXT:    retq
+  %1 = load atomic i32, ptr %loc seq_cst, align 4
+  %2 = bitcast i32 %1 to float
+  %sub = fsub float %2, %val
+  %3 = bitcast float %sub to i32
+  store atomic i32 %3, ptr %loc release, align 4
+  ret void
+}
+
+define dso_local void @fsub_64r(ptr %loc, double %val) nounwind {
+; X86-NOSSE-LABEL: fsub_64r:
+; X86-NOSSE:       # %bb.0:
+; X86-NOSSE-NEXT:    pushl %ebp
+; X86-NOSSE-NEXT:    movl %esp, %ebp
+; X86-NOSSE-NEXT:    andl $-8, %esp
+; X86-NOSSE-NEXT:    subl $32, %esp
+; X86-NOSSE-NEXT:    movl 8(%ebp), %eax
+; X86-NOSSE-NEXT:    fildll (%eax)
+; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    fldl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    fsubl 12(%ebp)
+; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NOSSE-NEXT:    movl %ecx, (%esp)
+; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    fildll (%esp)
+; X86-NOSSE-NEXT:    fistpll (%eax)
+; X86-NOSSE-NEXT:    movl %ebp, %esp
+; X86-NOSSE-NEXT:    popl %ebp
+; X86-NOSSE-NEXT:    retl
+;
+; X86-SSE1-LABEL: fsub_64r:
+; X86-SSE1:       # %bb.0:
+; X86-SSE1-NEXT:    pushl %ebp
+; X86-SSE1-NEXT:    movl %esp, %ebp
+; X86-SSE1-NEXT:    andl $-8, %esp
+; X86-SSE1-NEXT:    subl $16, %esp
+; X86-SSE1-NEXT:    movl 8(%ebp), %eax
+; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
+; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
+; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
+; X86-SSE1-NEXT:    movss %xmm1, (%esp)
+; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
+; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT:    fldl (%esp)
+; X86-SSE1-NEXT:    fsubl 12(%ebp)
+; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT:    movlps %xmm0, (%eax)
+; X86-SSE1-NEXT:    movl %ebp, %esp
+; X86-SSE1-NEXT:    popl %ebp
+; X86-SSE1-NEXT:    retl
+;
+; X86-SSE2-LABEL: fsub_64r:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    pushl %ebp
+; X86-SSE2-NEXT:    movl %esp, %ebp
+; X86-SSE2-NEXT:    andl $-8, %esp
+; X86-SSE2-NEXT:    subl $8, %esp
+; X86-SSE2-NEXT:    movl 8(%ebp), %eax
+; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE2-NEXT:    subsd 12(%ebp), %xmm0
+; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
+; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE2-NEXT:    movlps %xmm0, (%eax)
+; X86-SSE2-NEXT:    movl %ebp, %esp
+; X86-SSE2-NEXT:    popl %ebp
+; X86-SSE2-NEXT:    retl
+;
+; X86-AVX-LABEL: fsub_64r:
+; X86-AVX:       # %bb.0:
+; X86-AVX-NEXT:    pushl %ebp
+; X86-AVX-NEXT:    movl %esp, %ebp
+; X86-AVX-NEXT:    andl $-8, %esp
+; X86-AVX-NEXT:    subl $8, %esp
+; X86-AVX-NEXT:    movl 8(%ebp), %eax
+; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-AVX-NEXT:    vsubsd 12(%ebp), %xmm0, %xmm0
+; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
+; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-AVX-NEXT:    vmovlps %xmm0, (%eax)
+; X86-AVX-NEXT:    movl %ebp, %esp
+; X86-AVX-NEXT:    popl %ebp
+; X86-AVX-NEXT:    retl
+;
+; X64-SSE-LABEL: fsub_64r:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; X64-SSE-NEXT:    subsd %xmm0, %xmm1
+; X64-SSE-NEXT:    movsd %xmm1, (%rdi)
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX-LABEL: fsub_64r:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
+; X64-AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
+; X64-AVX-NEXT:    vmovsd %xmm0, (%rdi)
+; X64-AVX-NEXT:    retq
+  %1 = load atomic i64, ptr %loc seq_cst, align 8
+  %2 = bitcast i64 %1 to double
+  %sub = fsub double %2, %val
+  %3 = bitcast double %sub to i64
+  store atomic i64 %3, ptr %loc release, align 8
+  ret void
+}
+
+; Floating-point sub to a global using an immediate.
+define dso_local void @fsub_32g() nounwind {
+; X86-NOSSE-LABEL: fsub_32g:
+; X86-NOSSE:       # %bb.0:
+; X86-NOSSE-NEXT:    subl $8, %esp
+; X86-NOSSE-NEXT:    movl glob32, %eax
+; X86-NOSSE-NEXT:    movl %eax, (%esp)
+; X86-NOSSE-NEXT:    fld1
+; X86-NOSSE-NEXT:    fchs
+; X86-NOSSE-NEXT:    fadds (%esp)
+; X86-NOSSE-NEXT:    fstps {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT:    movl %eax, glob32
+; X86-NOSSE-NEXT:    addl $8, %esp
+; X86-NOSSE-NEXT:    retl
+;
+; X86-SSE1-LABEL: fsub_32g:
+; X86-SSE1:       # %bb.0:
+; X86-SSE1-NEXT:    subl $8, %esp
+; X86-SSE1-NEXT:    movl glob32, %eax
+; X86-SSE1-NEXT:    movl %eax, (%esp)
+; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE1-NEXT:    addss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE1-NEXT:    movl %eax, glob32
+; X86-SSE1-NEXT:    addl $8, %esp
+; X86-SSE1-NEXT:    retl
+;
+; X86-SSE2-LABEL: fsub_32g:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; X86-SSE2-NEXT:    addss glob32, %xmm0
+; X86-SSE2-NEXT:    movss %xmm0, glob32
+; X86-SSE2-NEXT:    retl
+;
+; X86-AVX-LABEL: fsub_32g:
+; X86-AVX:       # %bb.0:
+; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; X86-AVX-NEXT:    vaddss glob32, %xmm0, %xmm0
+; X86-AVX-NEXT:    vmovss %xmm0, glob32
+; X86-AVX-NEXT:    retl
+;
+; X64-SSE-LABEL: fsub_32g:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; X64-SSE-NEXT:    addss glob32(%rip), %xmm0
+; X64-SSE-NEXT:    movss %xmm0, glob32(%rip)
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX-LABEL: fsub_32g:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; X64-AVX-NEXT:    vaddss glob32(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT:    vmovss %xmm0, glob32(%rip)
+; X64-AVX-NEXT:    retq
+  %i = load atomic i32, ptr @glob32 monotonic, align 4
+  %f = bitcast i32 %i to float
+  %sub = fsub float %f, 1.000000e+00
+  %s = bitcast float %sub to i32
+  store atomic i32 %s, ptr @glob32 monotonic, align 4
+  ret void
+}
+
+define dso_local void @fsub_64g() nounwind {
+; X86-NOSSE-LABEL: fsub_64g:
+; X86-NOSSE:       # %bb.0:
+; X86-NOSSE-NEXT:    pushl %ebp
+; X86-NOSSE-NEXT:    movl %esp, %ebp
+; X86-NOSSE-NEXT:    andl $-8, %esp
+; X86-NOSSE-NEXT:    subl $32, %esp
+; X86-NOSSE-NEXT:    fildll glob64
+; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    fld1
+; X86-NOSSE-NEXT:    fchs
+; X86-NOSSE-NEXT:    faddl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT:    movl %eax, (%esp)
+; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    fildll (%esp)
+; X86-NOSSE-NEXT:    fistpll glob64
+; X86-NOSSE-NEXT:    movl %ebp, %esp
+; X86-NOSSE-NEXT:    popl %ebp
+; X86-NOSSE-NEXT:    retl
+;
+; X86-SSE1-LABEL: fsub_64g:
+; X86-SSE1:       # %bb.0:
+; X86-SSE1-NEXT:    pushl %ebp
+; X86-SSE1-NEXT:    movl %esp, %ebp
+; X86-SSE1-NEXT:    andl $-8, %esp
+; X86-SSE1-NEXT:    subl $16, %esp
+; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
+; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
+; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
+; X86-SSE1-NEXT:    movss %xmm1, (%esp)
+; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
+; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT:    fld1
+; X86-SSE1-NEXT:    fchs
+; X86-SSE1-NEXT:    faddl (%esp)
+; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT:    movlps %xmm0, glob64
+; X86-SSE1-NEXT:    movl %ebp, %esp
+; X86-SSE1-NEXT:    popl %ebp
+; X86-SSE1-NEXT:    retl
+;
+; X86-SSE2-LABEL: fsub_64g:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    pushl %ebp
+; X86-SSE2-NEXT:    movl %esp, %ebp
+; X86-SSE2-NEXT:    andl $-8, %esp
+; X86-SSE2-NEXT:    subl $8, %esp
+; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE2-NEXT:    addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
+; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE2-NEXT:    movlps %xmm0, glob64
+; X86-SSE2-NEXT:    movl %ebp, %esp
+; X86-SSE2-NEXT:    popl %ebp
+; X86-SSE2-NEXT:    retl
+;
+; X86-AVX-LABEL: fsub_64g:
+; X86-AVX:       # %bb.0:
+; X86-AVX-NEXT:    pushl %ebp
+; X86-AVX-NEXT:    movl %esp, %ebp
+; X86-AVX-NEXT:    andl $-8, %esp
+; X86-AVX-NEXT:    subl $8, %esp
+; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-AVX-NEXT:    vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
+; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-AVX-NEXT:    vmovlps %xmm0, glob64
+; X86-AVX-NEXT:    movl %ebp, %esp
+; X86-AVX-NEXT:    popl %ebp
+; X86-AVX-NEXT:    retl
+;
+; X64-SSE-LABEL: fsub_64g:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = [-1.0E+0,0.0E+0]
+; X64-SSE-NEXT:    addsd glob64(%rip), %xmm0
+; X64-SSE-NEXT:    movsd %xmm0, glob64(%rip)
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX-LABEL: fsub_64g:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [-1.0E+0,0.0E+0]
+; X64-AVX-NEXT:    vaddsd glob64(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT:    vmovsd %xmm0, glob64(%rip)
+; X64-AVX-NEXT:    retq
+  %i = load atomic i64, ptr @glob64 monotonic, align 8
+  %f = bitcast i64 %i to double
+  %sub = fsub double %f, 1.000000e+00
+  %s = bitcast double %sub to i64
+  store atomic i64 %s, ptr @glob64 monotonic, align 8
+  ret void
+}
+
+; Floating-point sub to a hard-coded immediate location using an immediate.
+define dso_local void @fsub_32imm() nounwind {
+; X86-NOSSE-LABEL: fsub_32imm:
+; X86-NOSSE:       # %bb.0:
+; X86-NOSSE-NEXT:    subl $8, %esp
+; X86-NOSSE-NEXT:    movl -559038737, %eax
+; X86-NOSSE-NEXT:    movl %eax, (%esp)
+; X86-NOSSE-NEXT:    fld1
+; X86-NOSSE-NEXT:    fchs
+; X86-NOSSE-NEXT:    fadds (%esp)
+; X86-NOSSE-NEXT:    fstps {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT:    movl %eax, -559038737
+; X86-NOSSE-NEXT:    addl $8, %esp
+; X86-NOSSE-NEXT:    retl
+;
+; X86-SSE1-LABEL: fsub_32imm:
+; X86-SSE1:       # %bb.0:
+; X86-SSE1-NEXT:    subl $8, %esp
+; X86-SSE1-NEXT:    movl -559038737, %eax
+; X86-SSE1-NEXT:    movl %eax, (%esp)
+; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE1-NEXT:    addss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE1-NEXT:    movl %eax, -559038737
+; X86-SSE1-NEXT:    addl $8, %esp
+; X86-SSE1-NEXT:    retl
+;
+; X86-SSE2-LABEL: fsub_32imm:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; X86-SSE2-NEXT:    addss -559038737, %xmm0
+; X86-SSE2-NEXT:    movss %xmm0, -559038737
+; X86-SSE2-NEXT:    retl
+;
+; X86-AVX-LABEL: fsub_32imm:
+; X86-AVX:       # %bb.0:
+; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; X86-AVX-NEXT:    vaddss -559038737, %xmm0, %xmm0
+; X86-AVX-NEXT:    vmovss %xmm0, -559038737
+; X86-AVX-NEXT:    retl
+;
+; X64-SSE-LABEL: fsub_32imm:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    movl $3735928559, %eax # imm = 0xDEADBEEF
+; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; X64-SSE-NEXT:    addss (%rax), %xmm0
+; X64-SSE-NEXT:    movss %xmm0, (%rax)
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX-LABEL: fsub_32imm:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl $3735928559, %eax # imm = 0xDEADBEEF
+; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; X64-AVX-NEXT:    vaddss (%rax), %xmm0, %xmm0
+; X64-AVX-NEXT:    vmovss %xmm0, (%rax)
+; X64-AVX-NEXT:    retq
+  %i = load atomic i32, ptr inttoptr (i32 3735928559 to ptr) monotonic, align 4
+  %f = bitcast i32 %i to float
+  %sub = fsub float %f, 1.000000e+00
+  %s = bitcast float %sub to i32
+  store atomic i32 %s, ptr inttoptr (i32 3735928559 to ptr) monotonic, align 4
+  ret void
+}
+
+define dso_local void @fsub_64imm() nounwind {
+; X86-NOSSE-LABEL: fsub_64imm:
+; X86-NOSSE:       # %bb.0:
+; X86-NOSSE-NEXT:    pushl %ebp
+; X86-NOSSE-NEXT:    movl %esp, %ebp
+; X86-NOSSE-NEXT:    andl $-8, %esp
+; X86-NOSSE-NEXT:    subl $32, %esp
+; X86-NOSSE-NEXT:    fildll -559038737
+; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    fld1
+; X86-NOSSE-NEXT:    fchs
+; X86-NOSSE-NEXT:    faddl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT:    movl %eax, (%esp)
+; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    fildll (%esp)
+; X86-NOSSE-NEXT:    fistpll -559038737
+; X86-NOSSE-NEXT:    movl %ebp, %esp
+; X86-NOSSE-NEXT:    popl %ebp
+; X86-NOSSE-NEXT:    retl
+;
+; X86-SSE1-LABEL: fsub_64imm:
+; X86-SSE1:       # %bb.0:
+; X86-SSE1-NEXT:    pushl %ebp
+; X86-SSE1-NEXT:    movl %esp, %ebp
+; X86-SSE1-NEXT:    andl $-8, %esp
+; X86-SSE1-NEXT:    subl $16, %esp
+; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
+; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
+; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
+; X86-SSE1-NEXT:    movss %xmm1, (%esp)
+; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
+; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT:    fld1
+; X86-SSE1-NEXT:    fchs
+; X86-SSE1-NEXT:    faddl (%esp)
+; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT:    movlps %xmm0, -559038737
+; X86-SSE1-NEXT:    movl %ebp, %esp
+; X86-SSE1-NEXT:    popl %ebp
+; X86-SSE1-NEXT:    retl
+;
+; X86-SSE2-LABEL: fsub_64imm:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    pushl %ebp
+; X86-SSE2-NEXT:    movl %esp, %ebp
+; X86-SSE2-NEXT:    andl $-8, %esp
+; X86-SSE2-NEXT:    subl $8, %esp
+; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE2-NEXT:    addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
+; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE2-NEXT:    movlps %xmm0, -559038737
+; X86-SSE2-NEXT:    movl %ebp, %esp
+; X86-SSE2-NEXT:    popl %ebp
+; X86-SSE2-NEXT:    retl
+;
+; X86-AVX-LABEL: fsub_64imm:
+; X86-AVX:       # %bb.0:
+; X86-AVX-NEXT:    pushl %ebp
+; X86-AVX-NEXT:    movl %esp, %ebp
+; X86-AVX-NEXT:    andl $-8, %esp
+; X86-AVX-NEXT:    subl $8, %esp
+; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-AVX-NEXT:    vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
+; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-AVX-NEXT:    vmovlps %xmm0, -559038737
+; X86-AVX-NEXT:    movl %ebp, %esp
+; X86-AVX-NEXT:    popl %ebp
+; X86-AVX-NEXT:    retl
+;
+; X64-SSE-LABEL: fsub_64imm:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    movl $3735928559, %eax # imm = 0xDEADBEEF
+; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = [-1.0E+0,0.0E+0]
+; X64-SSE-NEXT:    addsd (%rax), %xmm0
+; X64-SSE-NEXT:    movsd %xmm0, (%rax)
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX-LABEL: fsub_64imm:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl $3735928559, %eax # imm = 0xDEADBEEF
+; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [-1.0E+0,0.0E+0]
+; X64-AVX-NEXT:    vaddsd (%rax), %xmm0, %xmm0
+; X64-AVX-NEXT:    vmovsd %xmm0, (%rax)
+; X64-AVX-NEXT:    retq
+  %i = load atomic i64, ptr inttoptr (i64 3735928559 to ptr) monotonic, align 8
+  %f = bitcast i64 %i to double
+  %sub = fsub double %f, 1.000000e+00
+  %s = bitcast double %sub to i64
+  store atomic i64 %s, ptr inttoptr (i64 3735928559 to ptr) monotonic, align 8
+  ret void
+}
+
+; Floating-point sub to a stack location.
+define dso_local void @fsub_32stack() nounwind {
+; X86-NOSSE-LABEL: fsub_32stack:
+; X86-NOSSE:       # %bb.0:
+; X86-NOSSE-NEXT:    subl $12, %esp
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT:    movl %eax, (%esp)
+; X86-NOSSE-NEXT:    fld1
+; X86-NOSSE-NEXT:    fsubs (%esp)
+; X86-NOSSE-NEXT:    fstps {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    addl $12, %esp
+; X86-NOSSE-NEXT:    retl
+;
+; X86-SSE1-LABEL: fsub_32stack:
+; X86-SSE1:       # %bb.0:
+; X86-SSE1-NEXT:    subl $12, %esp
+; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE1-NEXT:    movl %eax, (%esp)
+; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; X86-SSE1-NEXT:    subss (%esp), %xmm0
+; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE1-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT:    addl $12, %...
[truncated]

Copy link

github-actions bot commented Apr 3, 2024

⚠️ We detected that you are using a GitHub private e-mail address to contribute to the repo.
Please turn off Keep my email addresses private setting in your account.
See LLVM Discourse for more information.

@AtariDreams
Copy link
Contributor Author

@phoebewang What do you think about this?

Copy link
Contributor

@phoebewang phoebewang left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@phoebewang What do you think about this?

I don't know the background, just looked two patches 8662083 and 586fad5.

My guess it's a bit complex for the first patch to support them. The second one simplified it and should expand to others. I don't see problem here, but maybe @topperc should take a look.

llvm/test/CodeGen/X86/atomic-fp.ll Show resolved Hide resolved
@AtariDreams AtariDreams force-pushed the fsub branch 2 times, most recently from f40dee0 to b8e135e Compare May 2, 2024 17:17
@AtariDreams
Copy link
Contributor Author

@topperc thoughts?

llvm/test/CodeGen/X86/atomic-fp.ll Outdated Show resolved Hide resolved
llvm/test/CodeGen/X86/atomic-fp.ll Outdated Show resolved Hide resolved
llvm/test/CodeGen/X86/atomic-fp.ll Outdated Show resolved Hide resolved
llvm/test/CodeGen/X86/atomic-fp.ll Outdated Show resolved Hide resolved
Since we can bitcast and then do the same thing sub does in the table section above, I figured it was trivial to add fsub, fmul, and  fdiv.
@AtariDreams
Copy link
Contributor Author

@phoebewang Thoughts on this?

@topperc
Copy link
Collaborator

topperc commented Jun 5, 2024

Why do you keep force pushing this patch?

Copy link
Collaborator

@topperc topperc left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@AtariDreams
Copy link
Contributor Author

@topperc can we please merge this

@phoebewang phoebewang merged commit bca7864 into llvm:main Jun 8, 2024
7 checks passed
HendrikHuebner pushed a commit to HendrikHuebner/llvm-project that referenced this pull request Jun 8, 2024
Since we can bitcast and then do the same thing sub does in the table
section above, I figured it was trivial to add fsub, fmul, and fdiv.
@AtariDreams AtariDreams deleted the fsub branch June 8, 2024 23:24
nekoshirro pushed a commit to nekoshirro/Alchemist-LLVM that referenced this pull request Jun 9, 2024
Since we can bitcast and then do the same thing sub does in the table
section above, I figured it was trivial to add fsub, fmul, and fdiv.

Signed-off-by: Hafidz Muzakky <ais.muzakky@gmail.com>
@HerrCai0907 HerrCai0907 mentioned this pull request Jun 13, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

4 participants