diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 056fc9b7ff6d1..b7ff255f0881f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -27518,14 +27518,14 @@ bool X86TargetLowering::needsCmpXchgNb(Type *MemType) const {
   return false;
 }
 
-// TODO: In 32-bit mode, use FISTP when X87 is available?
 bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
   Type *MemType = SI->getValueOperand()->getType();
 
   bool NoImplicitFloatOps =
       SI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat);
   if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
-      !Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE1())
+      !Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
+      (Subtarget.hasSSE1() || Subtarget.hasX87()))
     return false;
 
   return needsCmpXchgNb(MemType);
@@ -28286,28 +28286,52 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG,
   if (VT == MVT::i64 && !IsTypeLegal) {
     // For illegal i64 atomic_stores, we can try to use MOVQ or MOVLPS if SSE
     // is enabled.
-    // FIXME: Use fist with X87.
     bool NoImplicitFloatOps =
         DAG.getMachineFunction().getFunction().hasFnAttribute(
             Attribute::NoImplicitFloat);
-    if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
-        Subtarget.hasSSE1()) {
-      SDValue SclToVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
-                                     Node->getOperand(2));
-      MVT StVT = Subtarget.hasSSE2() ? MVT::v2i64 : MVT::v4f32;
-      SclToVec = DAG.getBitcast(StVT, SclToVec);
-      SDVTList Tys = DAG.getVTList(MVT::Other);
-      SDValue Ops[] = { Node->getChain(), SclToVec, Node->getBasePtr() };
-      SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::VEXTRACT_STORE, dl, Tys,
-                                              Ops, MVT::i64,
-                                              Node->getMemOperand());
-
-      // If this is a sequentially consistent store, also emit an appropriate
-      // barrier.
-      if (IsSeqCst)
-        Chain = emitLockedStackOp(DAG, Subtarget, Chain, dl);
-
-      return Chain;
+    if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) {
+      SDValue Chain;
+      if (Subtarget.hasSSE1()) {
+        SDValue SclToVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
+                                       Node->getOperand(2));
+        MVT StVT = Subtarget.hasSSE2() ? MVT::v2i64 : MVT::v4f32;
+        SclToVec = DAG.getBitcast(StVT, SclToVec);
+        SDVTList Tys = DAG.getVTList(MVT::Other);
+        SDValue Ops[] = {Node->getChain(), SclToVec, Node->getBasePtr()};
+        Chain = DAG.getMemIntrinsicNode(X86ISD::VEXTRACT_STORE, dl, Tys, Ops,
+                                        MVT::i64, Node->getMemOperand());
+      } else if (Subtarget.hasX87()) {
+        // First load this into an 80-bit X87 register using a stack temporary.
+        // This will put the whole integer into the significand.
+        SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
+        int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+        MachinePointerInfo MPI =
+            MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
+        Chain =
+            DAG.getStore(Node->getChain(), dl, Node->getOperand(2), StackPtr,
+                         MPI, /*Align*/ 0, MachineMemOperand::MOStore);
+        SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
+        SDValue LdOps[] = {Chain, StackPtr};
+        SDValue Value =
+            DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, LdOps, MVT::i64, MPI,
+                                    /*Align*/ 0, MachineMemOperand::MOLoad);
+        Chain = Value.getValue(1);
+
+        // Now use an FIST to do the atomic store.
+        SDValue StoreOps[] = {Chain, Value, Node->getBasePtr()};
+        Chain =
+            DAG.getMemIntrinsicNode(X86ISD::FIST, dl, DAG.getVTList(MVT::Other),
+                                    StoreOps, MVT::i64, Node->getMemOperand());
+      }
+
+      if (Chain) {
+        // If this is a sequentially consistent store, also emit an appropriate
+        // barrier.
+        if (IsSeqCst)
+          Chain = emitLockedStackOp(DAG, Subtarget, Chain, dl);
+
+        return Chain;
+      }
     }
   }
 
diff --git a/llvm/test/CodeGen/X86/atomic-fp.ll b/llvm/test/CodeGen/X86/atomic-fp.ll
index 1f8f64399831c..01e0480a815be 100644
--- a/llvm/test/CodeGen/X86/atomic-fp.ll
+++ b/llvm/test/CodeGen/X86/atomic-fp.ll
@@ -80,33 +80,25 @@ define void @fadd_64r(double* %loc, double %val) nounwind {
 ; X86-NOSSE:       # %bb.0:
 ; X86-NOSSE-NEXT:    pushl %ebp
 ; X86-NOSSE-NEXT:    movl %esp, %ebp
-; X86-NOSSE-NEXT:    pushl %ebx
-; X86-NOSSE-NEXT:    pushl %esi
 ; X86-NOSSE-NEXT:    andl $-8, %esp
-; X86-NOSSE-NEXT:    subl $24, %esp
-; X86-NOSSE-NEXT:    movl 8(%ebp), %esi
-; X86-NOSSE-NEXT:    fildll (%esi)
+; X86-NOSSE-NEXT:    subl $32, %esp
+; X86-NOSSE-NEXT:    movl 8(%ebp), %eax
+; X86-NOSSE-NEXT:    fildll (%eax)
 ; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    fldl {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    faddl 12(%ebp)
-; X86-NOSSE-NEXT:    fstpl (%esp)
-; X86-NOSSE-NEXT:    movl (%esp), %ebx
+; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT:    movl (%esi), %eax
-; X86-NOSSE-NEXT:    movl 4(%esi), %edx
-; X86-NOSSE-NEXT:    .p2align 4, 0x90
-; X86-NOSSE-NEXT:  .LBB1_1: # %atomicrmw.start
-; X86-NOSSE-NEXT:    # =>This Inner Loop Header: Depth=1
-; X86-NOSSE-NEXT:    lock cmpxchg8b (%esi)
-; X86-NOSSE-NEXT:    jne .LBB1_1
-; X86-NOSSE-NEXT:  # %bb.2: # %atomicrmw.end
-; X86-NOSSE-NEXT:    leal -8(%ebp), %esp
-; X86-NOSSE-NEXT:    popl %esi
-; X86-NOSSE-NEXT:    popl %ebx
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NOSSE-NEXT:    movl %ecx, (%esp)
+; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    fildll (%esp)
+; X86-NOSSE-NEXT:    fistpll (%eax)
+; X86-NOSSE-NEXT:    movl %ebp, %esp
 ; X86-NOSSE-NEXT:    popl %ebp
 ; X86-NOSSE-NEXT:    retl
 ;
@@ -256,7 +248,6 @@ define void @fadd_64g() nounwind {
 ; X86-NOSSE:       # %bb.0:
 ; X86-NOSSE-NEXT:    pushl %ebp
 ; X86-NOSSE-NEXT:    movl %esp, %ebp
-; X86-NOSSE-NEXT:    pushl %ebx
 ; X86-NOSSE-NEXT:    andl $-8, %esp
 ; X86-NOSSE-NEXT:    subl $32, %esp
 ; X86-NOSSE-NEXT:    fildll glob64
@@ -267,19 +258,14 @@ define void @fadd_64g() nounwind {
 ; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    fld1
 ; X86-NOSSE-NEXT:    faddl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    fstpl (%esp)
-; X86-NOSSE-NEXT:    movl (%esp), %ebx
+; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT:    movl glob64+4, %edx
-; X86-NOSSE-NEXT:    movl glob64, %eax
-; X86-NOSSE-NEXT:    .p2align 4, 0x90
-; X86-NOSSE-NEXT:  .LBB3_1: # %atomicrmw.start
-; X86-NOSSE-NEXT:    # =>This Inner Loop Header: Depth=1
-; X86-NOSSE-NEXT:    lock cmpxchg8b glob64
-; X86-NOSSE-NEXT:    jne .LBB3_1
-; X86-NOSSE-NEXT:  # %bb.2: # %atomicrmw.end
-; X86-NOSSE-NEXT:    leal -4(%ebp), %esp
-; X86-NOSSE-NEXT:    popl %ebx
+; X86-NOSSE-NEXT:    movl %eax, (%esp)
+; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    fildll (%esp)
+; X86-NOSSE-NEXT:    fistpll glob64
+; X86-NOSSE-NEXT:    movl %ebp, %esp
 ; X86-NOSSE-NEXT:    popl %ebp
 ; X86-NOSSE-NEXT:    retl
 ;
@@ -426,7 +412,6 @@ define void @fadd_64imm() nounwind {
 ; X86-NOSSE:       # %bb.0:
 ; X86-NOSSE-NEXT:    pushl %ebp
 ; X86-NOSSE-NEXT:    movl %esp, %ebp
-; X86-NOSSE-NEXT:    pushl %ebx
 ; X86-NOSSE-NEXT:    andl $-8, %esp
 ; X86-NOSSE-NEXT:    subl $32, %esp
 ; X86-NOSSE-NEXT:    fildll -559038737
@@ -437,19 +422,14 @@ define void @fadd_64imm() nounwind {
 ; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    fld1
 ; X86-NOSSE-NEXT:    faddl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    fstpl (%esp)
-; X86-NOSSE-NEXT:    movl (%esp), %ebx
+; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT:    movl -559038737, %eax
-; X86-NOSSE-NEXT:    movl -559038733, %edx
-; X86-NOSSE-NEXT:    .p2align 4, 0x90
-; X86-NOSSE-NEXT:  .LBB5_1: # %atomicrmw.start
-; X86-NOSSE-NEXT:    # =>This Inner Loop Header: Depth=1
-; X86-NOSSE-NEXT:    lock cmpxchg8b -559038737
-; X86-NOSSE-NEXT:    jne .LBB5_1
-; X86-NOSSE-NEXT:  # %bb.2: # %atomicrmw.end
-; X86-NOSSE-NEXT:    leal -4(%ebp), %esp
-; X86-NOSSE-NEXT:    popl %ebx
+; X86-NOSSE-NEXT:    movl %eax, (%esp)
+; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    fildll (%esp)
+; X86-NOSSE-NEXT:    fistpll -559038737
+; X86-NOSSE-NEXT:    movl %ebp, %esp
 ; X86-NOSSE-NEXT:    popl %ebp
 ; X86-NOSSE-NEXT:    retl
 ;
@@ -602,10 +582,9 @@ define void @fadd_64stack() nounwind {
 ; X86-NOSSE:       # %bb.0:
 ; X86-NOSSE-NEXT:    pushl %ebp
 ; X86-NOSSE-NEXT:    movl %esp, %ebp
-; X86-NOSSE-NEXT:    pushl %ebx
 ; X86-NOSSE-NEXT:    andl $-8, %esp
 ; X86-NOSSE-NEXT:    subl $40, %esp
-; X86-NOSSE-NEXT:    fildll (%esp)
+; X86-NOSSE-NEXT:    fildll {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -614,18 +593,13 @@ define void @fadd_64stack() nounwind {
 ; X86-NOSSE-NEXT:    fld1
 ; X86-NOSSE-NEXT:    faddl {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT:    movl (%esp), %eax
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NOSSE-NEXT:    .p2align 4, 0x90
-; X86-NOSSE-NEXT:  .LBB7_1: # %atomicrmw.start
-; X86-NOSSE-NEXT:    # =>This Inner Loop Header: Depth=1
-; X86-NOSSE-NEXT:    lock cmpxchg8b (%esp)
-; X86-NOSSE-NEXT:    jne .LBB7_1
-; X86-NOSSE-NEXT:  # %bb.2: # %atomicrmw.end
-; X86-NOSSE-NEXT:    leal -4(%ebp), %esp
-; X86-NOSSE-NEXT:    popl %ebx
+; X86-NOSSE-NEXT:    movl %eax, (%esp)
+; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    fildll (%esp)
+; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    movl %ebp, %esp
 ; X86-NOSSE-NEXT:    popl %ebp
 ; X86-NOSSE-NEXT:    retl
 ;
@@ -709,36 +683,28 @@ define void @fadd_array(i64* %arg, double %arg1, i64 %arg2) nounwind {
 ; X86-NOSSE:       # %bb.0: # %bb
 ; X86-NOSSE-NEXT:    pushl %ebp
 ; X86-NOSSE-NEXT:    movl %esp, %ebp
-; X86-NOSSE-NEXT:    pushl %ebx
-; X86-NOSSE-NEXT:    pushl %edi
 ; X86-NOSSE-NEXT:    pushl %esi
 ; X86-NOSSE-NEXT:    andl $-8, %esp
-; X86-NOSSE-NEXT:    subl $32, %esp
-; X86-NOSSE-NEXT:    movl 20(%ebp), %esi
-; X86-NOSSE-NEXT:    movl 8(%ebp), %edi
-; X86-NOSSE-NEXT:    fildll (%edi,%esi,8)
+; X86-NOSSE-NEXT:    subl $40, %esp
+; X86-NOSSE-NEXT:    movl 20(%ebp), %eax
+; X86-NOSSE-NEXT:    movl 8(%ebp), %ecx
+; X86-NOSSE-NEXT:    fildll (%ecx,%eax,8)
 ; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOSSE-NEXT:    movl %esi, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    fldl {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    faddl 12(%ebp)
-; X86-NOSSE-NEXT:    fstpl (%esp)
-; X86-NOSSE-NEXT:    movl (%esp), %ebx
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT:    movl (%edi,%esi,8), %eax
-; X86-NOSSE-NEXT:    movl 4(%edi,%esi,8), %edx
-; X86-NOSSE-NEXT:    .p2align 4, 0x90
-; X86-NOSSE-NEXT:  .LBB8_1: # %atomicrmw.start
-; X86-NOSSE-NEXT:    # =>This Inner Loop Header: Depth=1
-; X86-NOSSE-NEXT:    lock cmpxchg8b (%edi,%esi,8)
-; X86-NOSSE-NEXT:    jne .LBB8_1
-; X86-NOSSE-NEXT:  # %bb.2: # %atomicrmw.end
-; X86-NOSSE-NEXT:    leal -12(%ebp), %esp
+; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOSSE-NEXT:    movl %edx, (%esp)
+; X86-NOSSE-NEXT:    movl %esi, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    fildll (%esp)
+; X86-NOSSE-NEXT:    fistpll (%ecx,%eax,8)
+; X86-NOSSE-NEXT:    leal -4(%ebp), %esp
 ; X86-NOSSE-NEXT:    popl %esi
-; X86-NOSSE-NEXT:    popl %edi
-; X86-NOSSE-NEXT:    popl %ebx
 ; X86-NOSSE-NEXT:    popl %ebp
 ; X86-NOSSE-NEXT:    retl
 ;
diff --git a/llvm/test/CodeGen/X86/atomic-load-store-wide.ll b/llvm/test/CodeGen/X86/atomic-load-store-wide.ll
index bdb88564cf844..ebba18b0a315a 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store-wide.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store-wide.ll
@@ -16,27 +16,24 @@ define void @test1(i64* %ptr, i64 %val1) {
 ;
 ; NOSSE-LABEL: test1:
 ; NOSSE:       # %bb.0:
-; NOSSE-NEXT:    pushl %ebx
-; NOSSE-NEXT:    .cfi_def_cfa_offset 8
-; NOSSE-NEXT:    pushl %esi
-; NOSSE-NEXT:    .cfi_def_cfa_offset 12
-; NOSSE-NEXT:    .cfi_offset %esi, -12
-; NOSSE-NEXT:    .cfi_offset %ebx, -8
-; NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; NOSSE-NEXT:    movl (%esi), %eax
-; NOSSE-NEXT:    movl 4(%esi), %edx
-; NOSSE-NEXT:    .p2align 4, 0x90
-; NOSSE-NEXT:  .LBB0_1: # %atomicrmw.start
-; NOSSE-NEXT:    # =>This Inner Loop Header: Depth=1
-; NOSSE-NEXT:    lock cmpxchg8b (%esi)
-; NOSSE-NEXT:    jne .LBB0_1
-; NOSSE-NEXT:  # %bb.2: # %atomicrmw.end
-; NOSSE-NEXT:    popl %esi
+; NOSSE-NEXT:    pushl %ebp
 ; NOSSE-NEXT:    .cfi_def_cfa_offset 8
-; NOSSE-NEXT:    popl %ebx
-; NOSSE-NEXT:    .cfi_def_cfa_offset 4
+; NOSSE-NEXT:    .cfi_offset %ebp, -8
+; NOSSE-NEXT:    movl %esp, %ebp
+; NOSSE-NEXT:    .cfi_def_cfa_register %ebp
+; NOSSE-NEXT:    andl $-8, %esp
+; NOSSE-NEXT:    subl $8, %esp
+; NOSSE-NEXT:    movl 8(%ebp), %eax
+; NOSSE-NEXT:    movl 12(%ebp), %ecx
+; NOSSE-NEXT:    movl 16(%ebp), %edx
+; NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; NOSSE-NEXT:    movl %ecx, (%esp)
+; NOSSE-NEXT:    fildll (%esp)
+; NOSSE-NEXT:    fistpll (%eax)
+; NOSSE-NEXT:    lock orl $0, (%esp)
+; NOSSE-NEXT:    movl %ebp, %esp
+; NOSSE-NEXT:    popl %ebp
+; NOSSE-NEXT:    .cfi_def_cfa %esp, 4
 ; NOSSE-NEXT:    retl
   store atomic i64 %val1, i64* %ptr seq_cst, align 8
   ret void
diff --git a/llvm/test/CodeGen/X86/atomic-mi.ll b/llvm/test/CodeGen/X86/atomic-mi.ll
index f660d3311fdbc..1c135c0377c66 100644
--- a/llvm/test/CodeGen/X86/atomic-mi.ll
+++ b/llvm/test/CodeGen/X86/atomic-mi.ll
@@ -84,27 +84,21 @@ define void @store_atomic_imm_64(i64* %p) {
 ;
 ; X32-LABEL: store_atomic_imm_64:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %ebx
-; X32-NEXT:    .cfi_def_cfa_offset 8
-; X32-NEXT:    pushl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 12
-; X32-NEXT:    .cfi_offset %esi, -12
-; X32-NEXT:    .cfi_offset %ebx, -8
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    movl (%esi), %eax
-; X32-NEXT:    movl 4(%esi), %edx
-; X32-NEXT:    xorl %ecx, %ecx
-; X32-NEXT:    movl $42, %ebx
-; X32-NEXT:    .p2align 4, 0x90
-; X32-NEXT:  .LBB3_1: # %atomicrmw.start
-; X32-NEXT:    # =>This Inner Loop Header: Depth=1
-; X32-NEXT:    lock cmpxchg8b (%esi)
-; X32-NEXT:    jne .LBB3_1
-; X32-NEXT:  # %bb.2: # %atomicrmw.end
-; X32-NEXT:    popl %esi
+; X32-NEXT:    pushl %ebp
 ; X32-NEXT:    .cfi_def_cfa_offset 8
-; X32-NEXT:    popl %ebx
-; X32-NEXT:    .cfi_def_cfa_offset 4
+; X32-NEXT:    .cfi_offset %ebp, -8
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:    .cfi_def_cfa_register %ebp
+; X32-NEXT:    andl $-8, %esp
+; X32-NEXT:    subl $8, %esp
+; X32-NEXT:    movl 8(%ebp), %eax
+; X32-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT:    movl $42, (%esp)
+; X32-NEXT:    fildll (%esp)
+; X32-NEXT:    fistpll (%eax)
+; X32-NEXT:    movl %ebp, %esp
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    retl
 ; These are implemented with a CAS loop on 32 bit architectures, and thus
 ; cannot be optimized in the same way as the others.
@@ -123,27 +117,21 @@ define void @store_atomic_imm_64_big(i64* %p) {
 ;
 ; X32-LABEL: store_atomic_imm_64_big:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %ebx
-; X32-NEXT:    .cfi_def_cfa_offset 8
-; X32-NEXT:    pushl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 12
-; X32-NEXT:    .cfi_offset %esi, -12
-; X32-NEXT:    .cfi_offset %ebx, -8
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    movl (%esi), %eax
-; X32-NEXT:    movl 4(%esi), %edx
-; X32-NEXT:    movl $23, %ecx
-; X32-NEXT:    movl $1215752192, %ebx # imm = 0x4876E800
-; X32-NEXT:    .p2align 4, 0x90
-; X32-NEXT:  .LBB4_1: # %atomicrmw.start
-; X32-NEXT:    # =>This Inner Loop Header: Depth=1
-; X32-NEXT:    lock cmpxchg8b (%esi)
-; X32-NEXT:    jne .LBB4_1
-; X32-NEXT:  # %bb.2: # %atomicrmw.end
-; X32-NEXT:    popl %esi
+; X32-NEXT:    pushl %ebp
 ; X32-NEXT:    .cfi_def_cfa_offset 8
-; X32-NEXT:    popl %ebx
-; X32-NEXT:    .cfi_def_cfa_offset 4
+; X32-NEXT:    .cfi_offset %ebp, -8
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:    .cfi_def_cfa_register %ebp
+; X32-NEXT:    andl $-8, %esp
+; X32-NEXT:    subl $8, %esp
+; X32-NEXT:    movl 8(%ebp), %eax
+; X32-NEXT:    movl $23, {{[0-9]+}}(%esp)
+; X32-NEXT:    movl $1215752192, (%esp) # imm = 0x4876E800
+; X32-NEXT:    fildll (%esp)
+; X32-NEXT:    fistpll (%eax)
+; X32-NEXT:    movl %ebp, %esp
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    retl
   store atomic i64 100000000000, i64* %p monotonic, align 8
   ret void
@@ -336,30 +324,20 @@ define void @add_64i(i64* %p) {
 ; X32-NEXT:    .cfi_offset %ebp, -8
 ; X32-NEXT:    movl %esp, %ebp
 ; X32-NEXT:    .cfi_def_cfa_register %ebp
-; X32-NEXT:    pushl %ebx
-; X32-NEXT:    pushl %esi
 ; X32-NEXT:    andl $-8, %esp
-; X32-NEXT:    subl $8, %esp
-; X32-NEXT:    .cfi_offset %esi, -16
-; X32-NEXT:    .cfi_offset %ebx, -12
-; X32-NEXT:    movl 8(%ebp), %esi
-; X32-NEXT:    fildll (%esi)
-; X32-NEXT:    fistpll (%esp)
-; X32-NEXT:    movl (%esp), %ebx
+; X32-NEXT:    subl $16, %esp
+; X32-NEXT:    movl 8(%ebp), %eax
+; X32-NEXT:    fildll (%eax)
+; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    addl $2, %ebx
-; X32-NEXT:    adcl $0, %ecx
-; X32-NEXT:    movl (%esi), %eax
-; X32-NEXT:    movl 4(%esi), %edx
-; X32-NEXT:    .p2align 4, 0x90
-; X32-NEXT:  .LBB14_1: # %atomicrmw.start
-; X32-NEXT:    # =>This Inner Loop Header: Depth=1
-; X32-NEXT:    lock cmpxchg8b (%esi)
-; X32-NEXT:    jne .LBB14_1
-; X32-NEXT:  # %bb.2: # %atomicrmw.end
-; X32-NEXT:    leal -8(%ebp), %esp
-; X32-NEXT:    popl %esi
-; X32-NEXT:    popl %ebx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    addl $2, %ecx
+; X32-NEXT:    adcl $0, %edx
+; X32-NEXT:    movl %ecx, (%esp)
+; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X32-NEXT:    fildll (%esp)
+; X32-NEXT:    fistpll (%eax)
+; X32-NEXT:    movl %ebp, %esp
 ; X32-NEXT:    popl %ebp
 ; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    retl
@@ -383,30 +361,20 @@ define void @add_64r(i64* %p, i64 %v) {
 ; X32-NEXT:    .cfi_offset %ebp, -8
 ; X32-NEXT:    movl %esp, %ebp
 ; X32-NEXT:    .cfi_def_cfa_register %ebp
-; X32-NEXT:    pushl %ebx
-; X32-NEXT:    pushl %esi
 ; X32-NEXT:    andl $-8, %esp
-; X32-NEXT:    subl $8, %esp
-; X32-NEXT:    .cfi_offset %esi, -16
-; X32-NEXT:    .cfi_offset %ebx, -12
-; X32-NEXT:    movl 8(%ebp), %esi
-; X32-NEXT:    fildll (%esi)
-; X32-NEXT:    fistpll (%esp)
-; X32-NEXT:    movl (%esp), %ebx
+; X32-NEXT:    subl $16, %esp
+; X32-NEXT:    movl 8(%ebp), %eax
+; X32-NEXT:    fildll (%eax)
+; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    addl 12(%ebp), %ebx
-; X32-NEXT:    adcl 16(%ebp), %ecx
-; X32-NEXT:    movl (%esi), %eax
-; X32-NEXT:    movl 4(%esi), %edx
-; X32-NEXT:    .p2align 4, 0x90
-; X32-NEXT:  .LBB15_1: # %atomicrmw.start
-; X32-NEXT:    # =>This Inner Loop Header: Depth=1
-; X32-NEXT:    lock cmpxchg8b (%esi)
-; X32-NEXT:    jne .LBB15_1
-; X32-NEXT:  # %bb.2: # %atomicrmw.end
-; X32-NEXT:    leal -8(%ebp), %esp
-; X32-NEXT:    popl %esi
-; X32-NEXT:    popl %ebx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    addl 12(%ebp), %ecx
+; X32-NEXT:    adcl 16(%ebp), %edx
+; X32-NEXT:    movl %ecx, (%esp)
+; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X32-NEXT:    fildll (%esp)
+; X32-NEXT:    fistpll (%eax)
+; X32-NEXT:    movl %ebp, %esp
 ; X32-NEXT:    popl %ebp
 ; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    retl
@@ -576,30 +544,20 @@ define void @sub_64r(i64* %p, i64 %v) {
 ; X32-NEXT:    .cfi_offset %ebp, -8
 ; X32-NEXT:    movl %esp, %ebp
 ; X32-NEXT:    .cfi_def_cfa_register %ebp
-; X32-NEXT:    pushl %ebx
-; X32-NEXT:    pushl %esi
 ; X32-NEXT:    andl $-8, %esp
-; X32-NEXT:    subl $8, %esp
-; X32-NEXT:    .cfi_offset %esi, -16
-; X32-NEXT:    .cfi_offset %ebx, -12
-; X32-NEXT:    movl 8(%ebp), %esi
-; X32-NEXT:    fildll (%esi)
-; X32-NEXT:    fistpll (%esp)
-; X32-NEXT:    movl (%esp), %ebx
+; X32-NEXT:    subl $16, %esp
+; X32-NEXT:    movl 8(%ebp), %eax
+; X32-NEXT:    fildll (%eax)
+; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    subl 12(%ebp), %ebx
-; X32-NEXT:    sbbl 16(%ebp), %ecx
-; X32-NEXT:    movl (%esi), %eax
-; X32-NEXT:    movl 4(%esi), %edx
-; X32-NEXT:    .p2align 4, 0x90
-; X32-NEXT:  .LBB23_1: # %atomicrmw.start
-; X32-NEXT:    # =>This Inner Loop Header: Depth=1
-; X32-NEXT:    lock cmpxchg8b (%esi)
-; X32-NEXT:    jne .LBB23_1
-; X32-NEXT:  # %bb.2: # %atomicrmw.end
-; X32-NEXT:    leal -8(%ebp), %esp
-; X32-NEXT:    popl %esi
-; X32-NEXT:    popl %ebx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    subl 12(%ebp), %ecx
+; X32-NEXT:    sbbl 16(%ebp), %edx
+; X32-NEXT:    movl %ecx, (%esp)
+; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X32-NEXT:    fildll (%esp)
+; X32-NEXT:    fistpll (%eax)
+; X32-NEXT:    movl %ebp, %esp
 ; X32-NEXT:    popl %ebp
 ; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    retl
@@ -751,29 +709,18 @@ define void @and_64i(i64* %p) {
 ; X32-NEXT:    .cfi_offset %ebp, -8
 ; X32-NEXT:    movl %esp, %ebp
 ; X32-NEXT:    .cfi_def_cfa_register %ebp
-; X32-NEXT:    pushl %ebx
-; X32-NEXT:    pushl %esi
 ; X32-NEXT:    andl $-8, %esp
-; X32-NEXT:    subl $8, %esp
-; X32-NEXT:    .cfi_offset %esi, -16
-; X32-NEXT:    .cfi_offset %ebx, -12
-; X32-NEXT:    movl 8(%ebp), %esi
-; X32-NEXT:    fildll (%esi)
-; X32-NEXT:    fistpll (%esp)
-; X32-NEXT:    movl (%esp), %ebx
-; X32-NEXT:    andl $2, %ebx
-; X32-NEXT:    movl (%esi), %eax
-; X32-NEXT:    movl 4(%esi), %edx
-; X32-NEXT:    xorl %ecx, %ecx
-; X32-NEXT:    .p2align 4, 0x90
-; X32-NEXT:  .LBB31_1: # %atomicrmw.start
-; X32-NEXT:    # =>This Inner Loop Header: Depth=1
-; X32-NEXT:    lock cmpxchg8b (%esi)
-; X32-NEXT:    jne .LBB31_1
-; X32-NEXT:  # %bb.2: # %atomicrmw.end
-; X32-NEXT:    leal -8(%ebp), %esp
-; X32-NEXT:    popl %esi
-; X32-NEXT:    popl %ebx
+; X32-NEXT:    subl $16, %esp
+; X32-NEXT:    movl 8(%ebp), %eax
+; X32-NEXT:    fildll (%eax)
+; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    andl $2, %ecx
+; X32-NEXT:    movl %ecx, (%esp)
+; X32-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT:    fildll (%esp)
+; X32-NEXT:    fistpll (%eax)
+; X32-NEXT:    movl %ebp, %esp
 ; X32-NEXT:    popl %ebp
 ; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    retl
@@ -797,30 +744,20 @@ define void @and_64r(i64* %p, i64 %v) {
 ; X32-NEXT:    .cfi_offset %ebp, -8
 ; X32-NEXT:    movl %esp, %ebp
 ; X32-NEXT:    .cfi_def_cfa_register %ebp
-; X32-NEXT:    pushl %ebx
-; X32-NEXT:    pushl %esi
 ; X32-NEXT:    andl $-8, %esp
-; X32-NEXT:    subl $8, %esp
-; X32-NEXT:    .cfi_offset %esi, -16
-; X32-NEXT:    .cfi_offset %ebx, -12
-; X32-NEXT:    movl 8(%ebp), %esi
-; X32-NEXT:    fildll (%esi)
-; X32-NEXT:    fistpll (%esp)
-; X32-NEXT:    movl (%esp), %ebx
+; X32-NEXT:    subl $16, %esp
+; X32-NEXT:    movl 8(%ebp), %eax
+; X32-NEXT:    fildll (%eax)
+; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    andl 16(%ebp), %ecx
-; X32-NEXT:    andl 12(%ebp), %ebx
-; X32-NEXT:    movl (%esi), %eax
-; X32-NEXT:    movl 4(%esi), %edx
-; X32-NEXT:    .p2align 4, 0x90
-; X32-NEXT:  .LBB32_1: # %atomicrmw.start
-; X32-NEXT:    # =>This Inner Loop Header: Depth=1
-; X32-NEXT:    lock cmpxchg8b (%esi)
-; X32-NEXT:    jne .LBB32_1
-; X32-NEXT:  # %bb.2: # %atomicrmw.end
-; X32-NEXT:    leal -8(%ebp), %esp
-; X32-NEXT:    popl %esi
-; X32-NEXT:    popl %ebx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    andl 16(%ebp), %edx
+; X32-NEXT:    andl 12(%ebp), %ecx
+; X32-NEXT:    movl %ecx, (%esp)
+; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X32-NEXT:    fildll (%esp)
+; X32-NEXT:    fistpll (%eax)
+; X32-NEXT:    movl %ebp, %esp
 ; X32-NEXT:    popl %ebp
 ; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    retl
@@ -993,29 +930,19 @@ define void @or_64i(i64* %p) {
 ; X32-NEXT:    .cfi_offset %ebp, -8
 ; X32-NEXT:    movl %esp, %ebp
 ; X32-NEXT:    .cfi_def_cfa_register %ebp
-; X32-NEXT:    pushl %ebx
-; X32-NEXT:    pushl %esi
 ; X32-NEXT:    andl $-8, %esp
-; X32-NEXT:    subl $8, %esp
-; X32-NEXT:    .cfi_offset %esi, -16
-; X32-NEXT:    .cfi_offset %ebx, -12
-; X32-NEXT:    movl 8(%ebp), %esi
-; X32-NEXT:    fildll (%esi)
-; X32-NEXT:    fistpll (%esp)
-; X32-NEXT:    movl (%esp), %ebx
+; X32-NEXT:    subl $16, %esp
+; X32-NEXT:    movl 8(%ebp), %eax
+; X32-NEXT:    fildll (%eax)
+; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    orl $2, %ebx
-; X32-NEXT:    movl (%esi), %eax
-; X32-NEXT:    movl 4(%esi), %edx
-; X32-NEXT:    .p2align 4, 0x90
-; X32-NEXT:  .LBB41_1: # %atomicrmw.start
-; X32-NEXT:    # =>This Inner Loop Header: Depth=1
-; X32-NEXT:    lock cmpxchg8b (%esi)
-; X32-NEXT:    jne .LBB41_1
-; X32-NEXT:  # %bb.2: # %atomicrmw.end
-; X32-NEXT:    leal -8(%ebp), %esp
-; X32-NEXT:    popl %esi
-; X32-NEXT:    popl %ebx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    orl $2, %ecx
+; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X32-NEXT:    movl %ecx, (%esp)
+; X32-NEXT:    fildll (%esp)
+; X32-NEXT:    fistpll (%eax)
+; X32-NEXT:    movl %ebp, %esp
 ; X32-NEXT:    popl %ebp
 ; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    retl
@@ -1039,30 +966,20 @@ define void @or_64r(i64* %p, i64 %v) {
 ; X32-NEXT:    .cfi_offset %ebp, -8
 ; X32-NEXT:    movl %esp, %ebp
 ; X32-NEXT:    .cfi_def_cfa_register %ebp
-; X32-NEXT:    pushl %ebx
-; X32-NEXT:    pushl %esi
 ; X32-NEXT:    andl $-8, %esp
-; X32-NEXT:    subl $8, %esp
-; X32-NEXT:    .cfi_offset %esi, -16
-; X32-NEXT:    .cfi_offset %ebx, -12
-; X32-NEXT:    movl 8(%ebp), %esi
-; X32-NEXT:    fildll (%esi)
-; X32-NEXT:    fistpll (%esp)
-; X32-NEXT:    movl (%esp), %ebx
+; X32-NEXT:    subl $16, %esp
+; X32-NEXT:    movl 8(%ebp), %eax
+; X32-NEXT:    fildll (%eax)
+; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    orl 16(%ebp), %ecx
-; X32-NEXT:    orl 12(%ebp), %ebx
-; X32-NEXT:    movl (%esi), %eax
-; X32-NEXT:    movl 4(%esi), %edx
-; X32-NEXT:    .p2align 4, 0x90
-; X32-NEXT:  .LBB42_1: # %atomicrmw.start
-; X32-NEXT:    # =>This Inner Loop Header: Depth=1
-; X32-NEXT:    lock cmpxchg8b (%esi)
-; X32-NEXT:    jne .LBB42_1
-; X32-NEXT:  # %bb.2: # %atomicrmw.end
-; X32-NEXT:    leal -8(%ebp), %esp
-; X32-NEXT:    popl %esi
-; X32-NEXT:    popl %ebx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    orl 16(%ebp), %edx
+; X32-NEXT:    orl 12(%ebp), %ecx
+; X32-NEXT:    movl %ecx, (%esp)
+; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X32-NEXT:    fildll (%esp)
+; X32-NEXT:    fistpll (%eax)
+; X32-NEXT:    movl %ebp, %esp
 ; X32-NEXT:    popl %ebp
 ; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    retl
@@ -1235,29 +1152,19 @@ define void @xor_64i(i64* %p) {
 ; X32-NEXT:    .cfi_offset %ebp, -8
 ; X32-NEXT:    movl %esp, %ebp
 ; X32-NEXT:    .cfi_def_cfa_register %ebp
-; X32-NEXT:    pushl %ebx
-; X32-NEXT:    pushl %esi
 ; X32-NEXT:    andl $-8, %esp
-; X32-NEXT:    subl $8, %esp
-; X32-NEXT:    .cfi_offset %esi, -16
-; X32-NEXT:    .cfi_offset %ebx, -12
-; X32-NEXT:    movl 8(%ebp), %esi
-; X32-NEXT:    fildll (%esi)
-; X32-NEXT:    fistpll (%esp)
-; X32-NEXT:    movl (%esp), %ebx
+; X32-NEXT:    subl $16, %esp
+; X32-NEXT:    movl 8(%ebp), %eax
+; X32-NEXT:    fildll (%eax)
+; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    xorl $2, %ebx
-; X32-NEXT:    movl (%esi), %eax
-; X32-NEXT:    movl 4(%esi), %edx
-; X32-NEXT:    .p2align 4, 0x90
-; X32-NEXT:  .LBB51_1: # %atomicrmw.start
-; X32-NEXT:    # =>This Inner Loop Header: Depth=1
-; X32-NEXT:    lock cmpxchg8b (%esi)
-; X32-NEXT:    jne .LBB51_1
-; X32-NEXT:  # %bb.2: # %atomicrmw.end
-; X32-NEXT:    leal -8(%ebp), %esp
-; X32-NEXT:    popl %esi
-; X32-NEXT:    popl %ebx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    xorl $2, %ecx
+; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X32-NEXT:    movl %ecx, (%esp)
+; X32-NEXT:    fildll (%esp)
+; X32-NEXT:    fistpll (%eax)
+; X32-NEXT:    movl %ebp, %esp
 ; X32-NEXT:    popl %ebp
 ; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    retl
@@ -1281,30 +1188,20 @@ define void @xor_64r(i64* %p, i64 %v) {
 ; X32-NEXT:    .cfi_offset %ebp, -8
 ; X32-NEXT:    movl %esp, %ebp
 ; X32-NEXT:    .cfi_def_cfa_register %ebp
-; X32-NEXT:    pushl %ebx
-; X32-NEXT:    pushl %esi
 ; X32-NEXT:    andl $-8, %esp
-; X32-NEXT:    subl $8, %esp
-; X32-NEXT:    .cfi_offset %esi, -16
-; X32-NEXT:    .cfi_offset %ebx, -12
-; X32-NEXT:    movl 8(%ebp), %esi
-; X32-NEXT:    fildll (%esi)
-; X32-NEXT:    fistpll (%esp)
-; X32-NEXT:    movl (%esp), %ebx
+; X32-NEXT:    subl $16, %esp
+; X32-NEXT:    movl 8(%ebp), %eax
+; X32-NEXT:    fildll (%eax)
+; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    xorl 16(%ebp), %ecx
-; X32-NEXT:    xorl 12(%ebp), %ebx
-; X32-NEXT:    movl (%esi), %eax
-; X32-NEXT:    movl 4(%esi), %edx
-; X32-NEXT:    .p2align 4, 0x90
-; X32-NEXT:  .LBB52_1: # %atomicrmw.start
-; X32-NEXT:    # =>This Inner Loop Header: Depth=1
-; X32-NEXT:    lock cmpxchg8b (%esi)
-; X32-NEXT:    jne .LBB52_1
-; X32-NEXT:  # %bb.2: # %atomicrmw.end
-; X32-NEXT:    leal -8(%ebp), %esp
-; X32-NEXT:    popl %esi
-; X32-NEXT:    popl %ebx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    xorl 16(%ebp), %edx
+; X32-NEXT:    xorl 12(%ebp), %ecx
+; X32-NEXT:    movl %ecx, (%esp)
+; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X32-NEXT:    fildll (%esp)
+; X32-NEXT:    fistpll (%eax)
+; X32-NEXT:    movl %ebp, %esp
 ; X32-NEXT:    popl %ebp
 ; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    retl
@@ -1438,30 +1335,20 @@ define void @inc_64(i64* %p) {
 ; X32-NEXT:    .cfi_offset %ebp, -8
 ; X32-NEXT:    movl %esp, %ebp
 ; X32-NEXT:    .cfi_def_cfa_register %ebp
-; X32-NEXT:    pushl %ebx
-; X32-NEXT:    pushl %esi
 ; X32-NEXT:    andl $-8, %esp
-; X32-NEXT:    subl $8, %esp
-; X32-NEXT:    .cfi_offset %esi, -16
-; X32-NEXT:    .cfi_offset %ebx, -12
-; X32-NEXT:    movl 8(%ebp), %esi
-; X32-NEXT:    fildll (%esi)
-; X32-NEXT:    fistpll (%esp)
-; X32-NEXT:    movl (%esp), %ebx
+; X32-NEXT:    subl $16, %esp
+; X32-NEXT:    movl 8(%ebp), %eax
+; X32-NEXT:    fildll (%eax)
+; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    addl $1, %ebx
-; X32-NEXT:    adcl $0, %ecx
-; X32-NEXT:    movl (%esi), %eax
-; X32-NEXT:    movl 4(%esi), %edx
-; X32-NEXT:    .p2align 4, 0x90
-; X32-NEXT:  .LBB58_1: # %atomicrmw.start
-; X32-NEXT:    # =>This Inner Loop Header: Depth=1
-; X32-NEXT:    lock cmpxchg8b (%esi)
-; X32-NEXT:    jne .LBB58_1
-; X32-NEXT:  # %bb.2: # %atomicrmw.end
-; X32-NEXT:    leal -8(%ebp), %esp
-; X32-NEXT:    popl %esi
-; X32-NEXT:    popl %ebx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    addl $1, %ecx
+; X32-NEXT:    adcl $0, %edx
+; X32-NEXT:    movl %ecx, (%esp)
+; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X32-NEXT:    fildll (%esp)
+; X32-NEXT:    fistpll (%eax)
+; X32-NEXT:    movl %ebp, %esp
 ; X32-NEXT:    popl %ebp
 ; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    retl
@@ -1586,30 +1473,20 @@ define void @dec_64(i64* %p) {
 ; X32-NEXT:    .cfi_offset %ebp, -8
 ; X32-NEXT:    movl %esp, %ebp
 ; X32-NEXT:    .cfi_def_cfa_register %ebp
-; X32-NEXT:    pushl %ebx
-; X32-NEXT:    pushl %esi
 ; X32-NEXT:    andl $-8, %esp
-; X32-NEXT:    subl $8, %esp
-; X32-NEXT:    .cfi_offset %esi, -16
-; X32-NEXT:    .cfi_offset %ebx, -12
-; X32-NEXT:    movl 8(%ebp), %esi
-; X32-NEXT:    fildll (%esi)
-; X32-NEXT:    fistpll (%esp)
-; X32-NEXT:    movl (%esp), %ebx
+; X32-NEXT:    subl $16, %esp
+; X32-NEXT:    movl 8(%ebp), %eax
+; X32-NEXT:    fildll (%eax)
+; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    addl $-1, %ebx
-; X32-NEXT:    adcl $-1, %ecx
-; X32-NEXT:    movl (%esi), %eax
-; X32-NEXT:    movl 4(%esi), %edx
-; X32-NEXT:    .p2align 4, 0x90
-; X32-NEXT:  .LBB63_1: # %atomicrmw.start
-; X32-NEXT:    # =>This Inner Loop Header: Depth=1
-; X32-NEXT:    lock cmpxchg8b (%esi)
-; X32-NEXT:    jne .LBB63_1
-; X32-NEXT:  # %bb.2: # %atomicrmw.end
-; X32-NEXT:    leal -8(%ebp), %esp
-; X32-NEXT:    popl %esi
-; X32-NEXT:    popl %ebx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    addl $-1, %ecx
+; X32-NEXT:    adcl $-1, %edx
+; X32-NEXT:    movl %ecx, (%esp)
+; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X32-NEXT:    fildll (%esp)
+; X32-NEXT:    fistpll (%eax)
+; X32-NEXT:    movl %ebp, %esp
 ; X32-NEXT:    popl %ebp
 ; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    retl
@@ -1719,30 +1596,20 @@ define void @not_64(i64* %p) {
 ; X32-NEXT:    .cfi_offset %ebp, -8
 ; X32-NEXT:    movl %esp, %ebp
 ; X32-NEXT:    .cfi_def_cfa_register %ebp
-; X32-NEXT:    pushl %ebx
-; X32-NEXT:    pushl %esi
 ; X32-NEXT:    andl $-8, %esp
-; X32-NEXT:    subl $8, %esp
-; X32-NEXT:    .cfi_offset %esi, -16
-; X32-NEXT:    .cfi_offset %ebx, -12
-; X32-NEXT:    movl 8(%ebp), %esi
-; X32-NEXT:    fildll (%esi)
-; X32-NEXT:    fistpll (%esp)
-; X32-NEXT:    movl (%esp), %ebx
+; X32-NEXT:    subl $16, %esp
+; X32-NEXT:    movl 8(%ebp), %eax
+; X32-NEXT:    fildll (%eax)
+; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    notl %edx
 ; X32-NEXT:    notl %ecx
-; X32-NEXT:    notl %ebx
-; X32-NEXT:    movl (%esi), %eax
-; X32-NEXT:    movl 4(%esi), %edx
-; X32-NEXT:    .p2align 4, 0x90
-; X32-NEXT:  .LBB68_1: # %atomicrmw.start
-; X32-NEXT:    # =>This Inner Loop Header: Depth=1
-; X32-NEXT:    lock cmpxchg8b (%esi)
-; X32-NEXT:    jne .LBB68_1
-; X32-NEXT:  # %bb.2: # %atomicrmw.end
-; X32-NEXT:    leal -8(%ebp), %esp
-; X32-NEXT:    popl %esi
-; X32-NEXT:    popl %ebx
+; X32-NEXT:    movl %ecx, (%esp)
+; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X32-NEXT:    fildll (%esp)
+; X32-NEXT:    fistpll (%eax)
+; X32-NEXT:    movl %ebp, %esp
 ; X32-NEXT:    popl %ebp
 ; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    retl
@@ -1844,30 +1711,20 @@ define void @neg_64(i64* %p) {
 ; X32-NEXT:    .cfi_offset %ebp, -8
 ; X32-NEXT:    movl %esp, %ebp
 ; X32-NEXT:    .cfi_def_cfa_register %ebp
-; X32-NEXT:    pushl %ebx
-; X32-NEXT:    pushl %esi
 ; X32-NEXT:    andl $-8, %esp
-; X32-NEXT:    subl $8, %esp
-; X32-NEXT:    .cfi_offset %esi, -16
-; X32-NEXT:    .cfi_offset %ebx, -12
-; X32-NEXT:    movl 8(%ebp), %esi
-; X32-NEXT:    fildll (%esi)
-; X32-NEXT:    fistpll (%esp)
+; X32-NEXT:    subl $16, %esp
+; X32-NEXT:    movl 8(%ebp), %eax
+; X32-NEXT:    fildll (%eax)
+; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
 ; X32-NEXT:    xorl %ecx, %ecx
-; X32-NEXT:    xorl %ebx, %ebx
-; X32-NEXT:    subl (%esp), %ebx
+; X32-NEXT:    xorl %edx, %edx
+; X32-NEXT:    subl {{[0-9]+}}(%esp), %edx
 ; X32-NEXT:    sbbl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl (%esi), %eax
-; X32-NEXT:    movl 4(%esi), %edx
-; X32-NEXT:    .p2align 4, 0x90
-; X32-NEXT:  .LBB73_1: # %atomicrmw.start
-; X32-NEXT:    # =>This Inner Loop Header: Depth=1
-; X32-NEXT:    lock cmpxchg8b (%esi)
-; X32-NEXT:    jne .LBB73_1
-; X32-NEXT:  # %bb.2: # %atomicrmw.end
-; X32-NEXT:    leal -8(%ebp), %esp
-; X32-NEXT:    popl %esi
-; X32-NEXT:    popl %ebx
+; X32-NEXT:    movl %edx, (%esp)
+; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X32-NEXT:    fildll (%esp)
+; X32-NEXT:    fistpll (%eax)
+; X32-NEXT:    movl %ebp, %esp
 ; X32-NEXT:    popl %ebp
 ; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    retl
diff --git a/llvm/test/CodeGen/X86/atomic-non-integer.ll b/llvm/test/CodeGen/X86/atomic-non-integer.ll
index e635a59cfdae1..8b2ed638af2a1 100644
--- a/llvm/test/CodeGen/X86/atomic-non-integer.ll
+++ b/llvm/test/CodeGen/X86/atomic-non-integer.ll
@@ -77,26 +77,16 @@ define void @store_double(double* %fptr, double %v) {
 ;
 ; X86-NOSSE-LABEL: store_double:
 ; X86-NOSSE:       # %bb.0:
-; X86-NOSSE-NEXT:    pushl %ebx
-; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
-; X86-NOSSE-NEXT:    pushl %esi
-; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 12
-; X86-NOSSE-NEXT:    .cfi_offset %esi, -12
-; X86-NOSSE-NEXT:    .cfi_offset %ebx, -8
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NOSSE-NEXT:    subl $12, %esp
+; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 16
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT:    movl (%esi), %eax
-; X86-NOSSE-NEXT:    movl 4(%esi), %edx
-; X86-NOSSE-NEXT:    .p2align 4, 0x90
-; X86-NOSSE-NEXT:  .LBB2_1: # %atomicrmw.start
-; X86-NOSSE-NEXT:    # =>This Inner Loop Header: Depth=1
-; X86-NOSSE-NEXT:    lock cmpxchg8b (%esi)
-; X86-NOSSE-NEXT:    jne .LBB2_1
-; X86-NOSSE-NEXT:  # %bb.2: # %atomicrmw.end
-; X86-NOSSE-NEXT:    popl %esi
-; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
-; X86-NOSSE-NEXT:    popl %ebx
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    movl %ecx, (%esp)
+; X86-NOSSE-NEXT:    fildll (%esp)
+; X86-NOSSE-NEXT:    fistpll (%eax)
+; X86-NOSSE-NEXT:    addl $12, %esp
 ; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NOSSE-NEXT:    retl
 ;
@@ -576,26 +566,17 @@ define void @store_double_seq_cst(double* %fptr, double %v) {
 ;
 ; X86-NOSSE-LABEL: store_double_seq_cst:
 ; X86-NOSSE:       # %bb.0:
-; X86-NOSSE-NEXT:    pushl %ebx
-; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
-; X86-NOSSE-NEXT:    pushl %esi
-; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 12
-; X86-NOSSE-NEXT:    .cfi_offset %esi, -12
-; X86-NOSSE-NEXT:    .cfi_offset %ebx, -8
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NOSSE-NEXT:    subl $12, %esp
+; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 16
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT:    movl (%esi), %eax
-; X86-NOSSE-NEXT:    movl 4(%esi), %edx
-; X86-NOSSE-NEXT:    .p2align 4, 0x90
-; X86-NOSSE-NEXT:  .LBB9_1: # %atomicrmw.start
-; X86-NOSSE-NEXT:    # =>This Inner Loop Header: Depth=1
-; X86-NOSSE-NEXT:    lock cmpxchg8b (%esi)
-; X86-NOSSE-NEXT:    jne .LBB9_1
-; X86-NOSSE-NEXT:  # %bb.2: # %atomicrmw.end
-; X86-NOSSE-NEXT:    popl %esi
-; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
-; X86-NOSSE-NEXT:    popl %ebx
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    movl %ecx, (%esp)
+; X86-NOSSE-NEXT:    fildll (%esp)
+; X86-NOSSE-NEXT:    fistpll (%eax)
+; X86-NOSSE-NEXT:    lock orl $0, (%esp)
+; X86-NOSSE-NEXT:    addl $12, %esp
 ; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NOSSE-NEXT:    retl
 ;
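
Note (illustrative, not part of the patch): the new code path fires for any illegal i64 atomic store on a 32-bit target that has X87 but no SSE, so a one-function .ll reproducer is enough to see the fildll/fistpll sequence replace the old lock cmpxchg8b loop. Assuming a plain i686 triple, which defaults to X87 without SSE (as in the X86-NOSSE RUN lines above), running `llc -mtriple=i686-unknown-unknown` on:

  define void @store_i64(i64* %p, i64 %v) {
    store atomic i64 %v, i64* %p monotonic, align 8
    ret void
  }

should now produce the movl/movl/fildll/fistpll pattern checked in these tests; changing monotonic to seq_cst additionally emits the `lock orl $0, (%esp)` barrier from emitLockedStackOp, as seen in the test1 and store_double_seq_cst diffs.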