diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c32b1a66356ea..f72cb664277d5 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -19919,7 +19919,9 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL, // TODO: Allow FP_TO_UINT. SDValue CastToInt = CastToFP.getOperand(0); MVT VT = CastToFP.getSimpleValueType(); - if (CastToInt.getOpcode() != ISD::FP_TO_SINT || VT.isVector()) + if ((CastToInt.getOpcode() != ISD::FP_TO_SINT && + CastToInt.getOpcode() != ISD::FP_TO_UINT) || + VT.isVector()) return SDValue(); MVT IntVT = CastToInt.getSimpleValueType(); @@ -19931,22 +19933,57 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL, // See if we have 128-bit vector cast instructions for this type of cast. // We need cvttps2dq/cvttpd2dq and cvtdq2ps/cvtdq2pd. if (!Subtarget.hasSSE2() || (VT != MVT::f32 && VT != MVT::f64) || - IntVT != MVT::i32) + !(IntVT == MVT::i32 || (IntVT == MVT::i64 && Subtarget.hasDQI()))) return SDValue(); unsigned SrcSize = SrcVT.getSizeInBits(); unsigned IntSize = IntVT.getSizeInBits(); unsigned VTSize = VT.getSizeInBits(); - MVT VecSrcVT = MVT::getVectorVT(SrcVT, 128 / SrcSize); - MVT VecIntVT = MVT::getVectorVT(IntVT, 128 / IntSize); - MVT VecVT = MVT::getVectorVT(VT, 128 / VTSize); - - // We need target-specific opcodes if this is v2f64 -> v4i32 -> v2f64. - unsigned ToIntOpcode = - SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT; - unsigned ToFPOpcode = - IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP; + unsigned ToIntOpcode, ToFPOpcode; + unsigned Width = 128; + bool IsUnsigned = CastToInt.getOpcode() == ISD::FP_TO_UINT; + + if (Subtarget.hasVLX() && IntVT == MVT::i64) { + // AVX512DQ+VLX + if (IsUnsigned) { + ToIntOpcode = + SrcSize != IntSize ? X86ISD::CVTTP2UI : (unsigned)ISD::FP_TO_UINT; + ToFPOpcode = + IntSize != VTSize ? X86ISD::CVTUI2P : (unsigned)ISD::UINT_TO_FP; + } else { + ToIntOpcode = + SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT; + ToFPOpcode = + IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP; + } + } else if (IntVT == MVT::i64) { + // Need to extend width for AVX512DQ without AVX512VL + Width = 512; + ToIntOpcode = CastToInt.getOpcode(); + ToFPOpcode = IsUnsigned ? ISD::UINT_TO_FP : ISD::SINT_TO_FP; + } else { + // SSE2 + ToIntOpcode = + SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT; + ToFPOpcode = + IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP; + } + + MVT VecSrcVT, VecIntVT, VecVT; + unsigned NumElts = Width / IntSize; + VecIntVT = MVT::getVectorVT(IntVT, NumElts); + unsigned SrcElts, VTElts; + // vcvttps2qq cannot convert v16f32 <-> v8i64 + if (IntVT == MVT::i64 && Width == 512) { + SrcElts = NumElts; + VTElts = NumElts; + } else { + SrcElts = Width / SrcSize; + VTElts = Width / VTSize; + } + VecSrcVT = MVT::getVectorVT(SrcVT, SrcElts); + VecVT = MVT::getVectorVT(VT, VTElts); // sint_to_fp (fp_to_sint X) --> extelt (sint_to_fp (fp_to_sint (s2v X))), 0 // // We are not defining the high elements (for example, zero them) because @@ -20617,6 +20654,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, else if (isLegalConversion(SrcVT, DstVT, false, Subtarget)) return Op; + if (SDValue V = lowerFPToIntToFP(Op, dl, DAG, Subtarget)) + return V; + if (DstVT.isVector()) return lowerUINT_TO_FP_vec(Op, dl, DAG, Subtarget); diff --git a/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll b/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll index b6c17cecffbd6..724a259c33a89 100644 --- a/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll +++ b/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll @@ -34,11 +34,26 @@ define double @scvtf64_i64(double %a0) { ; SSE-NEXT: cvtsi2sd %rax, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: scvtf64_i64: -; AVX: # %bb.0: -; AVX-NEXT: vcvttsd2si %xmm0, %rax -; AVX-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0 -; AVX-NEXT: retq +; AVX2-LABEL: scvtf64_i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vcvttsd2si %xmm0, %rax +; AVX2-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0 +; AVX2-NEXT: retq +; +; AVX512-VL-LABEL: scvtf64_i64: +; AVX512-VL: # %bb.0: +; AVX512-VL-NEXT: vcvttpd2qq %xmm0, %xmm0 +; AVX512-VL-NEXT: vcvtqq2pd %xmm0, %xmm0 +; AVX512-VL-NEXT: retq +; +; AVX512-NOVL-LABEL: scvtf64_i64: +; AVX512-NOVL: # %bb.0: +; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512-NOVL-NEXT: vcvttpd2qq %zmm0, %zmm0 +; AVX512-NOVL-NEXT: vcvtqq2pd %zmm0, %zmm0 +; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512-NOVL-NEXT: vzeroupper +; AVX512-NOVL-NEXT: retq %ii = fptosi double %a0 to i64 %ff = sitofp i64 %ii to double ret double %ff @@ -69,11 +84,26 @@ define float @scvtf32_i64(float %a0) { ; SSE-NEXT: cvtsi2ss %rax, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: scvtf32_i64: -; AVX: # %bb.0: -; AVX-NEXT: vcvttss2si %xmm0, %rax -; AVX-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0 -; AVX-NEXT: retq +; AVX2-LABEL: scvtf32_i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vcvttss2si %xmm0, %rax +; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0 +; AVX2-NEXT: retq +; +; AVX512-VL-LABEL: scvtf32_i64: +; AVX512-VL: # %bb.0: +; AVX512-VL-NEXT: vcvttps2qq %xmm0, %xmm0 +; AVX512-VL-NEXT: vcvtqq2ps %xmm0, %xmm0 +; AVX512-VL-NEXT: retq +; +; AVX512-NOVL-LABEL: scvtf32_i64: +; AVX512-NOVL: # %bb.0: +; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512-NOVL-NEXT: vcvttps2qq %ymm0, %zmm0 +; AVX512-NOVL-NEXT: vcvtqq2ps %zmm0, %ymm0 +; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512-NOVL-NEXT: vzeroupper +; AVX512-NOVL-NEXT: retq %ii = fptosi float %a0 to i64 %ff = sitofp i64 %ii to float ret float %ff @@ -86,24 +116,15 @@ define float @scvtf32_i64(float %a0) { define double @ucvtf64_i32(double %a0) { ; SSE-LABEL: ucvtf64_i32: ; SSE: # %bb.0: -; SSE-NEXT: cvttsd2si %xmm0, %rax -; SSE-NEXT: movl %eax, %eax -; SSE-NEXT: xorps %xmm0, %xmm0 -; SSE-NEXT: cvtsi2sd %rax, %xmm0 +; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 +; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 ; SSE-NEXT: retq ; -; AVX2-LABEL: ucvtf64_i32: -; AVX2: # %bb.0: -; AVX2-NEXT: vcvttsd2si %xmm0, %rax -; AVX2-NEXT: movl %eax, %eax -; AVX2-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0 -; AVX2-NEXT: retq -; -; AVX512-LABEL: ucvtf64_i32: -; AVX512: # %bb.0: -; AVX512-NEXT: vcvttsd2usi %xmm0, %eax -; AVX512-NEXT: vcvtusi2sd %eax, %xmm15, %xmm0 -; AVX512-NEXT: retq +; AVX-LABEL: ucvtf64_i32: +; AVX: # %bb.0: +; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 +; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX-NEXT: retq %ii = fptoui double %a0 to i32 %ff = uitofp i32 %ii to double ret double %ff @@ -143,11 +164,20 @@ define double @ucvtf64_i64(double %a0) { ; AVX2-NEXT: vaddsd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: ucvtf64_i64: -; AVX512: # %bb.0: -; AVX512-NEXT: vcvttsd2usi %xmm0, %rax -; AVX512-NEXT: vcvtusi2sd %rax, %xmm15, %xmm0 -; AVX512-NEXT: retq +; AVX512-VL-LABEL: ucvtf64_i64: +; AVX512-VL: # %bb.0: +; AVX512-VL-NEXT: vcvttpd2uqq %xmm0, %xmm0 +; AVX512-VL-NEXT: vcvtuqq2pd %xmm0, %xmm0 +; AVX512-VL-NEXT: retq +; +; AVX512-NOVL-LABEL: ucvtf64_i64: +; AVX512-NOVL: # %bb.0: +; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512-NOVL-NEXT: vcvttpd2uqq %zmm0, %zmm0 +; AVX512-NOVL-NEXT: vcvtuqq2pd %zmm0, %zmm0 +; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512-NOVL-NEXT: vzeroupper +; AVX512-NOVL-NEXT: retq %ii = fptoui double %a0 to i64 %ff = uitofp i64 %ii to double ret double %ff @@ -156,24 +186,15 @@ define double @ucvtf64_i64(double %a0) { define float @ucvtf32_i32(float %a0) { ; SSE-LABEL: ucvtf32_i32: ; SSE: # %bb.0: -; SSE-NEXT: cvttss2si %xmm0, %rax -; SSE-NEXT: movl %eax, %eax -; SSE-NEXT: xorps %xmm0, %xmm0 -; SSE-NEXT: cvtsi2ss %rax, %xmm0 +; SSE-NEXT: cvttps2dq %xmm0, %xmm0 +; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 ; SSE-NEXT: retq ; -; AVX2-LABEL: ucvtf32_i32: -; AVX2: # %bb.0: -; AVX2-NEXT: vcvttss2si %xmm0, %rax -; AVX2-NEXT: movl %eax, %eax -; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0 -; AVX2-NEXT: retq -; -; AVX512-LABEL: ucvtf32_i32: -; AVX512: # %bb.0: -; AVX512-NEXT: vcvttss2usi %xmm0, %eax -; AVX512-NEXT: vcvtusi2ss %eax, %xmm15, %xmm0 -; AVX512-NEXT: retq +; AVX-LABEL: ucvtf32_i32: +; AVX: # %bb.0: +; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 +; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX-NEXT: retq %ii = fptoui float %a0 to i32 %ff = uitofp i32 %ii to float ret float %ff @@ -226,15 +247,23 @@ define float @ucvtf32_i64(float %a0) { ; AVX2-NEXT: vaddss %xmm0, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: ucvtf32_i64: -; AVX512: # %bb.0: -; AVX512-NEXT: vcvttss2usi %xmm0, %rax -; AVX512-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0 -; AVX512-NEXT: retq +; AVX512-VL-LABEL: ucvtf32_i64: +; AVX512-VL: # %bb.0: +; AVX512-VL-NEXT: vcvttps2uqq %xmm0, %xmm0 +; AVX512-VL-NEXT: vcvtuqq2ps %xmm0, %xmm0 +; AVX512-VL-NEXT: retq +; +; AVX512-NOVL-LABEL: ucvtf32_i64: +; AVX512-NOVL: # %bb.0: +; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512-NOVL-NEXT: vcvttps2uqq %ymm0, %zmm0 +; AVX512-NOVL-NEXT: vcvtuqq2ps %zmm0, %ymm0 +; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512-NOVL-NEXT: vzeroupper +; AVX512-NOVL-NEXT: retq %ii = fptoui float %a0 to i64 %ff = uitofp i64 %ii to float ret float %ff } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; AVX512-NOVL: {{.*}} -; AVX512-VL: {{.*}} +; AVX512: {{.*}} diff --git a/llvm/test/CodeGen/X86/ftrunc.ll b/llvm/test/CodeGen/X86/ftrunc.ll index 9095fb1550e70..c608c70f813c2 100644 --- a/llvm/test/CodeGen/X86/ftrunc.ll +++ b/llvm/test/CodeGen/X86/ftrunc.ll @@ -10,10 +10,8 @@ declare i64 @llvm.fptosi.sat.i64.f64(double) define float @trunc_unsigned_f32(float %x) #0 { ; SSE2-LABEL: trunc_unsigned_f32: ; SSE2: # %bb.0: -; SSE2-NEXT: cvttss2si %xmm0, %rax -; SSE2-NEXT: movl %eax, %eax -; SSE2-NEXT: xorps %xmm0, %xmm0 -; SSE2-NEXT: cvtsi2ss %rax, %xmm0 +; SSE2-NEXT: cvttps2dq %xmm0, %xmm0 +; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: trunc_unsigned_f32: diff --git a/llvm/test/CodeGen/X86/isint.ll b/llvm/test/CodeGen/X86/isint.ll index 8c11fe147f0d8..30262f93501e6 100644 --- a/llvm/test/CodeGen/X86/isint.ll +++ b/llvm/test/CodeGen/X86/isint.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=X64 %s -; RUN: llc < %s -mtriple=i686-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=X86 %s - +; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=X86 %s +; RUN: llc < %s -mtriple=i686-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=SSE2 %s +; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck -check-prefix=AVX512VL %s ; PR19059 define i32 @isint_return(double %d) nounwind { @@ -17,13 +17,31 @@ define i32 @isint_return(double %d) nounwind { ; ; X86-LABEL: isint_return: ; X86: # %bb.0: -; X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-NEXT: cvttpd2dq %xmm0, %xmm1 ; X86-NEXT: cvtdq2pd %xmm1, %xmm1 ; X86-NEXT: cmpeqsd %xmm0, %xmm1 -; X86-NEXT: movd %xmm1, %eax +; X86-NEXT: movq %xmm1, %rax ; X86-NEXT: andl $1, %eax -; X86-NEXT: retl +; X86-NEXT: # kill: def $eax killed $eax killed $rax +; X86-NEXT: retq +; +; SSE2-LABEL: isint_return: +; SSE2: # %bb.0: +; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE2-NEXT: cvttpd2dq %xmm0, %xmm1 +; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1 +; SSE2-NEXT: cmpeqsd %xmm0, %xmm1 +; SSE2-NEXT: movd %xmm1, %eax +; SSE2-NEXT: andl $1, %eax +; SSE2-NEXT: retl +; +; AVX512VL-LABEL: isint_return: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm1 +; AVX512VL-NEXT: vcvtdq2pd %xmm1, %xmm1 +; AVX512VL-NEXT: vcmpeqsd %xmm1, %xmm0, %k0 +; AVX512VL-NEXT: kmovw %k0, %eax +; AVX512VL-NEXT: retq %i = fptosi double %d to i32 %e = sitofp i32 %i to double %c = fcmp oeq double %d, %e @@ -43,13 +61,219 @@ define i32 @isint_float_return(float %f) nounwind { ; ; X86-LABEL: isint_float_return: ; X86: # %bb.0: -; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X86-NEXT: cvttps2dq %xmm0, %xmm1 ; X86-NEXT: cvtdq2ps %xmm1, %xmm1 ; X86-NEXT: cmpeqss %xmm0, %xmm1 ; X86-NEXT: movd %xmm1, %eax ; X86-NEXT: andl $1, %eax -; X86-NEXT: retl +; X86-NEXT: retq +; +; SSE2-LABEL: isint_float_return: +; SSE2: # %bb.0: +; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2-NEXT: cvttps2dq %xmm0, %xmm1 +; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1 +; SSE2-NEXT: cmpeqss %xmm0, %xmm1 +; SSE2-NEXT: movd %xmm1, %eax +; SSE2-NEXT: andl $1, %eax +; SSE2-NEXT: retl +; +; AVX512VL-LABEL: isint_float_return: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm1 +; AVX512VL-NEXT: vcvtdq2ps %xmm1, %xmm1 +; AVX512VL-NEXT: vcmpeqss %xmm1, %xmm0, %k0 +; AVX512VL-NEXT: kmovw %k0, %eax +; AVX512VL-NEXT: retq + %i = fptosi float %f to i32 + %g = sitofp i32 %i to float + %c = fcmp oeq float %f, %g + %z = zext i1 %c to i32 + ret i32 %z +} + +define i64 @isint64_float_return(float %f) nounwind { +; X86-LABEL: isint64_float_return: +; X86: # %bb.0: +; X86-NEXT: cvttss2si %xmm0, %rax +; X86-NEXT: cvtsi2ss %rax, %xmm1 +; X86-NEXT: cmpeqss %xmm0, %xmm1 +; X86-NEXT: movd %xmm1, %eax +; X86-NEXT: andl $1, %eax +; X86-NEXT: retq +; +; SSE2-LABEL: isint64_float_return: +; SSE2: # %bb.0: +; SSE2-NEXT: pushl %ebp +; SSE2-NEXT: movl %esp, %ebp +; SSE2-NEXT: andl $-8, %esp +; SSE2-NEXT: subl $32, %esp +; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2-NEXT: movss %xmm0, {{[0-9]+}}(%esp) +; SSE2-NEXT: flds {{[0-9]+}}(%esp) +; SSE2-NEXT: fnstcw {{[0-9]+}}(%esp) +; SSE2-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; SSE2-NEXT: orl $3072, %eax # imm = 0xC00 +; SSE2-NEXT: movw %ax, {{[0-9]+}}(%esp) +; SSE2-NEXT: fldcw {{[0-9]+}}(%esp) +; SSE2-NEXT: fistpll {{[0-9]+}}(%esp) +; SSE2-NEXT: fldcw {{[0-9]+}}(%esp) +; SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; SSE2-NEXT: movlps %xmm1, {{[0-9]+}}(%esp) +; SSE2-NEXT: fildll {{[0-9]+}}(%esp) +; SSE2-NEXT: fstps {{[0-9]+}}(%esp) +; SSE2-NEXT: cmpeqss {{[0-9]+}}(%esp), %xmm0 +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: andl $1, %eax +; SSE2-NEXT: xorl %edx, %edx +; SSE2-NEXT: movl %ebp, %esp +; SSE2-NEXT: popl %ebp +; SSE2-NEXT: retl +; +; AVX512VL-LABEL: isint64_float_return: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vcvttps2qq %xmm0, %xmm1 +; AVX512VL-NEXT: vcvtqq2ps %xmm1, %xmm1 +; AVX512VL-NEXT: vcmpeqss %xmm1, %xmm0, %k0 +; AVX512VL-NEXT: kmovw %k0, %eax +; AVX512VL-NEXT: retq + %i = fptosi float %f to i64 + %g = sitofp i64 %i to float + %c = fcmp oeq float %f, %g + %z = zext i1 %c to i64 + ret i64 %z +} + +define i64 @isint64_return(double %d) nounwind { +; X86-LABEL: isint64_return: +; X86: # %bb.0: +; X86-NEXT: cvttsd2si %xmm0, %rax +; X86-NEXT: cvtsi2sd %rax, %xmm1 +; X86-NEXT: cmpeqsd %xmm0, %xmm1 +; X86-NEXT: movq %xmm1, %rax +; X86-NEXT: andl $1, %eax +; X86-NEXT: retq +; +; SSE2-LABEL: isint64_return: +; SSE2: # %bb.0: +; SSE2-NEXT: pushl %ebp +; SSE2-NEXT: movl %esp, %ebp +; SSE2-NEXT: andl $-8, %esp +; SSE2-NEXT: subl $32, %esp +; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE2-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) +; SSE2-NEXT: fldl {{[0-9]+}}(%esp) +; SSE2-NEXT: fnstcw {{[0-9]+}}(%esp) +; SSE2-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; SSE2-NEXT: orl $3072, %eax # imm = 0xC00 +; SSE2-NEXT: movw %ax, {{[0-9]+}}(%esp) +; SSE2-NEXT: fldcw {{[0-9]+}}(%esp) +; SSE2-NEXT: fistpll {{[0-9]+}}(%esp) +; SSE2-NEXT: fldcw {{[0-9]+}}(%esp) +; SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; SSE2-NEXT: movlps %xmm1, {{[0-9]+}}(%esp) +; SSE2-NEXT: fildll {{[0-9]+}}(%esp) +; SSE2-NEXT: fstpl {{[0-9]+}}(%esp) +; SSE2-NEXT: cmpeqsd {{[0-9]+}}(%esp), %xmm0 +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: andl $1, %eax +; SSE2-NEXT: xorl %edx, %edx +; SSE2-NEXT: movl %ebp, %esp +; SSE2-NEXT: popl %ebp +; SSE2-NEXT: retl +; +; AVX512VL-LABEL: isint64_return: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vcvttpd2qq %xmm0, %xmm1 +; AVX512VL-NEXT: vcvtqq2pd %xmm1, %xmm1 +; AVX512VL-NEXT: vcmpeqsd %xmm1, %xmm0, %k0 +; AVX512VL-NEXT: kmovw %k0, %eax +; AVX512VL-NEXT: retq + %i = fptosi double %d to i64 + %g = sitofp i64 %i to double + %c = fcmp oeq double %d, %g + %z = zext i1 %c to i64 + ret i64 %z +} + +define i32 @isuint_return(double %d) nounwind { +; AVX512-NODQ-LABEL: isuint_return: +; AVX512-NODQ: # %bb.0: +; AVX512-NODQ-NEXT: vcvttsd2usi %xmm0, %eax +; AVX512-NODQ-NEXT: vcvtusi2sd %eax, %xmm15, %xmm1 +; AVX512-NODQ-NEXT: vcmpeqsd %xmm1, %xmm0, %k0 +; AVX512-NODQ-NEXT: kmovw %k0, %eax +; AVX512-NODQ-NEXT: retq +; +; X86-LABEL: isuint_return: +; X86: # %bb.0: +; X86-NEXT: cvttpd2dq %xmm0, %xmm1 +; X86-NEXT: cvtdq2pd %xmm1, %xmm1 +; X86-NEXT: cmpeqsd %xmm0, %xmm1 +; X86-NEXT: movq %xmm1, %rax +; X86-NEXT: andl $1, %eax +; X86-NEXT: # kill: def $eax killed $eax killed $rax +; X86-NEXT: retq +; +; SSE2-LABEL: isuint_return: +; SSE2: # %bb.0: +; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE2-NEXT: cvttpd2dq %xmm0, %xmm1 +; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1 +; SSE2-NEXT: cmpeqsd %xmm0, %xmm1 +; SSE2-NEXT: movd %xmm1, %eax +; SSE2-NEXT: andl $1, %eax +; SSE2-NEXT: retl +; +; AVX512VL-LABEL: isuint_return: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm1 +; AVX512VL-NEXT: vcvtdq2pd %xmm1, %xmm1 +; AVX512VL-NEXT: vcmpeqsd %xmm1, %xmm0, %k0 +; AVX512VL-NEXT: kmovw %k0, %eax +; AVX512VL-NEXT: retq + %i = fptoui double %d to i32 + %e = uitofp i32 %i to double + %c = fcmp oeq double %d, %e + %z = zext i1 %c to i32 + ret i32 %z +} + +define i32 @isuint_float_return(float %f) nounwind { +; AVX512-NODQ-LABEL: isuint_float_return: +; AVX512-NODQ: # %bb.0: +; AVX512-NODQ-NEXT: vcvttps2dq %xmm0, %xmm1 +; AVX512-NODQ-NEXT: vcvtdq2ps %xmm1, %xmm1 +; AVX512-NODQ-NEXT: vcmpeqss %xmm1, %xmm0, %k0 +; AVX512-NODQ-NEXT: kmovw %k0, %eax +; AVX512-NODQ-NEXT: retq +; +; X86-LABEL: isuint_float_return: +; X86: # %bb.0: +; X86-NEXT: cvttps2dq %xmm0, %xmm1 +; X86-NEXT: cvtdq2ps %xmm1, %xmm1 +; X86-NEXT: cmpeqss %xmm0, %xmm1 +; X86-NEXT: movd %xmm1, %eax +; X86-NEXT: andl $1, %eax +; X86-NEXT: retq +; +; SSE2-LABEL: isuint_float_return: +; SSE2: # %bb.0: +; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2-NEXT: cvttps2dq %xmm0, %xmm1 +; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1 +; SSE2-NEXT: cmpeqss %xmm0, %xmm1 +; SSE2-NEXT: movd %xmm1, %eax +; SSE2-NEXT: andl $1, %eax +; SSE2-NEXT: retl +; +; AVX512VL-LABEL: isuint_float_return: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm1 +; AVX512VL-NEXT: vcvtdq2ps %xmm1, %xmm1 +; AVX512VL-NEXT: vcmpeqss %xmm1, %xmm0, %k0 +; AVX512VL-NEXT: kmovw %k0, %eax +; AVX512VL-NEXT: retq %i = fptosi float %f to i32 %g = sitofp i32 %i to float %c = fcmp oeq float %f, %g @@ -57,35 +281,225 @@ define i32 @isint_float_return(float %f) nounwind { ret i32 %z } +define i64 @isuint64_return(double %d) nounwind { +; AVX512-NODQ-LABEL: isuint64_return: +; AVX512-NODQ: # %bb.0: +; AVX512-NODQ-NEXT: vcvttsd2usi %xmm0, %rax +; AVX512-NODQ-NEXT: vcvtusi2sd %rax, %xmm15, %xmm1 +; AVX512-NODQ-NEXT: vcmpeqsd %xmm1, %xmm0, %k0 +; AVX512-NODQ-NEXT: kmovw %k0, %eax +; AVX512-NODQ-NEXT: retq +; +; X86-LABEL: isuint64_return: +; X86: # %bb.0: +; X86-NEXT: cvttsd2si %xmm0, %rax +; X86-NEXT: movq %rax, %rcx +; X86-NEXT: sarq $63, %rcx +; X86-NEXT: movapd %xmm0, %xmm1 +; X86-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; X86-NEXT: cvttsd2si %xmm1, %rdx +; X86-NEXT: andq %rcx, %rdx +; X86-NEXT: orq %rax, %rdx +; X86-NEXT: movq %rdx, %xmm1 +; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] +; X86-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; X86-NEXT: movapd %xmm1, %xmm2 +; X86-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] +; X86-NEXT: addsd %xmm1, %xmm2 +; X86-NEXT: cmpeqsd %xmm0, %xmm2 +; X86-NEXT: movq %xmm2, %rax +; X86-NEXT: andl $1, %eax +; X86-NEXT: retq +; +; SSE2-LABEL: isuint64_return: +; SSE2: # %bb.0: +; SSE2-NEXT: pushl %ebp +; SSE2-NEXT: movl %esp, %ebp +; SSE2-NEXT: andl $-8, %esp +; SSE2-NEXT: subl $16, %esp +; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE2-NEXT: movsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0] +; SSE2-NEXT: ucomisd %xmm0, %xmm1 +; SSE2-NEXT: jbe .LBB6_2 +; SSE2-NEXT: # %bb.1: +; SSE2-NEXT: xorpd %xmm1, %xmm1 +; SSE2-NEXT: .LBB6_2: +; SSE2-NEXT: movapd %xmm0, %xmm2 +; SSE2-NEXT: subsd %xmm1, %xmm2 +; SSE2-NEXT: movsd %xmm2, {{[0-9]+}}(%esp) +; SSE2-NEXT: setbe %al +; SSE2-NEXT: fldl {{[0-9]+}}(%esp) +; SSE2-NEXT: fnstcw {{[0-9]+}}(%esp) +; SSE2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; SSE2-NEXT: orl $3072, %ecx # imm = 0xC00 +; SSE2-NEXT: movw %cx, {{[0-9]+}}(%esp) +; SSE2-NEXT: fldcw {{[0-9]+}}(%esp) +; SSE2-NEXT: fistpll {{[0-9]+}}(%esp) +; SSE2-NEXT: fldcw {{[0-9]+}}(%esp) +; SSE2-NEXT: movzbl %al, %eax +; SSE2-NEXT: shll $31, %eax +; SSE2-NEXT: xorl {{[0-9]+}}(%esp), %eax +; SSE2-NEXT: movd %eax, %xmm1 +; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[1],mem[1] +; SSE2-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 +; SSE2-NEXT: movapd %xmm2, %xmm1 +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] +; SSE2-NEXT: addsd %xmm2, %xmm1 +; SSE2-NEXT: cmpeqsd %xmm0, %xmm1 +; SSE2-NEXT: movd %xmm1, %eax +; SSE2-NEXT: andl $1, %eax +; SSE2-NEXT: xorl %edx, %edx +; SSE2-NEXT: movl %ebp, %esp +; SSE2-NEXT: popl %ebp +; SSE2-NEXT: retl +; +; AVX512VL-LABEL: isuint64_return: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vcvttpd2uqq %xmm0, %xmm1 +; AVX512VL-NEXT: vcvtuqq2pd %xmm1, %xmm1 +; AVX512VL-NEXT: vcmpeqsd %xmm1, %xmm0, %k0 +; AVX512VL-NEXT: kmovw %k0, %eax +; AVX512VL-NEXT: retq + %i = fptoui double %d to i64 + %e = uitofp i64 %i to double + %c = fcmp oeq double %d, %e + %z = zext i1 %c to i64 + ret i64 %z +} + +define i64 @isuint64_float_return(float %f) nounwind { +; X86-LABEL: isuint64_float_return: +; X86: # %bb.0: +; X86-NEXT: cvttss2si %xmm0, %rcx +; X86-NEXT: movq %rcx, %rdx +; X86-NEXT: sarq $63, %rdx +; X86-NEXT: movaps %xmm0, %xmm1 +; X86-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; X86-NEXT: cvttss2si %xmm1, %rax +; X86-NEXT: andq %rdx, %rax +; X86-NEXT: orq %rcx, %rax +; X86-NEXT: js .LBB7_1 +; X86-NEXT: # %bb.2: +; X86-NEXT: xorps %xmm1, %xmm1 +; X86-NEXT: cvtsi2ss %rax, %xmm1 +; X86-NEXT: jmp .LBB7_3 +; X86-NEXT: .LBB7_1: +; X86-NEXT: movq %rax, %rcx +; X86-NEXT: shrq %rcx +; X86-NEXT: andl $1, %eax +; X86-NEXT: orq %rcx, %rax +; X86-NEXT: xorps %xmm1, %xmm1 +; X86-NEXT: cvtsi2ss %rax, %xmm1 +; X86-NEXT: addss %xmm1, %xmm1 +; X86-NEXT: .LBB7_3: +; X86-NEXT: cmpeqss %xmm1, %xmm0 +; X86-NEXT: movd %xmm0, %eax +; X86-NEXT: andl $1, %eax +; X86-NEXT: retq +; +; SSE2-LABEL: isuint64_float_return: +; SSE2: # %bb.0: +; SSE2-NEXT: pushl %ebp +; SSE2-NEXT: movl %esp, %ebp +; SSE2-NEXT: andl $-8, %esp +; SSE2-NEXT: subl $32, %esp +; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2-NEXT: movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] +; SSE2-NEXT: ucomiss %xmm0, %xmm1 +; SSE2-NEXT: jbe .LBB7_2 +; SSE2-NEXT: # %bb.1: +; SSE2-NEXT: xorps %xmm1, %xmm1 +; SSE2-NEXT: .LBB7_2: +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: subss %xmm1, %xmm2 +; SSE2-NEXT: movss %xmm2, {{[0-9]+}}(%esp) +; SSE2-NEXT: setbe %al +; SSE2-NEXT: flds {{[0-9]+}}(%esp) +; SSE2-NEXT: fnstcw {{[0-9]+}}(%esp) +; SSE2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; SSE2-NEXT: orl $3072, %ecx # imm = 0xC00 +; SSE2-NEXT: movw %cx, {{[0-9]+}}(%esp) +; SSE2-NEXT: fldcw {{[0-9]+}}(%esp) +; SSE2-NEXT: fistpll {{[0-9]+}}(%esp) +; SSE2-NEXT: fldcw {{[0-9]+}}(%esp) +; SSE2-NEXT: movzbl %al, %eax +; SSE2-NEXT: shll $31, %eax +; SSE2-NEXT: xorl {{[0-9]+}}(%esp), %eax +; SSE2-NEXT: movd %eax, %xmm1 +; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSE2-NEXT: movq %xmm2, {{[0-9]+}}(%esp) +; SSE2-NEXT: shrl $31, %eax +; SSE2-NEXT: fildll {{[0-9]+}}(%esp) +; SSE2-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; SSE2-NEXT: fstps {{[0-9]+}}(%esp) +; SSE2-NEXT: cmpeqss {{[0-9]+}}(%esp), %xmm0 +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: andl $1, %eax +; SSE2-NEXT: xorl %edx, %edx +; SSE2-NEXT: movl %ebp, %esp +; SSE2-NEXT: popl %ebp +; SSE2-NEXT: retl +; +; AVX512VL-LABEL: isuint64_float_return: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vcvttps2uqq %xmm0, %xmm1 +; AVX512VL-NEXT: vcvtuqq2ps %xmm1, %xmm1 +; AVX512VL-NEXT: vcmpeqss %xmm1, %xmm0, %k0 +; AVX512VL-NEXT: kmovw %k0, %eax +; AVX512VL-NEXT: retq + %i = fptoui float %f to i64 + %g = uitofp i64 %i to float + %c = fcmp oeq float %f, %g + %z = zext i1 %c to i64 + ret i64 %z +} + declare void @foo() define void @isint_branch(double %d) nounwind { -; X64-LABEL: isint_branch: -; X64: # %bb.0: -; X64-NEXT: cvttpd2dq %xmm0, %xmm1 -; X64-NEXT: cvtdq2pd %xmm1, %xmm1 -; X64-NEXT: ucomisd %xmm1, %xmm0 -; X64-NEXT: jne .LBB2_2 -; X64-NEXT: jp .LBB2_2 -; X64-NEXT: # %bb.1: # %true -; X64-NEXT: pushq %rax -; X64-NEXT: callq foo@PLT -; X64-NEXT: popq %rax -; X64-NEXT: .LBB2_2: # %false -; X64-NEXT: retq -; ; X86-LABEL: isint_branch: ; X86: # %bb.0: -; X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-NEXT: cvttpd2dq %xmm0, %xmm1 ; X86-NEXT: cvtdq2pd %xmm1, %xmm1 ; X86-NEXT: ucomisd %xmm1, %xmm0 -; X86-NEXT: jne .LBB2_2 -; X86-NEXT: jp .LBB2_2 +; X86-NEXT: jne .LBB8_2 +; X86-NEXT: jp .LBB8_2 ; X86-NEXT: # %bb.1: # %true -; X86-NEXT: calll foo@PLT -; X86-NEXT: .LBB2_2: # %false -; X86-NEXT: retl +; X86-NEXT: pushq %rax +; X86-NEXT: callq foo@PLT +; X86-NEXT: popq %rax +; X86-NEXT: .LBB8_2: # %false +; X86-NEXT: retq +; +; SSE2-LABEL: isint_branch: +; SSE2: # %bb.0: +; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE2-NEXT: cvttpd2dq %xmm0, %xmm1 +; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1 +; SSE2-NEXT: ucomisd %xmm1, %xmm0 +; SSE2-NEXT: jne .LBB8_2 +; SSE2-NEXT: jp .LBB8_2 +; SSE2-NEXT: # %bb.1: # %true +; SSE2-NEXT: calll foo@PLT +; SSE2-NEXT: .LBB8_2: # %false +; SSE2-NEXT: retl +; +; AVX512VL-LABEL: isint_branch: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm1 +; AVX512VL-NEXT: vcvtdq2pd %xmm1, %xmm1 +; AVX512VL-NEXT: vucomisd %xmm1, %xmm0 +; AVX512VL-NEXT: jne .LBB8_2 +; AVX512VL-NEXT: jp .LBB8_2 +; AVX512VL-NEXT: # %bb.1: # %true +; AVX512VL-NEXT: pushq %rax +; AVX512VL-NEXT: callq foo@PLT +; AVX512VL-NEXT: popq %rax +; AVX512VL-NEXT: .LBB8_2: # %false +; AVX512VL-NEXT: retq %i = fptosi double %d to i32 %e = sitofp i32 %i to double %c = fcmp oeq double %d, %e diff --git a/llvm/test/CodeGen/X86/setoeq.ll b/llvm/test/CodeGen/X86/setoeq.ll index 131e279aa645c..10e2ace08a86a 100644 --- a/llvm/test/CodeGen/X86/setoeq.ll +++ b/llvm/test/CodeGen/X86/setoeq.ll @@ -47,17 +47,8 @@ define zeroext i8 @oeq_f64_u32(double %x) nounwind readnone { ; SSE-LABEL: oeq_f64_u32: ; SSE: # %bb.0: # %entry ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE-NEXT: cvttsd2si %xmm0, %eax -; SSE-NEXT: movl %eax, %ecx -; SSE-NEXT: sarl $31, %ecx -; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; SSE-NEXT: cvttsd2si %xmm1, %edx -; SSE-NEXT: andl %ecx, %edx -; SSE-NEXT: orl %eax, %edx -; SSE-NEXT: movd %edx, %xmm1 -; SSE-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; SSE-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; SSE-NEXT: cvttpd2dq %xmm0, %xmm1 +; SSE-NEXT: cvtdq2pd %xmm1, %xmm1 ; SSE-NEXT: cmpeqsd %xmm0, %xmm1 ; SSE-NEXT: movd %xmm1, %eax ; SSE-NEXT: andl $1, %eax @@ -67,16 +58,8 @@ define zeroext i8 @oeq_f64_u32(double %x) nounwind readnone { ; AVX-LABEL: oeq_f64_u32: ; AVX: # %bb.0: # %entry ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: vcvttsd2si %xmm0, %eax -; AVX-NEXT: movl %eax, %ecx -; AVX-NEXT: sarl $31, %ecx -; AVX-NEXT: vsubsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1 -; AVX-NEXT: vcvttsd2si %xmm1, %edx -; AVX-NEXT: andl %ecx, %edx -; AVX-NEXT: orl %eax, %edx -; AVX-NEXT: vmovd %edx, %xmm1 -; AVX-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 -; AVX-NEXT: vsubsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 +; AVX-NEXT: vcvttpd2dq %xmm0, %xmm1 +; AVX-NEXT: vcvtdq2pd %xmm1, %xmm1 ; AVX-NEXT: vcmpeqsd %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: andl $1, %eax @@ -86,8 +69,8 @@ define zeroext i8 @oeq_f64_u32(double %x) nounwind readnone { ; AVX512-LABEL: oeq_f64_u32: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX512-NEXT: vcvttsd2usi %xmm0, %eax -; AVX512-NEXT: vcvtusi2sd %eax, %xmm7, %xmm1 +; AVX512-NEXT: vcvttpd2dq %xmm0, %xmm1 +; AVX512-NEXT: vcvtdq2pd %xmm1, %xmm1 ; AVX512-NEXT: vcmpeqsd %xmm0, %xmm1, %k0 ; AVX512-NEXT: kmovd %k0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax @@ -311,17 +294,8 @@ define zeroext i8 @une_f64_u32(double %x) nounwind readnone { ; SSE-LABEL: une_f64_u32: ; SSE: # %bb.0: # %entry ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE-NEXT: cvttsd2si %xmm0, %eax -; SSE-NEXT: movl %eax, %ecx -; SSE-NEXT: sarl $31, %ecx -; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; SSE-NEXT: cvttsd2si %xmm1, %edx -; SSE-NEXT: andl %ecx, %edx -; SSE-NEXT: orl %eax, %edx -; SSE-NEXT: movd %edx, %xmm1 -; SSE-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; SSE-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; SSE-NEXT: cvttpd2dq %xmm0, %xmm1 +; SSE-NEXT: cvtdq2pd %xmm1, %xmm1 ; SSE-NEXT: cmpneqsd %xmm0, %xmm1 ; SSE-NEXT: movd %xmm1, %eax ; SSE-NEXT: andl $1, %eax @@ -331,16 +305,8 @@ define zeroext i8 @une_f64_u32(double %x) nounwind readnone { ; AVX-LABEL: une_f64_u32: ; AVX: # %bb.0: # %entry ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: vcvttsd2si %xmm0, %eax -; AVX-NEXT: movl %eax, %ecx -; AVX-NEXT: sarl $31, %ecx -; AVX-NEXT: vsubsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1 -; AVX-NEXT: vcvttsd2si %xmm1, %edx -; AVX-NEXT: andl %ecx, %edx -; AVX-NEXT: orl %eax, %edx -; AVX-NEXT: vmovd %edx, %xmm1 -; AVX-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 -; AVX-NEXT: vsubsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 +; AVX-NEXT: vcvttpd2dq %xmm0, %xmm1 +; AVX-NEXT: vcvtdq2pd %xmm1, %xmm1 ; AVX-NEXT: vcmpneqsd %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: andl $1, %eax @@ -350,8 +316,8 @@ define zeroext i8 @une_f64_u32(double %x) nounwind readnone { ; AVX512-LABEL: une_f64_u32: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX512-NEXT: vcvttsd2usi %xmm0, %eax -; AVX512-NEXT: vcvtusi2sd %eax, %xmm7, %xmm1 +; AVX512-NEXT: vcvttpd2dq %xmm0, %xmm1 +; AVX512-NEXT: vcvtdq2pd %xmm1, %xmm1 ; AVX512-NEXT: vcmpneqsd %xmm0, %xmm1, %k0 ; AVX512-NEXT: kmovd %k0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax