From a45fcc9195e6a52d430420f4207a3d9d15ed0f18 Mon Sep 17 00:00:00 2001 From: Kavin Gnanapandithan Date: Thu, 9 Oct 2025 08:41:07 -0400 Subject: [PATCH 1/6] Added AVX512 handling for UI2P in lowerFPToIntToFP --- llvm/lib/Target/X86/X86ISelLowering.cpp | 66 +++- llvm/test/CodeGen/X86/isint.ll | 459 +++++++++++++++++++++++- 2 files changed, 508 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 2feb76e0eb7b4..9e209405bf99e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -19885,7 +19885,7 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL, // TODO: Allow FP_TO_UINT. SDValue CastToInt = CastToFP.getOperand(0); MVT VT = CastToFP.getSimpleValueType(); - if (CastToInt.getOpcode() != ISD::FP_TO_SINT || VT.isVector()) + if ((CastToInt.getOpcode() != ISD::FP_TO_SINT && CastToInt.getOpcode() != ISD::FP_TO_UINT) || VT.isVector()) return SDValue(); MVT IntVT = CastToInt.getSimpleValueType(); @@ -19897,22 +19897,68 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL, // See if we have 128-bit vector cast instructions for this type of cast. // We need cvttps2dq/cvttpd2dq and cvtdq2ps/cvtdq2pd. if (!Subtarget.hasSSE2() || (VT != MVT::f32 && VT != MVT::f64) || - IntVT != MVT::i32) + !(IntVT == MVT::i32 || (IntVT == MVT::i64 && Subtarget.hasDQI()))) return SDValue(); unsigned SrcSize = SrcVT.getSizeInBits(); unsigned IntSize = IntVT.getSizeInBits(); unsigned VTSize = VT.getSizeInBits(); - MVT VecSrcVT = MVT::getVectorVT(SrcVT, 128 / SrcSize); - MVT VecIntVT = MVT::getVectorVT(IntVT, 128 / IntSize); - MVT VecVT = MVT::getVectorVT(VT, 128 / VTSize); + unsigned ToIntOpcode, ToFPOpcode; + unsigned Width = 128; + bool IsUnsigned = CastToInt.getOpcode() == ISD::FP_TO_UINT; - // We need target-specific opcodes if this is v2f64 -> v4i32 -> v2f64. - unsigned ToIntOpcode = - SrcSize != IntSize ? 
X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT; - unsigned ToFPOpcode = - IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP; + if (IntVT == MVT::i32) { + if (IsUnsigned && !Subtarget.hasVLX()) + return SDValue(); // Need AVX512VL for unsigned i32 + if (Subtarget.hasVLX()) { + if (IsUnsigned) { + ToIntOpcode = SrcSize != IntSize ? X86ISD::CVTTP2UI : (unsigned)ISD::FP_TO_UINT; + ToFPOpcode = IntSize != VTSize ? X86ISD::CVTUI2P : (unsigned)ISD::UINT_TO_FP; + } else { + ToIntOpcode = SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT; + ToFPOpcode = IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP; + } + } else { + // SSE2 + ToIntOpcode = SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT; + ToFPOpcode = IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP; + } + } else { + if (Subtarget.hasVLX()) { + if (IsUnsigned) { + ToIntOpcode = SrcSize != IntSize ? X86ISD::CVTTP2UI : (unsigned)ISD::FP_TO_UINT; + ToFPOpcode = IntSize != VTSize ? X86ISD::CVTUI2P : (unsigned)ISD::UINT_TO_FP; + } else { + ToIntOpcode = SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT; + ToFPOpcode = IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP; + } + } else { + // Need to extend width for AVX512DQ + Width = 512; + ToIntOpcode = CastToInt.getOpcode(); + ToFPOpcode = IsUnsigned ? ISD::UINT_TO_FP : ISD::SINT_TO_FP; + } + } + + MVT VecSrcVT; + MVT VecIntVT; + MVT VecVT; + if (IntVT == MVT::i64) { + unsigned NumElts = Width / IntSize; + VecIntVT = MVT::getVectorVT(IntVT, NumElts); + + // minimum legal size is v4f32 + unsigned SrcElts = (SrcVT == MVT::f32) ? std::max(NumElts, 4u) : NumElts; + unsigned VTElts = (VT == MVT::f32) ? 
std::max(NumElts, 4u) : NumElts; + + VecSrcVT = MVT::getVectorVT(SrcVT, SrcElts); + VecVT = MVT::getVectorVT(VT, VTElts); + } else { + VecSrcVT = MVT::getVectorVT(SrcVT, Width / SrcSize); + VecIntVT = MVT::getVectorVT(IntVT, Width / IntSize); + VecVT = MVT::getVectorVT(VT, Width / VTSize); + } // sint_to_fp (fp_to_sint X) --> extelt (sint_to_fp (fp_to_sint (s2v X))), 0 // // We are not defining the high elements (for example, zero them) because diff --git a/llvm/test/CodeGen/X86/isint.ll b/llvm/test/CodeGen/X86/isint.ll index 8a56f49a6c755..d0b340ce37875 100644 --- a/llvm/test/CodeGen/X86/isint.ll +++ b/llvm/test/CodeGen/X86/isint.ll @@ -1,7 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=CHECK64 %s ; RUN: llc < %s -mtriple=i686-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=CHECK32 %s - +; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+avx512f | FileCheck -check-prefix=AVX512-NODQ %s +; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+avx512f,+avx512dq | FileCheck -check-prefix=AVX512-NODQ %s +; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck -check-prefix=AVX512VL %s ; PR19059 define i32 @isint_return(double %d) nounwind { @@ -24,6 +26,22 @@ define i32 @isint_return(double %d) nounwind { ; CHECK32-NEXT: movd %xmm1, %eax ; CHECK32-NEXT: andl $1, %eax ; CHECK32-NEXT: retl +; +; AVX512-NODQ-LABEL: isint_return: +; AVX512-NODQ: # %bb.0: +; AVX512-NODQ-NEXT: vcvttpd2dq %xmm0, %xmm1 +; AVX512-NODQ-NEXT: vcvtdq2pd %xmm1, %xmm1 +; AVX512-NODQ-NEXT: vcmpeqsd %xmm1, %xmm0, %k0 +; AVX512-NODQ-NEXT: kmovw %k0, %eax +; AVX512-NODQ-NEXT: retq +; +; AVX512VL-LABEL: isint_return: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm1 +; AVX512VL-NEXT: vcvtdq2pd %xmm1, %xmm1 +; AVX512VL-NEXT: vcmpeqsd %xmm1, %xmm0, %k0 +; AVX512VL-NEXT: kmovw %k0, %eax +; AVX512VL-NEXT: retq %i = fptosi double %d to i32 %e = 
sitofp i32 %i to double %c = fcmp oeq double %d, %e @@ -50,6 +68,221 @@ define i32 @isint_float_return(float %f) nounwind { ; CHECK32-NEXT: movd %xmm1, %eax ; CHECK32-NEXT: andl $1, %eax ; CHECK32-NEXT: retl +; +; AVX512-NODQ-LABEL: isint_float_return: +; AVX512-NODQ: # %bb.0: +; AVX512-NODQ-NEXT: vcvttps2dq %xmm0, %xmm1 +; AVX512-NODQ-NEXT: vcvtdq2ps %xmm1, %xmm1 +; AVX512-NODQ-NEXT: vcmpeqss %xmm1, %xmm0, %k0 +; AVX512-NODQ-NEXT: kmovw %k0, %eax +; AVX512-NODQ-NEXT: retq +; +; AVX512VL-LABEL: isint_float_return: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm1 +; AVX512VL-NEXT: vcvtdq2ps %xmm1, %xmm1 +; AVX512VL-NEXT: vcmpeqss %xmm1, %xmm0, %k0 +; AVX512VL-NEXT: kmovw %k0, %eax +; AVX512VL-NEXT: retq + %i = fptosi float %f to i32 + %g = sitofp i32 %i to float + %c = fcmp oeq float %f, %g + %z = zext i1 %c to i32 + ret i32 %z +} + +define i64 @isint64_float_return(float %f) nounwind { +; CHECK64-LABEL: isint64_float_return: +; CHECK64: # %bb.0: +; CHECK64-NEXT: cvttss2si %xmm0, %rax +; CHECK64-NEXT: cvtsi2ss %rax, %xmm1 +; CHECK64-NEXT: cmpeqss %xmm0, %xmm1 +; CHECK64-NEXT: movd %xmm1, %eax +; CHECK64-NEXT: andl $1, %eax +; CHECK64-NEXT: retq +; +; CHECK32-LABEL: isint64_float_return: +; CHECK32: # %bb.0: +; CHECK32-NEXT: pushl %ebp +; CHECK32-NEXT: movl %esp, %ebp +; CHECK32-NEXT: andl $-8, %esp +; CHECK32-NEXT: subl $32, %esp +; CHECK32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK32-NEXT: movss %xmm0, {{[0-9]+}}(%esp) +; CHECK32-NEXT: flds {{[0-9]+}}(%esp) +; CHECK32-NEXT: fnstcw {{[0-9]+}}(%esp) +; CHECK32-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: orl $3072, %eax # imm = 0xC00 +; CHECK32-NEXT: movw %ax, {{[0-9]+}}(%esp) +; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp) +; CHECK32-NEXT: fistpll {{[0-9]+}}(%esp) +; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp) +; CHECK32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK32-NEXT: movlps %xmm1, {{[0-9]+}}(%esp) +; CHECK32-NEXT: fildll {{[0-9]+}}(%esp) +; CHECK32-NEXT: fstps {{[0-9]+}}(%esp) +; 
CHECK32-NEXT: cmpeqss {{[0-9]+}}(%esp), %xmm0 +; CHECK32-NEXT: movd %xmm0, %eax +; CHECK32-NEXT: andl $1, %eax +; CHECK32-NEXT: xorl %edx, %edx +; CHECK32-NEXT: movl %ebp, %esp +; CHECK32-NEXT: popl %ebp +; CHECK32-NEXT: retl +; +; AVX512VL-LABEL: isint64_float_return: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vcvttps2qq %xmm0, %xmm1 +; AVX512VL-NEXT: vcvtqq2ps %xmm1, %xmm1 +; AVX512VL-NEXT: vcmpeqss %xmm1, %xmm0, %k0 +; AVX512VL-NEXT: kmovw %k0, %eax +; AVX512VL-NEXT: retq + %i = fptosi float %f to i64 + %g = sitofp i64 %i to float + %c = fcmp oeq float %f, %g + %z = zext i1 %c to i64 + ret i64 %z +} + +define i64 @isint64_return(double %d) nounwind { +; CHECK64-LABEL: isint64_return: +; CHECK64: # %bb.0: +; CHECK64-NEXT: cvttsd2si %xmm0, %rax +; CHECK64-NEXT: cvtsi2sd %rax, %xmm1 +; CHECK64-NEXT: cmpeqsd %xmm0, %xmm1 +; CHECK64-NEXT: movq %xmm1, %rax +; CHECK64-NEXT: andl $1, %eax +; CHECK64-NEXT: retq +; +; CHECK32-LABEL: isint64_return: +; CHECK32: # %bb.0: +; CHECK32-NEXT: pushl %ebp +; CHECK32-NEXT: movl %esp, %ebp +; CHECK32-NEXT: andl $-8, %esp +; CHECK32-NEXT: subl $32, %esp +; CHECK32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK32-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) +; CHECK32-NEXT: fldl {{[0-9]+}}(%esp) +; CHECK32-NEXT: fnstcw {{[0-9]+}}(%esp) +; CHECK32-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: orl $3072, %eax # imm = 0xC00 +; CHECK32-NEXT: movw %ax, {{[0-9]+}}(%esp) +; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp) +; CHECK32-NEXT: fistpll {{[0-9]+}}(%esp) +; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp) +; CHECK32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK32-NEXT: movlps %xmm1, {{[0-9]+}}(%esp) +; CHECK32-NEXT: fildll {{[0-9]+}}(%esp) +; CHECK32-NEXT: fstpl {{[0-9]+}}(%esp) +; CHECK32-NEXT: cmpeqsd {{[0-9]+}}(%esp), %xmm0 +; CHECK32-NEXT: movd %xmm0, %eax +; CHECK32-NEXT: andl $1, %eax +; CHECK32-NEXT: xorl %edx, %edx +; CHECK32-NEXT: movl %ebp, %esp +; CHECK32-NEXT: popl %ebp +; CHECK32-NEXT: retl +; +; AVX512VL-LABEL: isint64_return: +; AVX512VL: # 
%bb.0: +; AVX512VL-NEXT: vcvttpd2qq %xmm0, %xmm1 +; AVX512VL-NEXT: vcvtqq2pd %xmm1, %xmm1 +; AVX512VL-NEXT: vcmpeqsd %xmm1, %xmm0, %k0 +; AVX512VL-NEXT: kmovw %k0, %eax +; AVX512VL-NEXT: retq + %i = fptosi double %d to i64 + %g = sitofp i64 %i to double + %c = fcmp oeq double %d, %g + %z = zext i1 %c to i64 + ret i64 %z +} + +define i32 @isuint_return(double %d) nounwind { +; CHECK64-LABEL: isuint_return: +; CHECK64: # %bb.0: +; CHECK64-NEXT: cvttsd2si %xmm0, %rax +; CHECK64-NEXT: movl %eax, %eax +; CHECK64-NEXT: cvtsi2sd %rax, %xmm1 +; CHECK64-NEXT: cmpeqsd %xmm0, %xmm1 +; CHECK64-NEXT: movq %xmm1, %rax +; CHECK64-NEXT: andl $1, %eax +; CHECK64-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK64-NEXT: retq +; +; CHECK32-LABEL: isuint_return: +; CHECK32: # %bb.0: +; CHECK32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK32-NEXT: cvttsd2si %xmm0, %eax +; CHECK32-NEXT: movl %eax, %ecx +; CHECK32-NEXT: sarl $31, %ecx +; CHECK32-NEXT: movapd %xmm0, %xmm1 +; CHECK32-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; CHECK32-NEXT: cvttsd2si %xmm1, %edx +; CHECK32-NEXT: andl %ecx, %edx +; CHECK32-NEXT: orl %eax, %edx +; CHECK32-NEXT: movd %edx, %xmm1 +; CHECK32-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; CHECK32-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; CHECK32-NEXT: cmpeqsd %xmm0, %xmm1 +; CHECK32-NEXT: movd %xmm1, %eax +; CHECK32-NEXT: andl $1, %eax +; CHECK32-NEXT: retl +; +; AVX512-NODQ-LABEL: isuint_return: +; AVX512-NODQ: # %bb.0: +; AVX512-NODQ-NEXT: vcvttsd2usi %xmm0, %eax +; AVX512-NODQ-NEXT: vcvtusi2sd %eax, %xmm15, %xmm1 +; AVX512-NODQ-NEXT: vcmpeqsd %xmm1, %xmm0, %k0 +; AVX512-NODQ-NEXT: kmovw %k0, %eax +; AVX512-NODQ-NEXT: retq +; +; AVX512VL-LABEL: isuint_return: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vcvttsd2usi %xmm0, %eax +; AVX512VL-NEXT: vcvtusi2sd %eax, %xmm15, %xmm1 +; AVX512VL-NEXT: vcmpeqsd %xmm1, %xmm0, %k0 +; AVX512VL-NEXT: kmovw %k0, %eax +; AVX512VL-NEXT: retq + %i = fptoui double %d to i32 + %e = uitofp i32 %i to double + %c = fcmp 
oeq double %d, %e + %z = zext i1 %c to i32 + ret i32 %z +} + +define i32 @isuint_float_return(float %f) nounwind { +; CHECK64-LABEL: isuint_float_return: +; CHECK64: # %bb.0: +; CHECK64-NEXT: cvttps2dq %xmm0, %xmm1 +; CHECK64-NEXT: cvtdq2ps %xmm1, %xmm1 +; CHECK64-NEXT: cmpeqss %xmm0, %xmm1 +; CHECK64-NEXT: movd %xmm1, %eax +; CHECK64-NEXT: andl $1, %eax +; CHECK64-NEXT: retq +; +; CHECK32-LABEL: isuint_float_return: +; CHECK32: # %bb.0: +; CHECK32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK32-NEXT: cvttps2dq %xmm0, %xmm1 +; CHECK32-NEXT: cvtdq2ps %xmm1, %xmm1 +; CHECK32-NEXT: cmpeqss %xmm0, %xmm1 +; CHECK32-NEXT: movd %xmm1, %eax +; CHECK32-NEXT: andl $1, %eax +; CHECK32-NEXT: retl +; +; AVX512-NODQ-LABEL: isuint_float_return: +; AVX512-NODQ: # %bb.0: +; AVX512-NODQ-NEXT: vcvttps2dq %xmm0, %xmm1 +; AVX512-NODQ-NEXT: vcvtdq2ps %xmm1, %xmm1 +; AVX512-NODQ-NEXT: vcmpeqss %xmm1, %xmm0, %k0 +; AVX512-NODQ-NEXT: kmovw %k0, %eax +; AVX512-NODQ-NEXT: retq +; +; AVX512VL-LABEL: isuint_float_return: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm1 +; AVX512VL-NEXT: vcvtdq2ps %xmm1, %xmm1 +; AVX512VL-NEXT: vcmpeqss %xmm1, %xmm0, %k0 +; AVX512VL-NEXT: kmovw %k0, %eax +; AVX512VL-NEXT: retq %i = fptosi float %f to i32 %g = sitofp i32 %i to float %c = fcmp oeq float %f, %g @@ -57,6 +290,190 @@ define i32 @isint_float_return(float %f) nounwind { ret i32 %z } +define i64 @isuint64_return(double %d) nounwind { +; CHECK64-LABEL: isuint64_return: +; CHECK64: # %bb.0: +; CHECK64-NEXT: cvttsd2si %xmm0, %rax +; CHECK64-NEXT: movq %rax, %rcx +; CHECK64-NEXT: sarq $63, %rcx +; CHECK64-NEXT: movapd %xmm0, %xmm1 +; CHECK64-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK64-NEXT: cvttsd2si %xmm1, %rdx +; CHECK64-NEXT: andq %rcx, %rdx +; CHECK64-NEXT: orq %rax, %rdx +; CHECK64-NEXT: movq %rdx, %xmm1 +; CHECK64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] +; CHECK64-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK64-NEXT: 
movapd %xmm1, %xmm2 +; CHECK64-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] +; CHECK64-NEXT: addsd %xmm1, %xmm2 +; CHECK64-NEXT: cmpeqsd %xmm0, %xmm2 +; CHECK64-NEXT: movq %xmm2, %rax +; CHECK64-NEXT: andl $1, %eax +; CHECK64-NEXT: retq +; +; CHECK32-LABEL: isuint64_return: +; CHECK32: # %bb.0: +; CHECK32-NEXT: pushl %ebp +; CHECK32-NEXT: movl %esp, %ebp +; CHECK32-NEXT: andl $-8, %esp +; CHECK32-NEXT: subl $16, %esp +; CHECK32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK32-NEXT: movsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0] +; CHECK32-NEXT: ucomisd %xmm0, %xmm1 +; CHECK32-NEXT: jbe .LBB6_2 +; CHECK32-NEXT: # %bb.1: +; CHECK32-NEXT: xorpd %xmm1, %xmm1 +; CHECK32-NEXT: .LBB6_2: +; CHECK32-NEXT: movapd %xmm0, %xmm2 +; CHECK32-NEXT: subsd %xmm1, %xmm2 +; CHECK32-NEXT: movsd %xmm2, {{[0-9]+}}(%esp) +; CHECK32-NEXT: setbe %al +; CHECK32-NEXT: fldl {{[0-9]+}}(%esp) +; CHECK32-NEXT: fnstcw {{[0-9]+}}(%esp) +; CHECK32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; CHECK32-NEXT: orl $3072, %ecx # imm = 0xC00 +; CHECK32-NEXT: movw %cx, {{[0-9]+}}(%esp) +; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp) +; CHECK32-NEXT: fistpll {{[0-9]+}}(%esp) +; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp) +; CHECK32-NEXT: movzbl %al, %eax +; CHECK32-NEXT: shll $31, %eax +; CHECK32-NEXT: xorl {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: movd %eax, %xmm1 +; CHECK32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; CHECK32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[1],mem[1] +; CHECK32-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 +; CHECK32-NEXT: movapd %xmm2, %xmm1 +; CHECK32-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] +; CHECK32-NEXT: addsd %xmm2, %xmm1 +; CHECK32-NEXT: cmpeqsd %xmm0, %xmm1 +; CHECK32-NEXT: movd %xmm1, %eax +; CHECK32-NEXT: andl $1, %eax +; CHECK32-NEXT: xorl %edx, %edx +; CHECK32-NEXT: movl %ebp, %esp +; CHECK32-NEXT: popl %ebp +; CHECK32-NEXT: retl +; +; AVX512-NODQ-LABEL: isuint64_return: +; 
AVX512-NODQ: # %bb.0: +; AVX512-NODQ-NEXT: vcvttsd2usi %xmm0, %rax +; AVX512-NODQ-NEXT: vcvtusi2sd %rax, %xmm15, %xmm1 +; AVX512-NODQ-NEXT: vcmpeqsd %xmm1, %xmm0, %k0 +; AVX512-NODQ-NEXT: kmovw %k0, %eax +; AVX512-NODQ-NEXT: retq +; +; AVX512VL-LABEL: isuint64_return: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax +; AVX512VL-NEXT: vcvtusi2sd %rax, %xmm15, %xmm1 +; AVX512VL-NEXT: vcmpeqsd %xmm1, %xmm0, %k0 +; AVX512VL-NEXT: kmovw %k0, %eax +; AVX512VL-NEXT: retq + %i = fptoui double %d to i64 + %e = uitofp i64 %i to double + %c = fcmp oeq double %d, %e + %z = zext i1 %c to i64 + ret i64 %z +} + +define i64 @isuint64_float_return(float %f) nounwind { +; CHECK64-LABEL: isuint64_float_return: +; CHECK64: # %bb.0: +; CHECK64-NEXT: cvttss2si %xmm0, %rcx +; CHECK64-NEXT: movq %rcx, %rdx +; CHECK64-NEXT: sarq $63, %rdx +; CHECK64-NEXT: movaps %xmm0, %xmm1 +; CHECK64-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK64-NEXT: cvttss2si %xmm1, %rax +; CHECK64-NEXT: andq %rdx, %rax +; CHECK64-NEXT: orq %rcx, %rax +; CHECK64-NEXT: js .LBB7_1 +; CHECK64-NEXT: # %bb.2: +; CHECK64-NEXT: xorps %xmm1, %xmm1 +; CHECK64-NEXT: cvtsi2ss %rax, %xmm1 +; CHECK64-NEXT: jmp .LBB7_3 +; CHECK64-NEXT: .LBB7_1: +; CHECK64-NEXT: movq %rax, %rcx +; CHECK64-NEXT: shrq %rcx +; CHECK64-NEXT: andl $1, %eax +; CHECK64-NEXT: orq %rcx, %rax +; CHECK64-NEXT: xorps %xmm1, %xmm1 +; CHECK64-NEXT: cvtsi2ss %rax, %xmm1 +; CHECK64-NEXT: addss %xmm1, %xmm1 +; CHECK64-NEXT: .LBB7_3: +; CHECK64-NEXT: cmpeqss %xmm1, %xmm0 +; CHECK64-NEXT: movd %xmm0, %eax +; CHECK64-NEXT: andl $1, %eax +; CHECK64-NEXT: retq +; +; CHECK32-LABEL: isuint64_float_return: +; CHECK32: # %bb.0: +; CHECK32-NEXT: pushl %ebp +; CHECK32-NEXT: movl %esp, %ebp +; CHECK32-NEXT: andl $-8, %esp +; CHECK32-NEXT: subl $32, %esp +; CHECK32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK32-NEXT: movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] +; CHECK32-NEXT: ucomiss %xmm0, %xmm1 +; CHECK32-NEXT: jbe 
.LBB7_2 +; CHECK32-NEXT: # %bb.1: +; CHECK32-NEXT: xorps %xmm1, %xmm1 +; CHECK32-NEXT: .LBB7_2: +; CHECK32-NEXT: movaps %xmm0, %xmm2 +; CHECK32-NEXT: subss %xmm1, %xmm2 +; CHECK32-NEXT: movss %xmm2, {{[0-9]+}}(%esp) +; CHECK32-NEXT: setbe %al +; CHECK32-NEXT: flds {{[0-9]+}}(%esp) +; CHECK32-NEXT: fnstcw {{[0-9]+}}(%esp) +; CHECK32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; CHECK32-NEXT: orl $3072, %ecx # imm = 0xC00 +; CHECK32-NEXT: movw %cx, {{[0-9]+}}(%esp) +; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp) +; CHECK32-NEXT: fistpll {{[0-9]+}}(%esp) +; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp) +; CHECK32-NEXT: movzbl %al, %eax +; CHECK32-NEXT: shll $31, %eax +; CHECK32-NEXT: xorl {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: movd %eax, %xmm1 +; CHECK32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; CHECK32-NEXT: movq %xmm2, {{[0-9]+}}(%esp) +; CHECK32-NEXT: shrl $31, %eax +; CHECK32-NEXT: fildll {{[0-9]+}}(%esp) +; CHECK32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; CHECK32-NEXT: fstps {{[0-9]+}}(%esp) +; CHECK32-NEXT: cmpeqss {{[0-9]+}}(%esp), %xmm0 +; CHECK32-NEXT: movd %xmm0, %eax +; CHECK32-NEXT: andl $1, %eax +; CHECK32-NEXT: xorl %edx, %edx +; CHECK32-NEXT: movl %ebp, %esp +; CHECK32-NEXT: popl %ebp +; CHECK32-NEXT: retl +; +; AVX512-NODQ-LABEL: isuint64_float_return: +; AVX512-NODQ: # %bb.0: +; AVX512-NODQ-NEXT: vcvttss2usi %xmm0, %rax +; AVX512-NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm1 +; AVX512-NODQ-NEXT: vcmpeqss %xmm1, %xmm0, %k0 +; AVX512-NODQ-NEXT: kmovw %k0, %eax +; AVX512-NODQ-NEXT: retq +; +; AVX512VL-LABEL: isuint64_float_return: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax +; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm15, %xmm1 +; AVX512VL-NEXT: vcmpeqss %xmm1, %xmm0, %k0 +; AVX512VL-NEXT: kmovw %k0, %eax +; AVX512VL-NEXT: retq + %i = fptoui float %f to i64 + %g = uitofp i64 %i to float + %c = fcmp oeq float %f, %g + %z = zext i1 %c to i64 + ret i64 %z +} + declare void 
@foo() define void @isint_branch(double %d) nounwind { @@ -65,13 +482,13 @@ define void @isint_branch(double %d) nounwind { ; CHECK64-NEXT: cvttpd2dq %xmm0, %xmm1 ; CHECK64-NEXT: cvtdq2pd %xmm1, %xmm1 ; CHECK64-NEXT: ucomisd %xmm1, %xmm0 -; CHECK64-NEXT: jne .LBB2_2 -; CHECK64-NEXT: jp .LBB2_2 +; CHECK64-NEXT: jne .LBB8_2 +; CHECK64-NEXT: jp .LBB8_2 ; CHECK64-NEXT: # %bb.1: # %true ; CHECK64-NEXT: pushq %rax ; CHECK64-NEXT: callq foo@PLT ; CHECK64-NEXT: popq %rax -; CHECK64-NEXT: .LBB2_2: # %false +; CHECK64-NEXT: .LBB8_2: # %false ; CHECK64-NEXT: retq ; ; CHECK32-LABEL: isint_branch: @@ -80,12 +497,40 @@ define void @isint_branch(double %d) nounwind { ; CHECK32-NEXT: cvttpd2dq %xmm0, %xmm1 ; CHECK32-NEXT: cvtdq2pd %xmm1, %xmm1 ; CHECK32-NEXT: ucomisd %xmm1, %xmm0 -; CHECK32-NEXT: jne .LBB2_2 -; CHECK32-NEXT: jp .LBB2_2 +; CHECK32-NEXT: jne .LBB8_2 +; CHECK32-NEXT: jp .LBB8_2 ; CHECK32-NEXT: # %bb.1: # %true ; CHECK32-NEXT: calll foo@PLT -; CHECK32-NEXT: .LBB2_2: # %false +; CHECK32-NEXT: .LBB8_2: # %false ; CHECK32-NEXT: retl +; +; AVX512-NODQ-LABEL: isint_branch: +; AVX512-NODQ: # %bb.0: +; AVX512-NODQ-NEXT: vcvttpd2dq %xmm0, %xmm1 +; AVX512-NODQ-NEXT: vcvtdq2pd %xmm1, %xmm1 +; AVX512-NODQ-NEXT: vucomisd %xmm1, %xmm0 +; AVX512-NODQ-NEXT: jne .LBB8_2 +; AVX512-NODQ-NEXT: jp .LBB8_2 +; AVX512-NODQ-NEXT: # %bb.1: # %true +; AVX512-NODQ-NEXT: pushq %rax +; AVX512-NODQ-NEXT: callq foo@PLT +; AVX512-NODQ-NEXT: popq %rax +; AVX512-NODQ-NEXT: .LBB8_2: # %false +; AVX512-NODQ-NEXT: retq +; +; AVX512VL-LABEL: isint_branch: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm1 +; AVX512VL-NEXT: vcvtdq2pd %xmm1, %xmm1 +; AVX512VL-NEXT: vucomisd %xmm1, %xmm0 +; AVX512VL-NEXT: jne .LBB8_2 +; AVX512VL-NEXT: jp .LBB8_2 +; AVX512VL-NEXT: # %bb.1: # %true +; AVX512VL-NEXT: pushq %rax +; AVX512VL-NEXT: callq foo@PLT +; AVX512VL-NEXT: popq %rax +; AVX512VL-NEXT: .LBB8_2: # %false +; AVX512VL-NEXT: retq %i = fptosi double %d to i32 %e = sitofp i32 %i to double %c = fcmp oeq 
double %d, %e From b7f1545f29a61ca9cd1b2bf78ba134329c2709af Mon Sep 17 00:00:00 2001 From: Kavin Gnanapandithan Date: Thu, 9 Oct 2025 09:07:02 -0400 Subject: [PATCH 2/6] Formatted lowerFPToIntToFP using git clang-format --- llvm/lib/Target/X86/X86ISelLowering.cpp | 44 ++++++++++++++++--------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 9e209405bf99e..946dbf8361aaf 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -19885,7 +19885,9 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL, // TODO: Allow FP_TO_UINT. SDValue CastToInt = CastToFP.getOperand(0); MVT VT = CastToFP.getSimpleValueType(); - if ((CastToInt.getOpcode() != ISD::FP_TO_SINT && CastToInt.getOpcode() != ISD::FP_TO_UINT) || VT.isVector()) + if ((CastToInt.getOpcode() != ISD::FP_TO_SINT && + CastToInt.getOpcode() != ISD::FP_TO_UINT) || + VT.isVector()) return SDValue(); MVT IntVT = CastToInt.getSimpleValueType(); @@ -19913,25 +19915,35 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL, if (Subtarget.hasVLX()) { if (IsUnsigned) { - ToIntOpcode = SrcSize != IntSize ? X86ISD::CVTTP2UI : (unsigned)ISD::FP_TO_UINT; - ToFPOpcode = IntSize != VTSize ? X86ISD::CVTUI2P : (unsigned)ISD::UINT_TO_FP; + ToIntOpcode = + SrcSize != IntSize ? X86ISD::CVTTP2UI : (unsigned)ISD::FP_TO_UINT; + ToFPOpcode = + IntSize != VTSize ? X86ISD::CVTUI2P : (unsigned)ISD::UINT_TO_FP; } else { - ToIntOpcode = SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT; - ToFPOpcode = IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP; + ToIntOpcode = + SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT; + ToFPOpcode = + IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP; } - } else { + } else { // SSE2 - ToIntOpcode = SrcSize != IntSize ? 
X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT; - ToFPOpcode = IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP; + ToIntOpcode = + SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT; + ToFPOpcode = + IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP; } } else { if (Subtarget.hasVLX()) { if (IsUnsigned) { - ToIntOpcode = SrcSize != IntSize ? X86ISD::CVTTP2UI : (unsigned)ISD::FP_TO_UINT; - ToFPOpcode = IntSize != VTSize ? X86ISD::CVTUI2P : (unsigned)ISD::UINT_TO_FP; + ToIntOpcode = + SrcSize != IntSize ? X86ISD::CVTTP2UI : (unsigned)ISD::FP_TO_UINT; + ToFPOpcode = + IntSize != VTSize ? X86ISD::CVTUI2P : (unsigned)ISD::UINT_TO_FP; } else { - ToIntOpcode = SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT; - ToFPOpcode = IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP; + ToIntOpcode = + SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT; + ToFPOpcode = + IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP; } } else { // Need to extend width for AVX512DQ @@ -19941,19 +19953,19 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL, } } - MVT VecSrcVT; + MVT VecSrcVT; MVT VecIntVT; MVT VecVT; if (IntVT == MVT::i64) { unsigned NumElts = Width / IntSize; VecIntVT = MVT::getVectorVT(IntVT, NumElts); - + // minimum legal size is v4f32 unsigned SrcElts = (SrcVT == MVT::f32) ? std::max(NumElts, 4u) : NumElts; unsigned VTElts = (VT == MVT::f32) ? 
std::max(NumElts, 4u) : NumElts; - + VecSrcVT = MVT::getVectorVT(SrcVT, SrcElts); - VecVT = MVT::getVectorVT(VT, VTElts); + VecVT = MVT::getVectorVT(VT, VTElts); } else { VecSrcVT = MVT::getVectorVT(SrcVT, Width / SrcSize); VecIntVT = MVT::getVectorVT(IntVT, Width / IntSize); From ba8fad5dbc83823aa99c9ef898246a2c7c0c9ef8 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 10 Oct 2025 13:35:46 +0100 Subject: [PATCH 3/6] [X86] Add additional test coverage for #160111 --- llvm/test/CodeGen/X86/fp-int-fp-cvt.ll | 240 +++++++++++++++++++++++++ 1 file changed, 240 insertions(+) create mode 100644 llvm/test/CodeGen/X86/fp-int-fp-cvt.ll diff --git a/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll b/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll new file mode 100644 index 0000000000000..b6c17cecffbd6 --- /dev/null +++ b/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll @@ -0,0 +1,240 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64 | FileCheck %s --check-prefixes=SSE +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX,AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX,AVX512,AVX512-VL +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v4 -mattr=-avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512-NOVL + +; +; fptosi -> sitofp +; + +define double @scvtf64_i32(double %a0) { +; SSE-LABEL: scvtf64_i32: +; SSE: # %bb.0: +; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 +; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: scvtf64_i32: +; AVX: # %bb.0: +; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 +; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX-NEXT: retq + %ii = fptosi double %a0 to i32 + %ff = sitofp i32 %ii to double + ret double %ff +} + +define double @scvtf64_i64(double %a0) { +; SSE-LABEL: scvtf64_i64: +; SSE: # %bb.0: +; SSE-NEXT: 
cvttsd2si %xmm0, %rax +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: cvtsi2sd %rax, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: scvtf64_i64: +; AVX: # %bb.0: +; AVX-NEXT: vcvttsd2si %xmm0, %rax +; AVX-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0 +; AVX-NEXT: retq + %ii = fptosi double %a0 to i64 + %ff = sitofp i64 %ii to double + ret double %ff +} + +define float @scvtf32_i32(float %a0) { +; SSE-LABEL: scvtf32_i32: +; SSE: # %bb.0: +; SSE-NEXT: cvttps2dq %xmm0, %xmm0 +; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: scvtf32_i32: +; AVX: # %bb.0: +; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 +; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX-NEXT: retq + %ii = fptosi float %a0 to i32 + %ff = sitofp i32 %ii to float + ret float %ff +} + +define float @scvtf32_i64(float %a0) { +; SSE-LABEL: scvtf32_i64: +; SSE: # %bb.0: +; SSE-NEXT: cvttss2si %xmm0, %rax +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: cvtsi2ss %rax, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: scvtf32_i64: +; AVX: # %bb.0: +; AVX-NEXT: vcvttss2si %xmm0, %rax +; AVX-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0 +; AVX-NEXT: retq + %ii = fptosi float %a0 to i64 + %ff = sitofp i64 %ii to float + ret float %ff +} + +; +; fptoui -> uitofp +; + +define double @ucvtf64_i32(double %a0) { +; SSE-LABEL: ucvtf64_i32: +; SSE: # %bb.0: +; SSE-NEXT: cvttsd2si %xmm0, %rax +; SSE-NEXT: movl %eax, %eax +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: cvtsi2sd %rax, %xmm0 +; SSE-NEXT: retq +; +; AVX2-LABEL: ucvtf64_i32: +; AVX2: # %bb.0: +; AVX2-NEXT: vcvttsd2si %xmm0, %rax +; AVX2-NEXT: movl %eax, %eax +; AVX2-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0 +; AVX2-NEXT: retq +; +; AVX512-LABEL: ucvtf64_i32: +; AVX512: # %bb.0: +; AVX512-NEXT: vcvttsd2usi %xmm0, %eax +; AVX512-NEXT: vcvtusi2sd %eax, %xmm15, %xmm0 +; AVX512-NEXT: retq + %ii = fptoui double %a0 to i32 + %ff = uitofp i32 %ii to double + ret double %ff +} + +define double @ucvtf64_i64(double %a0) { +; SSE-LABEL: ucvtf64_i64: +; SSE: # %bb.0: +; SSE-NEXT: cvttsd2si %xmm0, %rax +; SSE-NEXT: movq 
%rax, %rcx +; SSE-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: cvttsd2si %xmm0, %rdx +; SSE-NEXT: sarq $63, %rcx +; SSE-NEXT: andq %rcx, %rdx +; SSE-NEXT: orq %rax, %rdx +; SSE-NEXT: movq %rdx, %xmm1 +; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] +; SSE-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE-NEXT: movapd %xmm1, %xmm0 +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] +; SSE-NEXT: addsd %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX2-LABEL: ucvtf64_i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vcvttsd2si %xmm0, %rax +; AVX2-NEXT: movq %rax, %rcx +; AVX2-NEXT: vsubsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: sarq $63, %rcx +; AVX2-NEXT: vcvttsd2si %xmm0, %rdx +; AVX2-NEXT: andq %rcx, %rdx +; AVX2-NEXT: orq %rax, %rdx +; AVX2-NEXT: vmovq %rdx, %xmm0 +; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX2-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX2-NEXT: vaddsd %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: retq +; +; AVX512-LABEL: ucvtf64_i64: +; AVX512: # %bb.0: +; AVX512-NEXT: vcvttsd2usi %xmm0, %rax +; AVX512-NEXT: vcvtusi2sd %rax, %xmm15, %xmm0 +; AVX512-NEXT: retq + %ii = fptoui double %a0 to i64 + %ff = uitofp i64 %ii to double + ret double %ff +} + +define float @ucvtf32_i32(float %a0) { +; SSE-LABEL: ucvtf32_i32: +; SSE: # %bb.0: +; SSE-NEXT: cvttss2si %xmm0, %rax +; SSE-NEXT: movl %eax, %eax +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: cvtsi2ss %rax, %xmm0 +; SSE-NEXT: retq +; +; AVX2-LABEL: ucvtf32_i32: +; AVX2: # %bb.0: +; AVX2-NEXT: vcvttss2si %xmm0, %rax +; AVX2-NEXT: movl %eax, %eax +; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0 +; AVX2-NEXT: retq +; +; AVX512-LABEL: ucvtf32_i32: +; AVX512: # %bb.0: +; AVX512-NEXT: vcvttss2usi %xmm0, %eax +; AVX512-NEXT: vcvtusi2ss %eax, %xmm15, %xmm0 +; AVX512-NEXT: retq + %ii = fptoui float %a0 to i32 + %ff = uitofp i32 %ii to float + ret float %ff +} + +define float 
@ucvtf32_i64(float %a0) { +; SSE-LABEL: ucvtf32_i64: +; SSE: # %bb.0: +; SSE-NEXT: cvttss2si %xmm0, %rcx +; SSE-NEXT: movq %rcx, %rdx +; SSE-NEXT: sarq $63, %rdx +; SSE-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: cvttss2si %xmm0, %rax +; SSE-NEXT: andq %rdx, %rax +; SSE-NEXT: orq %rcx, %rax +; SSE-NEXT: js .LBB7_1 +; SSE-NEXT: # %bb.2: +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: cvtsi2ss %rax, %xmm0 +; SSE-NEXT: retq +; SSE-NEXT: .LBB7_1: +; SSE-NEXT: movq %rax, %rcx +; SSE-NEXT: shrq %rcx +; SSE-NEXT: andl $1, %eax +; SSE-NEXT: orq %rcx, %rax +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: cvtsi2ss %rax, %xmm0 +; SSE-NEXT: addss %xmm0, %xmm0 +; SSE-NEXT: retq +; +; AVX2-LABEL: ucvtf32_i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vcvttss2si %xmm0, %rcx +; AVX2-NEXT: movq %rcx, %rdx +; AVX2-NEXT: sarq $63, %rdx +; AVX2-NEXT: vsubss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vcvttss2si %xmm0, %rax +; AVX2-NEXT: andq %rdx, %rax +; AVX2-NEXT: orq %rcx, %rax +; AVX2-NEXT: js .LBB7_1 +; AVX2-NEXT: # %bb.2: +; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0 +; AVX2-NEXT: retq +; AVX2-NEXT: .LBB7_1: +; AVX2-NEXT: movq %rax, %rcx +; AVX2-NEXT: shrq %rcx +; AVX2-NEXT: andl $1, %eax +; AVX2-NEXT: orq %rcx, %rax +; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0 +; AVX2-NEXT: vaddss %xmm0, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; AVX512-LABEL: ucvtf32_i64: +; AVX512: # %bb.0: +; AVX512-NEXT: vcvttss2usi %xmm0, %rax +; AVX512-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0 +; AVX512-NEXT: retq + %ii = fptoui float %a0 to i64 + %ff = uitofp i64 %ii to float + ret float %ff +} +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; AVX512-NOVL: {{.*}} +; AVX512-VL: {{.*}} From 8d084530d213bb126179c03793be1555083081f6 Mon Sep 17 00:00:00 2001 From: Kavin Gnanapandithan Date: Sat, 11 Oct 2025 16:11:44 -0400 Subject: [PATCH 4/6] Added i64 handling in lowerFPToIntToFP & modified associated test case --- llvm/lib/Target/X86/X86ISelLowering.cpp | 85 ++++++++++--------------- llvm/test/CodeGen/X86/fp-int-fp-cvt.ll | 53 +++++++++++---- 2 files changed, 72 insertions(+), 66 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e258f2793f66d..2813991d9c1aa 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -19934,68 +19934,47 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL, unsigned Width = 128; bool IsUnsigned = CastToInt.getOpcode() == ISD::FP_TO_UINT; - if (IntVT == MVT::i32) { - if (IsUnsigned && !Subtarget.hasVLX()) - return SDValue(); // Need AVX512VL for unsigned i32 - - if (Subtarget.hasVLX()) { - if (IsUnsigned) { - ToIntOpcode = - SrcSize != IntSize ? X86ISD::CVTTP2UI : (unsigned)ISD::FP_TO_UINT; - ToFPOpcode = - IntSize != VTSize ? X86ISD::CVTUI2P : (unsigned)ISD::UINT_TO_FP; - } else { - ToIntOpcode = - SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT; - ToFPOpcode = - IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP; - } + if (Subtarget.hasVLX() && IntVT == MVT::i64) { + // AVX512DQ+VLX + if (IsUnsigned) { + ToIntOpcode = + SrcSize != IntSize ? X86ISD::CVTTP2UI : (unsigned)ISD::FP_TO_UINT; + ToFPOpcode = + IntSize != VTSize ? X86ISD::CVTUI2P : (unsigned)ISD::UINT_TO_FP; } else { - // SSE2 ToIntOpcode = SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT; ToFPOpcode = IntSize != VTSize ? 
X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP; } + } else if (IntVT == MVT::i64) { + // Need to extend width for AVX512DQ without AVX512VL + Width = 512; + ToIntOpcode = CastToInt.getOpcode(); + ToFPOpcode = IsUnsigned ? ISD::UINT_TO_FP : ISD::SINT_TO_FP; } else { - if (Subtarget.hasVLX()) { - if (IsUnsigned) { - ToIntOpcode = - SrcSize != IntSize ? X86ISD::CVTTP2UI : (unsigned)ISD::FP_TO_UINT; - ToFPOpcode = - IntSize != VTSize ? X86ISD::CVTUI2P : (unsigned)ISD::UINT_TO_FP; - } else { - ToIntOpcode = - SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT; - ToFPOpcode = - IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP; - } - } else { - // Need to extend width for AVX512DQ - Width = 512; - ToIntOpcode = CastToInt.getOpcode(); - ToFPOpcode = IsUnsigned ? ISD::UINT_TO_FP : ISD::SINT_TO_FP; - } - } - - MVT VecSrcVT; - MVT VecIntVT; - MVT VecVT; - if (IntVT == MVT::i64) { - unsigned NumElts = Width / IntSize; - VecIntVT = MVT::getVectorVT(IntVT, NumElts); - - // minimum legal size is v4f32 - unsigned SrcElts = (SrcVT == MVT::f32) ? std::max(NumElts, 4u) : NumElts; - unsigned VTElts = (VT == MVT::f32) ? std::max(NumElts, 4u) : NumElts; - - VecSrcVT = MVT::getVectorVT(SrcVT, SrcElts); - VecVT = MVT::getVectorVT(VT, VTElts); + // SSE2 + ToIntOpcode = + SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT; + ToFPOpcode = + IntSize != VTSize ? 
X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP; + } + + MVT VecSrcVT, VecIntVT, VecVT; + unsigned NumElts = Width / IntSize; + VecIntVT = MVT::getVectorVT(IntVT, NumElts); + unsigned SrcElts, VTElts; + // vcvttps2qq cannot convert v16f32 <-> v8i64 + if (IntVT == MVT::i64 && Width == 512) { + SrcElts = NumElts; + VTElts = NumElts; } else { - VecSrcVT = MVT::getVectorVT(SrcVT, Width / SrcSize); - VecIntVT = MVT::getVectorVT(IntVT, Width / IntSize); - VecVT = MVT::getVectorVT(VT, Width / VTSize); + SrcElts = Width / SrcSize; + VTElts = Width / VTSize; } + + VecSrcVT = MVT::getVectorVT(SrcVT, SrcElts); + VecVT = MVT::getVectorVT(VT, VTElts); // sint_to_fp (fp_to_sint X) --> extelt (sint_to_fp (fp_to_sint (s2v X))), 0 // // We are not defining the high elements (for example, zero them) because diff --git a/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll b/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll index b6c17cecffbd6..c0a9c6113b9e8 100644 --- a/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll +++ b/llvm/test/CodeGen/X86/fp-int-fp-cvt.ll @@ -34,11 +34,26 @@ define double @scvtf64_i64(double %a0) { ; SSE-NEXT: cvtsi2sd %rax, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: scvtf64_i64: -; AVX: # %bb.0: -; AVX-NEXT: vcvttsd2si %xmm0, %rax -; AVX-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0 -; AVX-NEXT: retq +; AVX2-LABEL: scvtf64_i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vcvttsd2si %xmm0, %rax +; AVX2-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0 +; AVX2-NEXT: retq +; +; AVX512-VL-LABEL: scvtf64_i64: +; AVX512-VL: # %bb.0: +; AVX512-VL-NEXT: vcvttpd2qq %xmm0, %xmm0 +; AVX512-VL-NEXT: vcvtqq2pd %xmm0, %xmm0 +; AVX512-VL-NEXT: retq +; +; AVX512-NOVL-LABEL: scvtf64_i64: +; AVX512-NOVL: # %bb.0: +; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512-NOVL-NEXT: vcvttpd2qq %zmm0, %zmm0 +; AVX512-NOVL-NEXT: vcvtqq2pd %zmm0, %zmm0 +; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512-NOVL-NEXT: vzeroupper +; AVX512-NOVL-NEXT: retq %ii = fptosi double %a0 to i64 %ff = sitofp i64 %ii to double ret double 
%ff @@ -69,11 +84,26 @@ define float @scvtf32_i64(float %a0) { ; SSE-NEXT: cvtsi2ss %rax, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: scvtf32_i64: -; AVX: # %bb.0: -; AVX-NEXT: vcvttss2si %xmm0, %rax -; AVX-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0 -; AVX-NEXT: retq +; AVX2-LABEL: scvtf32_i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vcvttss2si %xmm0, %rax +; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0 +; AVX2-NEXT: retq +; +; AVX512-VL-LABEL: scvtf32_i64: +; AVX512-VL: # %bb.0: +; AVX512-VL-NEXT: vcvttps2qq %xmm0, %xmm0 +; AVX512-VL-NEXT: vcvtqq2ps %xmm0, %xmm0 +; AVX512-VL-NEXT: retq +; +; AVX512-NOVL-LABEL: scvtf32_i64: +; AVX512-NOVL: # %bb.0: +; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512-NOVL-NEXT: vcvttps2qq %ymm0, %zmm0 +; AVX512-NOVL-NEXT: vcvtqq2ps %zmm0, %ymm0 +; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512-NOVL-NEXT: vzeroupper +; AVX512-NOVL-NEXT: retq %ii = fptosi float %a0 to i64 %ff = sitofp i64 %ii to float ret float %ff @@ -235,6 +265,3 @@ define float @ucvtf32_i64(float %a0) { %ff = uitofp i64 %ii to float ret float %ff } -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; AVX512-NOVL: {{.*}} -; AVX512-VL: {{.*}} From a4aae578510be0764980645336894777a791b657 Mon Sep 17 00:00:00 2001 From: Kavin Gnanapandithan Date: Sat, 11 Oct 2025 16:12:38 -0400 Subject: [PATCH 5/6] Updated prefix in isint.ll --- llvm/test/CodeGen/X86/isint.ll | 655 ++++++++++++++++----------------- 1 file changed, 317 insertions(+), 338 deletions(-) diff --git a/llvm/test/CodeGen/X86/isint.ll b/llvm/test/CodeGen/X86/isint.ll index f679821622cd9..691a56197ca32 100644 --- a/llvm/test/CodeGen/X86/isint.ll +++ b/llvm/test/CodeGen/X86/isint.ll @@ -1,8 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=CHECK64 %s -; RUN: llc < %s -mtriple=i686-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=CHECK32 %s -; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+avx512f | FileCheck -check-prefix=AVX512-NODQ %s -; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+avx512f,+avx512dq | FileCheck -check-prefix=AVX512-NODQ %s +; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=X86 %s +; RUN: llc < %s -mtriple=i686-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=SSE2 %s ; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck -check-prefix=AVX512VL %s ; PR19059 @@ -17,23 +15,25 @@ define i32 @isint_return(double %d) nounwind { ; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq ; -; CHECK32-LABEL: isint_return: -; CHECK32: # %bb.0: -; CHECK32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK32-NEXT: cvttpd2dq %xmm0, %xmm1 -; CHECK32-NEXT: cvtdq2pd %xmm1, %xmm1 -; CHECK32-NEXT: cmpeqsd %xmm0, %xmm1 -; CHECK32-NEXT: movd %xmm1, %eax -; CHECK32-NEXT: andl $1, %eax -; CHECK32-NEXT: retl +; X86-LABEL: isint_return: +; X86: # %bb.0: +; X86-NEXT: cvttpd2dq %xmm0, %xmm1 +; X86-NEXT: cvtdq2pd %xmm1, %xmm1 +; X86-NEXT: cmpeqsd %xmm0, %xmm1 +; X86-NEXT: movq 
%xmm1, %rax +; X86-NEXT: andl $1, %eax +; X86-NEXT: # kill: def $eax killed $eax killed $rax +; X86-NEXT: retq ; -; AVX512-NODQ-LABEL: isint_return: -; AVX512-NODQ: # %bb.0: -; AVX512-NODQ-NEXT: vcvttpd2dq %xmm0, %xmm1 -; AVX512-NODQ-NEXT: vcvtdq2pd %xmm1, %xmm1 -; AVX512-NODQ-NEXT: vcmpeqsd %xmm1, %xmm0, %k0 -; AVX512-NODQ-NEXT: kmovw %k0, %eax -; AVX512-NODQ-NEXT: retq +; SSE2-LABEL: isint_return: +; SSE2: # %bb.0: +; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE2-NEXT: cvttpd2dq %xmm0, %xmm1 +; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1 +; SSE2-NEXT: cmpeqsd %xmm0, %xmm1 +; SSE2-NEXT: movd %xmm1, %eax +; SSE2-NEXT: andl $1, %eax +; SSE2-NEXT: retl ; ; AVX512VL-LABEL: isint_return: ; AVX512VL: # %bb.0: @@ -59,23 +59,24 @@ define i32 @isint_float_return(float %f) nounwind { ; X64-NEXT: andl $1, %eax ; X64-NEXT: retq ; -; CHECK32-LABEL: isint_float_return: -; CHECK32: # %bb.0: -; CHECK32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK32-NEXT: cvttps2dq %xmm0, %xmm1 -; CHECK32-NEXT: cvtdq2ps %xmm1, %xmm1 -; CHECK32-NEXT: cmpeqss %xmm0, %xmm1 -; CHECK32-NEXT: movd %xmm1, %eax -; CHECK32-NEXT: andl $1, %eax -; CHECK32-NEXT: retl +; X86-LABEL: isint_float_return: +; X86: # %bb.0: +; X86-NEXT: cvttps2dq %xmm0, %xmm1 +; X86-NEXT: cvtdq2ps %xmm1, %xmm1 +; X86-NEXT: cmpeqss %xmm0, %xmm1 +; X86-NEXT: movd %xmm1, %eax +; X86-NEXT: andl $1, %eax +; X86-NEXT: retq ; -; AVX512-NODQ-LABEL: isint_float_return: -; AVX512-NODQ: # %bb.0: -; AVX512-NODQ-NEXT: vcvttps2dq %xmm0, %xmm1 -; AVX512-NODQ-NEXT: vcvtdq2ps %xmm1, %xmm1 -; AVX512-NODQ-NEXT: vcmpeqss %xmm1, %xmm0, %k0 -; AVX512-NODQ-NEXT: kmovw %k0, %eax -; AVX512-NODQ-NEXT: retq +; SSE2-LABEL: isint_float_return: +; SSE2: # %bb.0: +; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2-NEXT: cvttps2dq %xmm0, %xmm1 +; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1 +; SSE2-NEXT: cmpeqss %xmm0, %xmm1 +; SSE2-NEXT: movd %xmm1, %eax +; SSE2-NEXT: andl $1, %eax +; SSE2-NEXT: retl ; ; AVX512VL-LABEL: isint_float_return: ; AVX512VL: 
# %bb.0: @@ -92,42 +93,42 @@ define i32 @isint_float_return(float %f) nounwind { } define i64 @isint64_float_return(float %f) nounwind { -; CHECK64-LABEL: isint64_float_return: -; CHECK64: # %bb.0: -; CHECK64-NEXT: cvttss2si %xmm0, %rax -; CHECK64-NEXT: cvtsi2ss %rax, %xmm1 -; CHECK64-NEXT: cmpeqss %xmm0, %xmm1 -; CHECK64-NEXT: movd %xmm1, %eax -; CHECK64-NEXT: andl $1, %eax -; CHECK64-NEXT: retq +; X86-LABEL: isint64_float_return: +; X86: # %bb.0: +; X86-NEXT: cvttss2si %xmm0, %rax +; X86-NEXT: cvtsi2ss %rax, %xmm1 +; X86-NEXT: cmpeqss %xmm0, %xmm1 +; X86-NEXT: movd %xmm1, %eax +; X86-NEXT: andl $1, %eax +; X86-NEXT: retq ; -; CHECK32-LABEL: isint64_float_return: -; CHECK32: # %bb.0: -; CHECK32-NEXT: pushl %ebp -; CHECK32-NEXT: movl %esp, %ebp -; CHECK32-NEXT: andl $-8, %esp -; CHECK32-NEXT: subl $32, %esp -; CHECK32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK32-NEXT: movss %xmm0, {{[0-9]+}}(%esp) -; CHECK32-NEXT: flds {{[0-9]+}}(%esp) -; CHECK32-NEXT: fnstcw {{[0-9]+}}(%esp) -; CHECK32-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: orl $3072, %eax # imm = 0xC00 -; CHECK32-NEXT: movw %ax, {{[0-9]+}}(%esp) -; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp) -; CHECK32-NEXT: fistpll {{[0-9]+}}(%esp) -; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp) -; CHECK32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; CHECK32-NEXT: movlps %xmm1, {{[0-9]+}}(%esp) -; CHECK32-NEXT: fildll {{[0-9]+}}(%esp) -; CHECK32-NEXT: fstps {{[0-9]+}}(%esp) -; CHECK32-NEXT: cmpeqss {{[0-9]+}}(%esp), %xmm0 -; CHECK32-NEXT: movd %xmm0, %eax -; CHECK32-NEXT: andl $1, %eax -; CHECK32-NEXT: xorl %edx, %edx -; CHECK32-NEXT: movl %ebp, %esp -; CHECK32-NEXT: popl %ebp -; CHECK32-NEXT: retl +; SSE2-LABEL: isint64_float_return: +; SSE2: # %bb.0: +; SSE2-NEXT: pushl %ebp +; SSE2-NEXT: movl %esp, %ebp +; SSE2-NEXT: andl $-8, %esp +; SSE2-NEXT: subl $32, %esp +; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2-NEXT: movss %xmm0, {{[0-9]+}}(%esp) +; SSE2-NEXT: flds {{[0-9]+}}(%esp) +; SSE2-NEXT: 
fnstcw {{[0-9]+}}(%esp) +; SSE2-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; SSE2-NEXT: orl $3072, %eax # imm = 0xC00 +; SSE2-NEXT: movw %ax, {{[0-9]+}}(%esp) +; SSE2-NEXT: fldcw {{[0-9]+}}(%esp) +; SSE2-NEXT: fistpll {{[0-9]+}}(%esp) +; SSE2-NEXT: fldcw {{[0-9]+}}(%esp) +; SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; SSE2-NEXT: movlps %xmm1, {{[0-9]+}}(%esp) +; SSE2-NEXT: fildll {{[0-9]+}}(%esp) +; SSE2-NEXT: fstps {{[0-9]+}}(%esp) +; SSE2-NEXT: cmpeqss {{[0-9]+}}(%esp), %xmm0 +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: andl $1, %eax +; SSE2-NEXT: xorl %edx, %edx +; SSE2-NEXT: movl %ebp, %esp +; SSE2-NEXT: popl %ebp +; SSE2-NEXT: retl ; ; AVX512VL-LABEL: isint64_float_return: ; AVX512VL: # %bb.0: @@ -144,42 +145,42 @@ define i64 @isint64_float_return(float %f) nounwind { } define i64 @isint64_return(double %d) nounwind { -; CHECK64-LABEL: isint64_return: -; CHECK64: # %bb.0: -; CHECK64-NEXT: cvttsd2si %xmm0, %rax -; CHECK64-NEXT: cvtsi2sd %rax, %xmm1 -; CHECK64-NEXT: cmpeqsd %xmm0, %xmm1 -; CHECK64-NEXT: movq %xmm1, %rax -; CHECK64-NEXT: andl $1, %eax -; CHECK64-NEXT: retq +; X86-LABEL: isint64_return: +; X86: # %bb.0: +; X86-NEXT: cvttsd2si %xmm0, %rax +; X86-NEXT: cvtsi2sd %rax, %xmm1 +; X86-NEXT: cmpeqsd %xmm0, %xmm1 +; X86-NEXT: movq %xmm1, %rax +; X86-NEXT: andl $1, %eax +; X86-NEXT: retq ; -; CHECK32-LABEL: isint64_return: -; CHECK32: # %bb.0: -; CHECK32-NEXT: pushl %ebp -; CHECK32-NEXT: movl %esp, %ebp -; CHECK32-NEXT: andl $-8, %esp -; CHECK32-NEXT: subl $32, %esp -; CHECK32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK32-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) -; CHECK32-NEXT: fldl {{[0-9]+}}(%esp) -; CHECK32-NEXT: fnstcw {{[0-9]+}}(%esp) -; CHECK32-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: orl $3072, %eax # imm = 0xC00 -; CHECK32-NEXT: movw %ax, {{[0-9]+}}(%esp) -; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp) -; CHECK32-NEXT: fistpll {{[0-9]+}}(%esp) -; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp) -; CHECK32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; 
CHECK32-NEXT: movlps %xmm1, {{[0-9]+}}(%esp) -; CHECK32-NEXT: fildll {{[0-9]+}}(%esp) -; CHECK32-NEXT: fstpl {{[0-9]+}}(%esp) -; CHECK32-NEXT: cmpeqsd {{[0-9]+}}(%esp), %xmm0 -; CHECK32-NEXT: movd %xmm0, %eax -; CHECK32-NEXT: andl $1, %eax -; CHECK32-NEXT: xorl %edx, %edx -; CHECK32-NEXT: movl %ebp, %esp -; CHECK32-NEXT: popl %ebp -; CHECK32-NEXT: retl +; SSE2-LABEL: isint64_return: +; SSE2: # %bb.0: +; SSE2-NEXT: pushl %ebp +; SSE2-NEXT: movl %esp, %ebp +; SSE2-NEXT: andl $-8, %esp +; SSE2-NEXT: subl $32, %esp +; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE2-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) +; SSE2-NEXT: fldl {{[0-9]+}}(%esp) +; SSE2-NEXT: fnstcw {{[0-9]+}}(%esp) +; SSE2-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; SSE2-NEXT: orl $3072, %eax # imm = 0xC00 +; SSE2-NEXT: movw %ax, {{[0-9]+}}(%esp) +; SSE2-NEXT: fldcw {{[0-9]+}}(%esp) +; SSE2-NEXT: fistpll {{[0-9]+}}(%esp) +; SSE2-NEXT: fldcw {{[0-9]+}}(%esp) +; SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; SSE2-NEXT: movlps %xmm1, {{[0-9]+}}(%esp) +; SSE2-NEXT: fildll {{[0-9]+}}(%esp) +; SSE2-NEXT: fstpl {{[0-9]+}}(%esp) +; SSE2-NEXT: cmpeqsd {{[0-9]+}}(%esp), %xmm0 +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: andl $1, %eax +; SSE2-NEXT: xorl %edx, %edx +; SSE2-NEXT: movl %ebp, %esp +; SSE2-NEXT: popl %ebp +; SSE2-NEXT: retl ; ; AVX512VL-LABEL: isint64_return: ; AVX512VL: # %bb.0: @@ -196,36 +197,6 @@ define i64 @isint64_return(double %d) nounwind { } define i32 @isuint_return(double %d) nounwind { -; CHECK64-LABEL: isuint_return: -; CHECK64: # %bb.0: -; CHECK64-NEXT: cvttsd2si %xmm0, %rax -; CHECK64-NEXT: movl %eax, %eax -; CHECK64-NEXT: cvtsi2sd %rax, %xmm1 -; CHECK64-NEXT: cmpeqsd %xmm0, %xmm1 -; CHECK64-NEXT: movq %xmm1, %rax -; CHECK64-NEXT: andl $1, %eax -; CHECK64-NEXT: # kill: def $eax killed $eax killed $rax -; CHECK64-NEXT: retq -; -; CHECK32-LABEL: isuint_return: -; CHECK32: # %bb.0: -; CHECK32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK32-NEXT: cvttsd2si %xmm0, %eax -; CHECK32-NEXT: movl %eax, 
%ecx -; CHECK32-NEXT: sarl $31, %ecx -; CHECK32-NEXT: movapd %xmm0, %xmm1 -; CHECK32-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; CHECK32-NEXT: cvttsd2si %xmm1, %edx -; CHECK32-NEXT: andl %ecx, %edx -; CHECK32-NEXT: orl %eax, %edx -; CHECK32-NEXT: movd %edx, %xmm1 -; CHECK32-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; CHECK32-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; CHECK32-NEXT: cmpeqsd %xmm0, %xmm1 -; CHECK32-NEXT: movd %xmm1, %eax -; CHECK32-NEXT: andl $1, %eax -; CHECK32-NEXT: retl -; ; AVX512-NODQ-LABEL: isuint_return: ; AVX512-NODQ: # %bb.0: ; AVX512-NODQ-NEXT: vcvttsd2usi %xmm0, %eax @@ -234,6 +205,36 @@ define i32 @isuint_return(double %d) nounwind { ; AVX512-NODQ-NEXT: kmovw %k0, %eax ; AVX512-NODQ-NEXT: retq ; +; X86-LABEL: isuint_return: +; X86: # %bb.0: +; X86-NEXT: cvttsd2si %xmm0, %rax +; X86-NEXT: movl %eax, %eax +; X86-NEXT: cvtsi2sd %rax, %xmm1 +; X86-NEXT: cmpeqsd %xmm0, %xmm1 +; X86-NEXT: movq %xmm1, %rax +; X86-NEXT: andl $1, %eax +; X86-NEXT: # kill: def $eax killed $eax killed $rax +; X86-NEXT: retq +; +; SSE2-LABEL: isuint_return: +; SSE2: # %bb.0: +; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE2-NEXT: cvttsd2si %xmm0, %eax +; SSE2-NEXT: movl %eax, %ecx +; SSE2-NEXT: sarl $31, %ecx +; SSE2-NEXT: movapd %xmm0, %xmm1 +; SSE2-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; SSE2-NEXT: cvttsd2si %xmm1, %edx +; SSE2-NEXT: andl %ecx, %edx +; SSE2-NEXT: orl %eax, %edx +; SSE2-NEXT: movd %edx, %xmm1 +; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; SSE2-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; SSE2-NEXT: cmpeqsd %xmm0, %xmm1 +; SSE2-NEXT: movd %xmm1, %eax +; SSE2-NEXT: andl $1, %eax +; SSE2-NEXT: retl +; ; AVX512VL-LABEL: isuint_return: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vcvttsd2usi %xmm0, %eax @@ -249,25 +250,6 @@ define i32 @isuint_return(double %d) nounwind { } define i32 @isuint_float_return(float %f) nounwind { -; CHECK64-LABEL: isuint_float_return: -; CHECK64: # %bb.0: -; CHECK64-NEXT: cvttps2dq %xmm0, %xmm1 -; 
CHECK64-NEXT: cvtdq2ps %xmm1, %xmm1 -; CHECK64-NEXT: cmpeqss %xmm0, %xmm1 -; CHECK64-NEXT: movd %xmm1, %eax -; CHECK64-NEXT: andl $1, %eax -; CHECK64-NEXT: retq -; -; CHECK32-LABEL: isuint_float_return: -; CHECK32: # %bb.0: -; CHECK32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK32-NEXT: cvttps2dq %xmm0, %xmm1 -; CHECK32-NEXT: cvtdq2ps %xmm1, %xmm1 -; CHECK32-NEXT: cmpeqss %xmm0, %xmm1 -; CHECK32-NEXT: movd %xmm1, %eax -; CHECK32-NEXT: andl $1, %eax -; CHECK32-NEXT: retl -; ; AVX512-NODQ-LABEL: isuint_float_return: ; AVX512-NODQ: # %bb.0: ; AVX512-NODQ-NEXT: vcvttps2dq %xmm0, %xmm1 @@ -276,6 +258,25 @@ define i32 @isuint_float_return(float %f) nounwind { ; AVX512-NODQ-NEXT: kmovw %k0, %eax ; AVX512-NODQ-NEXT: retq ; +; X86-LABEL: isuint_float_return: +; X86: # %bb.0: +; X86-NEXT: cvttps2dq %xmm0, %xmm1 +; X86-NEXT: cvtdq2ps %xmm1, %xmm1 +; X86-NEXT: cmpeqss %xmm0, %xmm1 +; X86-NEXT: movd %xmm1, %eax +; X86-NEXT: andl $1, %eax +; X86-NEXT: retq +; +; SSE2-LABEL: isuint_float_return: +; SSE2: # %bb.0: +; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2-NEXT: cvttps2dq %xmm0, %xmm1 +; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1 +; SSE2-NEXT: cmpeqss %xmm0, %xmm1 +; SSE2-NEXT: movd %xmm1, %eax +; SSE2-NEXT: andl $1, %eax +; SSE2-NEXT: retl +; ; AVX512VL-LABEL: isuint_float_return: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm1 @@ -291,71 +292,6 @@ define i32 @isuint_float_return(float %f) nounwind { } define i64 @isuint64_return(double %d) nounwind { -; CHECK64-LABEL: isuint64_return: -; CHECK64: # %bb.0: -; CHECK64-NEXT: cvttsd2si %xmm0, %rax -; CHECK64-NEXT: movq %rax, %rcx -; CHECK64-NEXT: sarq $63, %rcx -; CHECK64-NEXT: movapd %xmm0, %xmm1 -; CHECK64-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; CHECK64-NEXT: cvttsd2si %xmm1, %rdx -; CHECK64-NEXT: andq %rcx, %rdx -; CHECK64-NEXT: orq %rax, %rdx -; CHECK64-NEXT: movq %rdx, %xmm1 -; CHECK64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] -; CHECK64-NEXT: subpd 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; CHECK64-NEXT: movapd %xmm1, %xmm2 -; CHECK64-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] -; CHECK64-NEXT: addsd %xmm1, %xmm2 -; CHECK64-NEXT: cmpeqsd %xmm0, %xmm2 -; CHECK64-NEXT: movq %xmm2, %rax -; CHECK64-NEXT: andl $1, %eax -; CHECK64-NEXT: retq -; -; CHECK32-LABEL: isuint64_return: -; CHECK32: # %bb.0: -; CHECK32-NEXT: pushl %ebp -; CHECK32-NEXT: movl %esp, %ebp -; CHECK32-NEXT: andl $-8, %esp -; CHECK32-NEXT: subl $16, %esp -; CHECK32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK32-NEXT: movsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0] -; CHECK32-NEXT: ucomisd %xmm0, %xmm1 -; CHECK32-NEXT: jbe .LBB6_2 -; CHECK32-NEXT: # %bb.1: -; CHECK32-NEXT: xorpd %xmm1, %xmm1 -; CHECK32-NEXT: .LBB6_2: -; CHECK32-NEXT: movapd %xmm0, %xmm2 -; CHECK32-NEXT: subsd %xmm1, %xmm2 -; CHECK32-NEXT: movsd %xmm2, {{[0-9]+}}(%esp) -; CHECK32-NEXT: setbe %al -; CHECK32-NEXT: fldl {{[0-9]+}}(%esp) -; CHECK32-NEXT: fnstcw {{[0-9]+}}(%esp) -; CHECK32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; CHECK32-NEXT: orl $3072, %ecx # imm = 0xC00 -; CHECK32-NEXT: movw %cx, {{[0-9]+}}(%esp) -; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp) -; CHECK32-NEXT: fistpll {{[0-9]+}}(%esp) -; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp) -; CHECK32-NEXT: movzbl %al, %eax -; CHECK32-NEXT: shll $31, %eax -; CHECK32-NEXT: xorl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: movd %eax, %xmm1 -; CHECK32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; CHECK32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[1],mem[1] -; CHECK32-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 -; CHECK32-NEXT: movapd %xmm2, %xmm1 -; CHECK32-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] -; CHECK32-NEXT: addsd %xmm2, %xmm1 -; CHECK32-NEXT: cmpeqsd %xmm0, %xmm1 -; CHECK32-NEXT: movd %xmm1, %eax -; CHECK32-NEXT: andl $1, %eax -; CHECK32-NEXT: xorl %edx, %edx -; CHECK32-NEXT: movl %ebp, %esp -; CHECK32-NEXT: popl %ebp -; CHECK32-NEXT: retl 
-; ; AVX512-NODQ-LABEL: isuint64_return: ; AVX512-NODQ: # %bb.0: ; AVX512-NODQ-NEXT: vcvttsd2usi %xmm0, %rax @@ -364,6 +300,71 @@ define i64 @isuint64_return(double %d) nounwind { ; AVX512-NODQ-NEXT: kmovw %k0, %eax ; AVX512-NODQ-NEXT: retq ; +; X86-LABEL: isuint64_return: +; X86: # %bb.0: +; X86-NEXT: cvttsd2si %xmm0, %rax +; X86-NEXT: movq %rax, %rcx +; X86-NEXT: sarq $63, %rcx +; X86-NEXT: movapd %xmm0, %xmm1 +; X86-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; X86-NEXT: cvttsd2si %xmm1, %rdx +; X86-NEXT: andq %rcx, %rdx +; X86-NEXT: orq %rax, %rdx +; X86-NEXT: movq %rdx, %xmm1 +; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] +; X86-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; X86-NEXT: movapd %xmm1, %xmm2 +; X86-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] +; X86-NEXT: addsd %xmm1, %xmm2 +; X86-NEXT: cmpeqsd %xmm0, %xmm2 +; X86-NEXT: movq %xmm2, %rax +; X86-NEXT: andl $1, %eax +; X86-NEXT: retq +; +; SSE2-LABEL: isuint64_return: +; SSE2: # %bb.0: +; SSE2-NEXT: pushl %ebp +; SSE2-NEXT: movl %esp, %ebp +; SSE2-NEXT: andl $-8, %esp +; SSE2-NEXT: subl $16, %esp +; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE2-NEXT: movsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0] +; SSE2-NEXT: ucomisd %xmm0, %xmm1 +; SSE2-NEXT: jbe .LBB6_2 +; SSE2-NEXT: # %bb.1: +; SSE2-NEXT: xorpd %xmm1, %xmm1 +; SSE2-NEXT: .LBB6_2: +; SSE2-NEXT: movapd %xmm0, %xmm2 +; SSE2-NEXT: subsd %xmm1, %xmm2 +; SSE2-NEXT: movsd %xmm2, {{[0-9]+}}(%esp) +; SSE2-NEXT: setbe %al +; SSE2-NEXT: fldl {{[0-9]+}}(%esp) +; SSE2-NEXT: fnstcw {{[0-9]+}}(%esp) +; SSE2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; SSE2-NEXT: orl $3072, %ecx # imm = 0xC00 +; SSE2-NEXT: movw %cx, {{[0-9]+}}(%esp) +; SSE2-NEXT: fldcw {{[0-9]+}}(%esp) +; SSE2-NEXT: fistpll {{[0-9]+}}(%esp) +; SSE2-NEXT: fldcw {{[0-9]+}}(%esp) +; SSE2-NEXT: movzbl %al, %eax +; SSE2-NEXT: shll $31, %eax +; SSE2-NEXT: xorl {{[0-9]+}}(%esp), %eax +; SSE2-NEXT: movd %eax, %xmm1 +; SSE2-NEXT: movd {{.*#+}} xmm2 = 
mem[0],zero,zero,zero +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[1],mem[1] +; SSE2-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 +; SSE2-NEXT: movapd %xmm2, %xmm1 +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] +; SSE2-NEXT: addsd %xmm2, %xmm1 +; SSE2-NEXT: cmpeqsd %xmm0, %xmm1 +; SSE2-NEXT: movd %xmm1, %eax +; SSE2-NEXT: andl $1, %eax +; SSE2-NEXT: xorl %edx, %edx +; SSE2-NEXT: movl %ebp, %esp +; SSE2-NEXT: popl %ebp +; SSE2-NEXT: retl +; ; AVX512VL-LABEL: isuint64_return: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax @@ -379,86 +380,78 @@ define i64 @isuint64_return(double %d) nounwind { } define i64 @isuint64_float_return(float %f) nounwind { -; CHECK64-LABEL: isuint64_float_return: -; CHECK64: # %bb.0: -; CHECK64-NEXT: cvttss2si %xmm0, %rcx -; CHECK64-NEXT: movq %rcx, %rdx -; CHECK64-NEXT: sarq $63, %rdx -; CHECK64-NEXT: movaps %xmm0, %xmm1 -; CHECK64-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; CHECK64-NEXT: cvttss2si %xmm1, %rax -; CHECK64-NEXT: andq %rdx, %rax -; CHECK64-NEXT: orq %rcx, %rax -; CHECK64-NEXT: js .LBB7_1 -; CHECK64-NEXT: # %bb.2: -; CHECK64-NEXT: xorps %xmm1, %xmm1 -; CHECK64-NEXT: cvtsi2ss %rax, %xmm1 -; CHECK64-NEXT: jmp .LBB7_3 -; CHECK64-NEXT: .LBB7_1: -; CHECK64-NEXT: movq %rax, %rcx -; CHECK64-NEXT: shrq %rcx -; CHECK64-NEXT: andl $1, %eax -; CHECK64-NEXT: orq %rcx, %rax -; CHECK64-NEXT: xorps %xmm1, %xmm1 -; CHECK64-NEXT: cvtsi2ss %rax, %xmm1 -; CHECK64-NEXT: addss %xmm1, %xmm1 -; CHECK64-NEXT: .LBB7_3: -; CHECK64-NEXT: cmpeqss %xmm1, %xmm0 -; CHECK64-NEXT: movd %xmm0, %eax -; CHECK64-NEXT: andl $1, %eax -; CHECK64-NEXT: retq +; X86-LABEL: isuint64_float_return: +; X86: # %bb.0: +; X86-NEXT: cvttss2si %xmm0, %rcx +; X86-NEXT: movq %rcx, %rdx +; X86-NEXT: sarq $63, %rdx +; X86-NEXT: movaps %xmm0, %xmm1 +; X86-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; X86-NEXT: cvttss2si %xmm1, %rax +; X86-NEXT: andq %rdx, %rax 
+; X86-NEXT: orq %rcx, %rax +; X86-NEXT: js .LBB7_1 +; X86-NEXT: # %bb.2: +; X86-NEXT: xorps %xmm1, %xmm1 +; X86-NEXT: cvtsi2ss %rax, %xmm1 +; X86-NEXT: jmp .LBB7_3 +; X86-NEXT: .LBB7_1: +; X86-NEXT: movq %rax, %rcx +; X86-NEXT: shrq %rcx +; X86-NEXT: andl $1, %eax +; X86-NEXT: orq %rcx, %rax +; X86-NEXT: xorps %xmm1, %xmm1 +; X86-NEXT: cvtsi2ss %rax, %xmm1 +; X86-NEXT: addss %xmm1, %xmm1 +; X86-NEXT: .LBB7_3: +; X86-NEXT: cmpeqss %xmm1, %xmm0 +; X86-NEXT: movd %xmm0, %eax +; X86-NEXT: andl $1, %eax +; X86-NEXT: retq ; -; CHECK32-LABEL: isuint64_float_return: -; CHECK32: # %bb.0: -; CHECK32-NEXT: pushl %ebp -; CHECK32-NEXT: movl %esp, %ebp -; CHECK32-NEXT: andl $-8, %esp -; CHECK32-NEXT: subl $32, %esp -; CHECK32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK32-NEXT: movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] -; CHECK32-NEXT: ucomiss %xmm0, %xmm1 -; CHECK32-NEXT: jbe .LBB7_2 -; CHECK32-NEXT: # %bb.1: -; CHECK32-NEXT: xorps %xmm1, %xmm1 -; CHECK32-NEXT: .LBB7_2: -; CHECK32-NEXT: movaps %xmm0, %xmm2 -; CHECK32-NEXT: subss %xmm1, %xmm2 -; CHECK32-NEXT: movss %xmm2, {{[0-9]+}}(%esp) -; CHECK32-NEXT: setbe %al -; CHECK32-NEXT: flds {{[0-9]+}}(%esp) -; CHECK32-NEXT: fnstcw {{[0-9]+}}(%esp) -; CHECK32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; CHECK32-NEXT: orl $3072, %ecx # imm = 0xC00 -; CHECK32-NEXT: movw %cx, {{[0-9]+}}(%esp) -; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp) -; CHECK32-NEXT: fistpll {{[0-9]+}}(%esp) -; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp) -; CHECK32-NEXT: movzbl %al, %eax -; CHECK32-NEXT: shll $31, %eax -; CHECK32-NEXT: xorl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: movd %eax, %xmm1 -; CHECK32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; CHECK32-NEXT: movq %xmm2, {{[0-9]+}}(%esp) -; CHECK32-NEXT: shrl $31, %eax -; CHECK32-NEXT: fildll {{[0-9]+}}(%esp) -; CHECK32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) -; CHECK32-NEXT: fstps {{[0-9]+}}(%esp) -; 
CHECK32-NEXT: cmpeqss {{[0-9]+}}(%esp), %xmm0 -; CHECK32-NEXT: movd %xmm0, %eax -; CHECK32-NEXT: andl $1, %eax -; CHECK32-NEXT: xorl %edx, %edx -; CHECK32-NEXT: movl %ebp, %esp -; CHECK32-NEXT: popl %ebp -; CHECK32-NEXT: retl -; -; AVX512-NODQ-LABEL: isuint64_float_return: -; AVX512-NODQ: # %bb.0: -; AVX512-NODQ-NEXT: vcvttss2usi %xmm0, %rax -; AVX512-NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm1 -; AVX512-NODQ-NEXT: vcmpeqss %xmm1, %xmm0, %k0 -; AVX512-NODQ-NEXT: kmovw %k0, %eax -; AVX512-NODQ-NEXT: retq +; SSE2-LABEL: isuint64_float_return: +; SSE2: # %bb.0: +; SSE2-NEXT: pushl %ebp +; SSE2-NEXT: movl %esp, %ebp +; SSE2-NEXT: andl $-8, %esp +; SSE2-NEXT: subl $32, %esp +; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2-NEXT: movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] +; SSE2-NEXT: ucomiss %xmm0, %xmm1 +; SSE2-NEXT: jbe .LBB7_2 +; SSE2-NEXT: # %bb.1: +; SSE2-NEXT: xorps %xmm1, %xmm1 +; SSE2-NEXT: .LBB7_2: +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: subss %xmm1, %xmm2 +; SSE2-NEXT: movss %xmm2, {{[0-9]+}}(%esp) +; SSE2-NEXT: setbe %al +; SSE2-NEXT: flds {{[0-9]+}}(%esp) +; SSE2-NEXT: fnstcw {{[0-9]+}}(%esp) +; SSE2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; SSE2-NEXT: orl $3072, %ecx # imm = 0xC00 +; SSE2-NEXT: movw %cx, {{[0-9]+}}(%esp) +; SSE2-NEXT: fldcw {{[0-9]+}}(%esp) +; SSE2-NEXT: fistpll {{[0-9]+}}(%esp) +; SSE2-NEXT: fldcw {{[0-9]+}}(%esp) +; SSE2-NEXT: movzbl %al, %eax +; SSE2-NEXT: shll $31, %eax +; SSE2-NEXT: xorl {{[0-9]+}}(%esp), %eax +; SSE2-NEXT: movd %eax, %xmm1 +; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSE2-NEXT: movq %xmm2, {{[0-9]+}}(%esp) +; SSE2-NEXT: shrl $31, %eax +; SSE2-NEXT: fildll {{[0-9]+}}(%esp) +; SSE2-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; SSE2-NEXT: fstps {{[0-9]+}}(%esp) +; SSE2-NEXT: cmpeqss {{[0-9]+}}(%esp), %xmm0 +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: andl $1, %eax +; SSE2-NEXT: xorl %edx, %edx +; 
SSE2-NEXT: movl %ebp, %esp +; SSE2-NEXT: popl %ebp +; SSE2-NEXT: retl ; ; AVX512VL-LABEL: isuint64_float_return: ; AVX512VL: # %bb.0: @@ -477,46 +470,32 @@ define i64 @isuint64_float_return(float %f) nounwind { declare void @foo() define void @isint_branch(double %d) nounwind { -; CHECK64-LABEL: isint_branch: -; CHECK64: # %bb.0: -; CHECK64-NEXT: cvttpd2dq %xmm0, %xmm1 -; CHECK64-NEXT: cvtdq2pd %xmm1, %xmm1 -; CHECK64-NEXT: ucomisd %xmm1, %xmm0 -; CHECK64-NEXT: jne .LBB8_2 -; CHECK64-NEXT: jp .LBB8_2 -; CHECK64-NEXT: # %bb.1: # %true -; CHECK64-NEXT: pushq %rax -; CHECK64-NEXT: callq foo@PLT -; CHECK64-NEXT: popq %rax -; CHECK64-NEXT: .LBB8_2: # %false -; CHECK64-NEXT: retq +; X86-LABEL: isint_branch: +; X86: # %bb.0: +; X86-NEXT: cvttpd2dq %xmm0, %xmm1 +; X86-NEXT: cvtdq2pd %xmm1, %xmm1 +; X86-NEXT: ucomisd %xmm1, %xmm0 +; X86-NEXT: jne .LBB8_2 +; X86-NEXT: jp .LBB8_2 +; X86-NEXT: # %bb.1: # %true +; X86-NEXT: pushq %rax +; X86-NEXT: callq foo@PLT +; X86-NEXT: popq %rax +; X86-NEXT: .LBB8_2: # %false +; X86-NEXT: retq ; -; CHECK32-LABEL: isint_branch: -; CHECK32: # %bb.0: -; CHECK32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK32-NEXT: cvttpd2dq %xmm0, %xmm1 -; CHECK32-NEXT: cvtdq2pd %xmm1, %xmm1 -; CHECK32-NEXT: ucomisd %xmm1, %xmm0 -; CHECK32-NEXT: jne .LBB8_2 -; CHECK32-NEXT: jp .LBB8_2 -; CHECK32-NEXT: # %bb.1: # %true -; CHECK32-NEXT: calll foo@PLT -; CHECK32-NEXT: .LBB8_2: # %false -; CHECK32-NEXT: retl -; -; AVX512-NODQ-LABEL: isint_branch: -; AVX512-NODQ: # %bb.0: -; AVX512-NODQ-NEXT: vcvttpd2dq %xmm0, %xmm1 -; AVX512-NODQ-NEXT: vcvtdq2pd %xmm1, %xmm1 -; AVX512-NODQ-NEXT: vucomisd %xmm1, %xmm0 -; AVX512-NODQ-NEXT: jne .LBB8_2 -; AVX512-NODQ-NEXT: jp .LBB8_2 -; AVX512-NODQ-NEXT: # %bb.1: # %true -; AVX512-NODQ-NEXT: pushq %rax -; AVX512-NODQ-NEXT: callq foo@PLT -; AVX512-NODQ-NEXT: popq %rax -; AVX512-NODQ-NEXT: .LBB8_2: # %false -; AVX512-NODQ-NEXT: retq +; SSE2-LABEL: isint_branch: +; SSE2: # %bb.0: +; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; 
SSE2-NEXT: cvttpd2dq %xmm0, %xmm1 +; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1 +; SSE2-NEXT: ucomisd %xmm1, %xmm0 +; SSE2-NEXT: jne .LBB8_2 +; SSE2-NEXT: jp .LBB8_2 +; SSE2-NEXT: # %bb.1: # %true +; SSE2-NEXT: calll foo@PLT +; SSE2-NEXT: .LBB8_2: # %false +; SSE2-NEXT: retl ; ; AVX512VL-LABEL: isint_branch: ; AVX512VL: # %bb.0: From 4726c5dde09dcbcd4f5396155c82c54f2394f69f Mon Sep 17 00:00:00 2001 From: Kavin Gnanapandithan Date: Sat, 11 Oct 2025 16:43:02 -0400 Subject: [PATCH 6/6] Formatted code with clang-format --- llvm/lib/Target/X86/X86ISelLowering.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 2813991d9c1aa..9824bdd101098 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -19964,7 +19964,7 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL, unsigned NumElts = Width / IntSize; VecIntVT = MVT::getVectorVT(IntVT, NumElts); unsigned SrcElts, VTElts; - // vcvttps2qq cannot convert v16f32 <-> v8i64 + // vcvttps2qq cannot convert v16f32 <-> v8i64 if (IntVT == MVT::i64 && Width == 512) { SrcElts = NumElts; VTElts = NumElts; @@ -19972,7 +19972,7 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL, SrcElts = Width / SrcSize; VTElts = Width / VTSize; } - + VecSrcVT = MVT::getVectorVT(SrcVT, SrcElts); VecVT = MVT::getVectorVT(VT, VTElts); // sint_to_fp (fp_to_sint X) --> extelt (sint_to_fp (fp_to_sint (s2v X))), 0