Conversation

KavinTheG (Contributor) commented:

fixes #160111

@llvmbot (Member) commented Oct 9, 2025:

@llvm/pr-subscribers-backend-x86

Author: Kavin Gnanapandithan (KavinTheG)

Changes

fixes llvm#160111


Patch is 22.45 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/162656.diff

2 Files Affected:

  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+68-10)
  • (modified) llvm/test/CodeGen/X86/isint.ll (+452-7)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2feb76e0eb7b4..946dbf8361aaf 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19885,7 +19885,9 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL,
   // TODO: Allow FP_TO_UINT.
   SDValue CastToInt = CastToFP.getOperand(0);
   MVT VT = CastToFP.getSimpleValueType();
-  if (CastToInt.getOpcode() != ISD::FP_TO_SINT || VT.isVector())
+  if ((CastToInt.getOpcode() != ISD::FP_TO_SINT &&
+       CastToInt.getOpcode() != ISD::FP_TO_UINT) ||
+      VT.isVector())
     return SDValue();
 
   MVT IntVT = CastToInt.getSimpleValueType();
@@ -19897,22 +19899,78 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL,
   // See if we have 128-bit vector cast instructions for this type of cast.
   // We need cvttps2dq/cvttpd2dq and cvtdq2ps/cvtdq2pd.
   if (!Subtarget.hasSSE2() || (VT != MVT::f32 && VT != MVT::f64) ||
-      IntVT != MVT::i32)
+      !(IntVT == MVT::i32 || (IntVT == MVT::i64 && Subtarget.hasDQI())))
     return SDValue();
 
   unsigned SrcSize = SrcVT.getSizeInBits();
   unsigned IntSize = IntVT.getSizeInBits();
   unsigned VTSize = VT.getSizeInBits();
-  MVT VecSrcVT = MVT::getVectorVT(SrcVT, 128 / SrcSize);
-  MVT VecIntVT = MVT::getVectorVT(IntVT, 128 / IntSize);
-  MVT VecVT = MVT::getVectorVT(VT, 128 / VTSize);
+  unsigned ToIntOpcode, ToFPOpcode;
+  unsigned Width = 128;
+  bool IsUnsigned = CastToInt.getOpcode() == ISD::FP_TO_UINT;
 
-  // We need target-specific opcodes if this is v2f64 -> v4i32 -> v2f64.
-  unsigned ToIntOpcode =
-      SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
-  unsigned ToFPOpcode =
-      IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
+  if (IntVT == MVT::i32) {
+    if (IsUnsigned && !Subtarget.hasVLX())
+      return SDValue(); // Need AVX512VL for unsigned i32
 
+    if (Subtarget.hasVLX()) {
+      if (IsUnsigned) {
+        ToIntOpcode =
+            SrcSize != IntSize ? X86ISD::CVTTP2UI : (unsigned)ISD::FP_TO_UINT;
+        ToFPOpcode =
+            IntSize != VTSize ? X86ISD::CVTUI2P : (unsigned)ISD::UINT_TO_FP;
+      } else {
+        ToIntOpcode =
+            SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
+        ToFPOpcode =
+            IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
+      }
+    } else {
+      // SSE2
+      ToIntOpcode =
+          SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
+      ToFPOpcode =
+          IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
+    }
+  } else {
+    if (Subtarget.hasVLX()) {
+      if (IsUnsigned) {
+        ToIntOpcode =
+            SrcSize != IntSize ? X86ISD::CVTTP2UI : (unsigned)ISD::FP_TO_UINT;
+        ToFPOpcode =
+            IntSize != VTSize ? X86ISD::CVTUI2P : (unsigned)ISD::UINT_TO_FP;
+      } else {
+        ToIntOpcode =
+            SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
+        ToFPOpcode =
+            IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
+      }
+    } else {
+      // Need to extend width for AVX512DQ
+      Width = 512;
+      ToIntOpcode = CastToInt.getOpcode();
+      ToFPOpcode = IsUnsigned ? ISD::UINT_TO_FP : ISD::SINT_TO_FP;
+    }
+  }
+
+  MVT VecSrcVT;
+  MVT VecIntVT;
+  MVT VecVT;
+  if (IntVT == MVT::i64) {
+    unsigned NumElts = Width / IntSize;
+    VecIntVT = MVT::getVectorVT(IntVT, NumElts);
+
+    // minimum legal size is v4f32
+    unsigned SrcElts = (SrcVT == MVT::f32) ? std::max(NumElts, 4u) : NumElts;
+    unsigned VTElts = (VT == MVT::f32) ? std::max(NumElts, 4u) : NumElts;
+
+    VecSrcVT = MVT::getVectorVT(SrcVT, SrcElts);
+    VecVT = MVT::getVectorVT(VT, VTElts);
+  } else {
+    VecSrcVT = MVT::getVectorVT(SrcVT, Width / SrcSize);
+    VecIntVT = MVT::getVectorVT(IntVT, Width / IntSize);
+    VecVT = MVT::getVectorVT(VT, Width / VTSize);
+  }
   // sint_to_fp (fp_to_sint X) --> extelt (sint_to_fp (fp_to_sint (s2v X))), 0
   //
   // We are not defining the high elements (for example, zero them) because
diff --git a/llvm/test/CodeGen/X86/isint.ll b/llvm/test/CodeGen/X86/isint.ll
index 8a56f49a6c755..d0b340ce37875 100644
--- a/llvm/test/CodeGen/X86/isint.ll
+++ b/llvm/test/CodeGen/X86/isint.ll
@@ -1,7 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=CHECK64 %s
 ; RUN: llc < %s -mtriple=i686-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=CHECK32 %s
-
+; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+avx512f | FileCheck -check-prefix=AVX512-NODQ %s
+; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+avx512f,+avx512dq | FileCheck -check-prefix=AVX512-NODQ %s
+; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck -check-prefix=AVX512VL %s
 ; PR19059
 
 define i32 @isint_return(double %d) nounwind {
@@ -24,6 +26,22 @@ define i32 @isint_return(double %d) nounwind {
 ; CHECK32-NEXT:    movd %xmm1, %eax
 ; CHECK32-NEXT:    andl $1, %eax
 ; CHECK32-NEXT:    retl
+;
+; AVX512-NODQ-LABEL: isint_return:
+; AVX512-NODQ:       # %bb.0:
+; AVX512-NODQ-NEXT:    vcvttpd2dq %xmm0, %xmm1
+; AVX512-NODQ-NEXT:    vcvtdq2pd %xmm1, %xmm1
+; AVX512-NODQ-NEXT:    vcmpeqsd %xmm1, %xmm0, %k0
+; AVX512-NODQ-NEXT:    kmovw %k0, %eax
+; AVX512-NODQ-NEXT:    retq
+;
+; AVX512VL-LABEL: isint_return:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vcvttpd2dq %xmm0, %xmm1
+; AVX512VL-NEXT:    vcvtdq2pd %xmm1, %xmm1
+; AVX512VL-NEXT:    vcmpeqsd %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovw %k0, %eax
+; AVX512VL-NEXT:    retq
   %i = fptosi double %d to i32
   %e = sitofp i32 %i to double
   %c = fcmp oeq double %d, %e
@@ -50,6 +68,221 @@ define i32 @isint_float_return(float %f) nounwind {
 ; CHECK32-NEXT:    movd %xmm1, %eax
 ; CHECK32-NEXT:    andl $1, %eax
 ; CHECK32-NEXT:    retl
+;
+; AVX512-NODQ-LABEL: isint_float_return:
+; AVX512-NODQ:       # %bb.0:
+; AVX512-NODQ-NEXT:    vcvttps2dq %xmm0, %xmm1
+; AVX512-NODQ-NEXT:    vcvtdq2ps %xmm1, %xmm1
+; AVX512-NODQ-NEXT:    vcmpeqss %xmm1, %xmm0, %k0
+; AVX512-NODQ-NEXT:    kmovw %k0, %eax
+; AVX512-NODQ-NEXT:    retq
+;
+; AVX512VL-LABEL: isint_float_return:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm1
+; AVX512VL-NEXT:    vcvtdq2ps %xmm1, %xmm1
+; AVX512VL-NEXT:    vcmpeqss %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovw %k0, %eax
+; AVX512VL-NEXT:    retq
+  %i = fptosi float %f to i32
+  %g = sitofp i32 %i to float
+  %c = fcmp oeq float %f, %g
+  %z = zext i1 %c to i32
+  ret i32 %z
+}
+
+define i64 @isint64_float_return(float %f) nounwind {
+; CHECK64-LABEL: isint64_float_return:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    cvttss2si %xmm0, %rax
+; CHECK64-NEXT:    cvtsi2ss %rax, %xmm1
+; CHECK64-NEXT:    cmpeqss %xmm0, %xmm1
+; CHECK64-NEXT:    movd %xmm1, %eax
+; CHECK64-NEXT:    andl $1, %eax
+; CHECK64-NEXT:    retq
+;
+; CHECK32-LABEL: isint64_float_return:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    pushl %ebp
+; CHECK32-NEXT:    movl %esp, %ebp
+; CHECK32-NEXT:    andl $-8, %esp
+; CHECK32-NEXT:    subl $32, %esp
+; CHECK32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK32-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    flds {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    fnstcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT:    orl $3072, %eax # imm = 0xC00
+; CHECK32-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    fldcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    fistpll {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    fldcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK32-NEXT:    movlps %xmm1, {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    fildll {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    cmpeqss {{[0-9]+}}(%esp), %xmm0
+; CHECK32-NEXT:    movd %xmm0, %eax
+; CHECK32-NEXT:    andl $1, %eax
+; CHECK32-NEXT:    xorl %edx, %edx
+; CHECK32-NEXT:    movl %ebp, %esp
+; CHECK32-NEXT:    popl %ebp
+; CHECK32-NEXT:    retl
+;
+; AVX512VL-LABEL: isint64_float_return:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vcvttps2qq %xmm0, %xmm1
+; AVX512VL-NEXT:    vcvtqq2ps %xmm1, %xmm1
+; AVX512VL-NEXT:    vcmpeqss %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovw %k0, %eax
+; AVX512VL-NEXT:    retq
+  %i = fptosi float %f to i64
+  %g = sitofp i64 %i to float
+  %c = fcmp oeq float %f, %g
+  %z = zext i1 %c to i64
+  ret i64 %z
+}
+
+define i64 @isint64_return(double %d) nounwind {
+; CHECK64-LABEL: isint64_return:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    cvttsd2si %xmm0, %rax
+; CHECK64-NEXT:    cvtsi2sd %rax, %xmm1
+; CHECK64-NEXT:    cmpeqsd %xmm0, %xmm1
+; CHECK64-NEXT:    movq %xmm1, %rax
+; CHECK64-NEXT:    andl $1, %eax
+; CHECK64-NEXT:    retq
+;
+; CHECK32-LABEL: isint64_return:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    pushl %ebp
+; CHECK32-NEXT:    movl %esp, %ebp
+; CHECK32-NEXT:    andl $-8, %esp
+; CHECK32-NEXT:    subl $32, %esp
+; CHECK32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK32-NEXT:    movsd %xmm0, {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    fldl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    fnstcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT:    orl $3072, %eax # imm = 0xC00
+; CHECK32-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    fldcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    fistpll {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    fldcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK32-NEXT:    movlps %xmm1, {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    fildll {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    fstpl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    cmpeqsd {{[0-9]+}}(%esp), %xmm0
+; CHECK32-NEXT:    movd %xmm0, %eax
+; CHECK32-NEXT:    andl $1, %eax
+; CHECK32-NEXT:    xorl %edx, %edx
+; CHECK32-NEXT:    movl %ebp, %esp
+; CHECK32-NEXT:    popl %ebp
+; CHECK32-NEXT:    retl
+;
+; AVX512VL-LABEL: isint64_return:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vcvttpd2qq %xmm0, %xmm1
+; AVX512VL-NEXT:    vcvtqq2pd %xmm1, %xmm1
+; AVX512VL-NEXT:    vcmpeqsd %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovw %k0, %eax
+; AVX512VL-NEXT:    retq
+  %i = fptosi double %d to i64
+  %g = sitofp i64 %i to double
+  %c = fcmp oeq double %d, %g
+  %z = zext i1 %c to i64
+  ret i64 %z
+}
+
+define i32 @isuint_return(double %d) nounwind {
+; CHECK64-LABEL: isuint_return:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    cvttsd2si %xmm0, %rax
+; CHECK64-NEXT:    movl %eax, %eax
+; CHECK64-NEXT:    cvtsi2sd %rax, %xmm1
+; CHECK64-NEXT:    cmpeqsd %xmm0, %xmm1
+; CHECK64-NEXT:    movq %xmm1, %rax
+; CHECK64-NEXT:    andl $1, %eax
+; CHECK64-NEXT:    # kill: def $eax killed $eax killed $rax
+; CHECK64-NEXT:    retq
+;
+; CHECK32-LABEL: isuint_return:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK32-NEXT:    cvttsd2si %xmm0, %eax
+; CHECK32-NEXT:    movl %eax, %ecx
+; CHECK32-NEXT:    sarl $31, %ecx
+; CHECK32-NEXT:    movapd %xmm0, %xmm1
+; CHECK32-NEXT:    subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; CHECK32-NEXT:    cvttsd2si %xmm1, %edx
+; CHECK32-NEXT:    andl %ecx, %edx
+; CHECK32-NEXT:    orl %eax, %edx
+; CHECK32-NEXT:    movd %edx, %xmm1
+; CHECK32-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; CHECK32-NEXT:    subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; CHECK32-NEXT:    cmpeqsd %xmm0, %xmm1
+; CHECK32-NEXT:    movd %xmm1, %eax
+; CHECK32-NEXT:    andl $1, %eax
+; CHECK32-NEXT:    retl
+;
+; AVX512-NODQ-LABEL: isuint_return:
+; AVX512-NODQ:       # %bb.0:
+; AVX512-NODQ-NEXT:    vcvttsd2usi %xmm0, %eax
+; AVX512-NODQ-NEXT:    vcvtusi2sd %eax, %xmm15, %xmm1
+; AVX512-NODQ-NEXT:    vcmpeqsd %xmm1, %xmm0, %k0
+; AVX512-NODQ-NEXT:    kmovw %k0, %eax
+; AVX512-NODQ-NEXT:    retq
+;
+; AVX512VL-LABEL: isuint_return:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vcvttsd2usi %xmm0, %eax
+; AVX512VL-NEXT:    vcvtusi2sd %eax, %xmm15, %xmm1
+; AVX512VL-NEXT:    vcmpeqsd %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovw %k0, %eax
+; AVX512VL-NEXT:    retq
+  %i = fptoui double %d to i32
+  %e = uitofp i32 %i to double
+  %c = fcmp oeq double %d, %e
+  %z = zext i1 %c to i32
+  ret i32 %z
+}
+
+define i32 @isuint_float_return(float %f) nounwind {
+; CHECK64-LABEL: isuint_float_return:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    cvttps2dq %xmm0, %xmm1
+; CHECK64-NEXT:    cvtdq2ps %xmm1, %xmm1
+; CHECK64-NEXT:    cmpeqss %xmm0, %xmm1
+; CHECK64-NEXT:    movd %xmm1, %eax
+; CHECK64-NEXT:    andl $1, %eax
+; CHECK64-NEXT:    retq
+;
+; CHECK32-LABEL: isuint_float_return:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK32-NEXT:    cvttps2dq %xmm0, %xmm1
+; CHECK32-NEXT:    cvtdq2ps %xmm1, %xmm1
+; CHECK32-NEXT:    cmpeqss %xmm0, %xmm1
+; CHECK32-NEXT:    movd %xmm1, %eax
+; CHECK32-NEXT:    andl $1, %eax
+; CHECK32-NEXT:    retl
+;
+; AVX512-NODQ-LABEL: isuint_float_return:
+; AVX512-NODQ:       # %bb.0:
+; AVX512-NODQ-NEXT:    vcvttps2dq %xmm0, %xmm1
+; AVX512-NODQ-NEXT:    vcvtdq2ps %xmm1, %xmm1
+; AVX512-NODQ-NEXT:    vcmpeqss %xmm1, %xmm0, %k0
+; AVX512-NODQ-NEXT:    kmovw %k0, %eax
+; AVX512-NODQ-NEXT:    retq
+;
+; AVX512VL-LABEL: isuint_float_return:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm1
+; AVX512VL-NEXT:    vcvtdq2ps %xmm1, %xmm1
+; AVX512VL-NEXT:    vcmpeqss %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovw %k0, %eax
+; AVX512VL-NEXT:    retq
   %i = fptosi float %f to i32
   %g = sitofp i32 %i to float
   %c = fcmp oeq float %f, %g
@@ -57,6 +290,190 @@ define i32 @isint_float_return(float %f) nounwind {
   ret i32 %z
 }
 
+define i64 @isuint64_return(double %d) nounwind {
+; CHECK64-LABEL: isuint64_return:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    cvttsd2si %xmm0, %rax
+; CHECK64-NEXT:    movq %rax, %rcx
+; CHECK64-NEXT:    sarq $63, %rcx
+; CHECK64-NEXT:    movapd %xmm0, %xmm1
+; CHECK64-NEXT:    subsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK64-NEXT:    cvttsd2si %xmm1, %rdx
+; CHECK64-NEXT:    andq %rcx, %rdx
+; CHECK64-NEXT:    orq %rax, %rdx
+; CHECK64-NEXT:    movq %rdx, %xmm1
+; CHECK64-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
+; CHECK64-NEXT:    subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK64-NEXT:    movapd %xmm1, %xmm2
+; CHECK64-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
+; CHECK64-NEXT:    addsd %xmm1, %xmm2
+; CHECK64-NEXT:    cmpeqsd %xmm0, %xmm2
+; CHECK64-NEXT:    movq %xmm2, %rax
+; CHECK64-NEXT:    andl $1, %eax
+; CHECK64-NEXT:    retq
+;
+; CHECK32-LABEL: isuint64_return:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    pushl %ebp
+; CHECK32-NEXT:    movl %esp, %ebp
+; CHECK32-NEXT:    andl $-8, %esp
+; CHECK32-NEXT:    subl $16, %esp
+; CHECK32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK32-NEXT:    movsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
+; CHECK32-NEXT:    ucomisd %xmm0, %xmm1
+; CHECK32-NEXT:    jbe .LBB6_2
+; CHECK32-NEXT:  # %bb.1:
+; CHECK32-NEXT:    xorpd %xmm1, %xmm1
+; CHECK32-NEXT:  .LBB6_2:
+; CHECK32-NEXT:    movapd %xmm0, %xmm2
+; CHECK32-NEXT:    subsd %xmm1, %xmm2
+; CHECK32-NEXT:    movsd %xmm2, {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    setbe %al
+; CHECK32-NEXT:    fldl {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    fnstcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT:    orl $3072, %ecx # imm = 0xC00
+; CHECK32-NEXT:    movw %cx, {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    fldcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    fistpll {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    fldcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    movzbl %al, %eax
+; CHECK32-NEXT:    shll $31, %eax
+; CHECK32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT:    movd %eax, %xmm1
+; CHECK32-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK32-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK32-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[1],mem[1]
+; CHECK32-NEXT:    subpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
+; CHECK32-NEXT:    movapd %xmm2, %xmm1
+; CHECK32-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
+; CHECK32-NEXT:    addsd %xmm2, %xmm1
+; CHECK32-NEXT:    cmpeqsd %xmm0, %xmm1
+; CHECK32-NEXT:    movd %xmm1, %eax
+; CHECK32-NEXT:    andl $1, %eax
+; CHECK32-NEXT:    xorl %edx, %edx
+; CHECK32-NEXT:    movl %ebp, %esp
+; CHECK32-NEXT:    popl %ebp
+; CHECK32-NEXT:    retl
+;
+; AVX512-NODQ-LABEL: isuint64_return:
+; AVX512-NODQ:       # %bb.0:
+; AVX512-NODQ-NEXT:    vcvttsd2usi %xmm0, %rax
+; AVX512-NODQ-NEXT:    vcvtusi2sd %rax, %xmm15, %xmm1
+; AVX512-NODQ-NEXT:    vcmpeqsd %xmm1, %xmm0, %k0
+; AVX512-NODQ-NEXT:    kmovw %k0, %eax
+; AVX512-NODQ-NEXT:    retq
+;
+; AVX512VL-LABEL: isuint64_return:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vcvttsd2usi %xmm0, %rax
+; AVX512VL-NEXT:    vcvtusi2sd %rax, %xmm15, %xmm1
+; AVX512VL-NEXT:    vcmpeqsd %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovw %k0, %eax
+; AVX512VL-NEXT:    retq
+  %i = fptoui double %d to i64
+  %e = uitofp i64 %i to double
+  %c = fcmp oeq double %d, %e
+  %z = zext i1 %c to i64
+  ret i64 %z
+}
+
+define i64 @isuint64_float_return(float %f) nounwind {
+; CHECK64-LABEL: isuint64_float_return:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    cvttss2si %xmm0, %rcx
+; CHECK64-NEXT:    movq %rcx, %rdx
+; CHECK64-NEXT:    sarq $63, %rdx
+; CHECK64-NEXT:    movaps %xmm0, %xmm1
+; CHECK64-NEXT:    subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK64-NEXT:    cvttss2si %xmm1, %rax
+; CHECK64-NEXT:    andq %rdx, %rax
+; CHECK64-NEXT:    orq %rcx, %rax
+; CHECK64-NEXT:    js .LBB7_1
+; CHECK64-NEXT:  # %bb.2:
+; CHECK64-NEXT:    xorps %xmm1, %xmm1
+; CHECK64-NEXT:    cvtsi2ss %rax, %xmm1
+; CHECK64-NEXT:    jmp .LBB7_3
+; CHECK64-NEXT:  .LBB7_1:
+; CHECK64-NEXT:    movq %rax, %rcx
+; CHECK64-NEXT:    shrq %rcx
+; CHECK64-NEXT:    andl $1, %eax
+; CHECK64-NEXT:    orq %rcx, %rax
+; CHECK64-NEXT:    xorps %xmm1, %xmm1
+; CHECK64-NEXT:    cvtsi2ss %rax, %xmm1
+; CHECK64-NEXT:    addss %xmm1, %xmm1
+; CHECK64-NEXT:  .LBB7_3:
+; CHECK64-NEXT:    cmpeqss %xmm1, %xmm0
+; CHECK64-NEXT:    movd %xmm0, %eax
+; CHECK64-NEXT:    andl $1, %eax
+; CHECK64-NEXT:    retq
+;
+; CHECK32-LABEL: isuint64_float_return:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    pushl %ebp
+; CHECK32-NEXT:    movl %esp, %ebp
+; CHECK32-NEXT:    andl $-8, %esp
+; CHECK32-NEXT:    subl $32, %esp
+; CHECK32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK32-NEXT:    movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
+; CHECK32-NEXT:    ucomiss %xmm0, %xmm1
+; CHECK32-NEXT:    jbe .LBB7_2
+; CHECK32-NEXT:  # %bb.1:
+; CHECK32-NEXT:    xorps %xmm1, %xmm1
+; CHECK32-NEXT:  .LBB7_2:
+; CHECK32-NEXT:    movaps %xmm0, %xmm2
+; CHECK32-NEXT:    subss %xmm1, %xmm2
+; CHECK32-NEXT:    movss %xmm2, {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    setbe %al
+; CHECK32-NEXT:    flds {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    fnstcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT:    orl $3072, %ecx # imm = 0xC00
+; CHECK32-NEXT:    movw %cx, {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    fldcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    fistpll {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    fldcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    movzbl %al, %eax
+; CHECK32-NEXT:    shll $31, %eax
+; CHECK32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT:    movd %eax, %xmm1
+; CHECK32-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK32-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK32-NEXT:    movq %xmm2, {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    shrl $31, %eax
+; CHECK32-NEXT:    fildll {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
+; CHECK32-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    cmpeqss {{[0-9]+}}(%esp), %xmm0
+; CHECK32-NEXT:    movd %xmm0, %eax
+; CHECK32-NEXT:    andl $1, %eax
+; CHECK32-NEXT:    xorl %edx, %edx
+; CHECK32-NEXT:    movl %ebp, %esp
+; CHECK32-NEXT:    popl %ebp
+; CHECK32-NEXT:    retl
+;
+; AVX512-NODQ-LABEL: isuint64_float_return:
+; AVX512-NODQ:       # %bb.0:
+; AVX512-NODQ-NEXT:    vcvttss2usi %xmm0, %rax
+; AVX512-NODQ-NEXT:    vcvtusi2ss %rax, %xmm15, %xmm1
+; AVX512-NODQ-NEXT:    vcmpeqss %xmm1, %xmm0, %k0
+; AVX512-NODQ-NEXT:    kmovw %k0, %eax
+; AVX512-NODQ-NEXT:    retq
+;
+; AVX512VL-LABEL: isuint64_float_return:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vcvttss2u...
[truncated]

RKSimon (Collaborator) left a comment:

This seems more complicated than it should be - maybe just start with the AVX512DQ i64 handling in this patch?
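For illustration, a minimal sketch of that flattening (an editorial suggestion under the patch's own assumptions, not code from the PR): given the patch's early exits, the signed and unsigned opcode pairs differ only by the IsUnsigned flag, so the four nested branches reduce to two cases, with the 512-bit widening reserved for AVX512DQ without AVX512VL.

// Sketch only; assumes the patch's earlier checks already rejected
// unsigned i32 without AVX512VL and i64 without AVX512DQ.
unsigned ToIntOpcode, ToFPOpcode;
unsigned Width = 128;
if (IntVT == MVT::i32 || Subtarget.hasVLX()) {
  // 128-bit conversions. IsUnsigned implies hasVLX() here, because the
  // unsigned-i32-without-VLX case returned SDValue() earlier.
  ToIntOpcode =
      SrcSize != IntSize
          ? unsigned(IsUnsigned ? X86ISD::CVTTP2UI : X86ISD::CVTTP2SI)
          : unsigned(IsUnsigned ? ISD::FP_TO_UINT : ISD::FP_TO_SINT);
  ToFPOpcode =
      IntSize != VTSize
          ? unsigned(IsUnsigned ? X86ISD::CVTUI2P : X86ISD::CVTSI2P)
          : unsigned(IsUnsigned ? ISD::UINT_TO_FP : ISD::SINT_TO_FP);
} else {
  // i64 with AVX512DQ but no VL: widen to 512-bit vectors so the generic
  // opcodes can legalize on ZMM registers.
  Width = 512;
  ToIntOpcode = CastToInt.getOpcode();
  ToFPOpcode = IsUnsigned ? ISD::UINT_TO_FP : ISD::SINT_TO_FP;
}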

On llvm/test/CodeGen/X86/isint.ll:

; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=CHECK64 %s
; RUN: llc < %s -mtriple=i686-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=CHECK32 %s

RKSimon: CHECK32 -> X86

Suggested change:

; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=X64 %s
; RUN: llc < %s -mtriple=i686-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=X86 %s

On the x86_64 RUN line:

; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=CHECK64 %s

RKSimon: CHECK64 -> SSE2
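Renaming check prefixes means the autogenerated assertions must be regenerated; the test header notes it was produced by utils/update_llc_test_checks.py, so rerunning that script on the file refreshes them (typical invocation, assumed rather than quoted from this thread):

llvm/utils/update_llc_test_checks.py llvm/test/CodeGen/X86/isint.ll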


Successfully merging this pull request may close this issue:

[X86] lowerFPToIntToFP - handle UI2FP on AVX512VL targets and i64 types on AVX512DQ targets (#160111)
