-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[X86] lowerFPToIntToFP - handle UI2FP on AVX512VL targets and i64 types on AVX512DQ targets #162656
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-x86 Author: Kavin Gnanapandithan (KavinTheG) Changes: fixes llvm#160111 Patch is 22.45 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/162656.diff 2 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2feb76e0eb7b4..946dbf8361aaf 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19885,7 +19885,9 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL,
// TODO: Allow FP_TO_UINT.
SDValue CastToInt = CastToFP.getOperand(0);
MVT VT = CastToFP.getSimpleValueType();
- if (CastToInt.getOpcode() != ISD::FP_TO_SINT || VT.isVector())
+ if ((CastToInt.getOpcode() != ISD::FP_TO_SINT &&
+ CastToInt.getOpcode() != ISD::FP_TO_UINT) ||
+ VT.isVector())
return SDValue();
MVT IntVT = CastToInt.getSimpleValueType();
@@ -19897,22 +19899,78 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL,
// See if we have 128-bit vector cast instructions for this type of cast.
// We need cvttps2dq/cvttpd2dq and cvtdq2ps/cvtdq2pd.
if (!Subtarget.hasSSE2() || (VT != MVT::f32 && VT != MVT::f64) ||
- IntVT != MVT::i32)
+ !(IntVT == MVT::i32 || (IntVT == MVT::i64 && Subtarget.hasDQI())))
return SDValue();
unsigned SrcSize = SrcVT.getSizeInBits();
unsigned IntSize = IntVT.getSizeInBits();
unsigned VTSize = VT.getSizeInBits();
- MVT VecSrcVT = MVT::getVectorVT(SrcVT, 128 / SrcSize);
- MVT VecIntVT = MVT::getVectorVT(IntVT, 128 / IntSize);
- MVT VecVT = MVT::getVectorVT(VT, 128 / VTSize);
+ unsigned ToIntOpcode, ToFPOpcode;
+ unsigned Width = 128;
+ bool IsUnsigned = CastToInt.getOpcode() == ISD::FP_TO_UINT;
- // We need target-specific opcodes if this is v2f64 -> v4i32 -> v2f64.
- unsigned ToIntOpcode =
- SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
- unsigned ToFPOpcode =
- IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
+ if (IntVT == MVT::i32) {
+ if (IsUnsigned && !Subtarget.hasVLX())
+ return SDValue(); // Need AVX512VL for unsigned i32
+ if (Subtarget.hasVLX()) {
+ if (IsUnsigned) {
+ ToIntOpcode =
+ SrcSize != IntSize ? X86ISD::CVTTP2UI : (unsigned)ISD::FP_TO_UINT;
+ ToFPOpcode =
+ IntSize != VTSize ? X86ISD::CVTUI2P : (unsigned)ISD::UINT_TO_FP;
+ } else {
+ ToIntOpcode =
+ SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
+ ToFPOpcode =
+ IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
+ }
+ } else {
+ // SSE2
+ ToIntOpcode =
+ SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
+ ToFPOpcode =
+ IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
+ }
+ } else {
+ if (Subtarget.hasVLX()) {
+ if (IsUnsigned) {
+ ToIntOpcode =
+ SrcSize != IntSize ? X86ISD::CVTTP2UI : (unsigned)ISD::FP_TO_UINT;
+ ToFPOpcode =
+ IntSize != VTSize ? X86ISD::CVTUI2P : (unsigned)ISD::UINT_TO_FP;
+ } else {
+ ToIntOpcode =
+ SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
+ ToFPOpcode =
+ IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
+ }
+ } else {
+ // Need to extend width for AVX512DQ
+ Width = 512;
+ ToIntOpcode = CastToInt.getOpcode();
+ ToFPOpcode = IsUnsigned ? ISD::UINT_TO_FP : ISD::SINT_TO_FP;
+ }
+ }
+
+ MVT VecSrcVT;
+ MVT VecIntVT;
+ MVT VecVT;
+ if (IntVT == MVT::i64) {
+ unsigned NumElts = Width / IntSize;
+ VecIntVT = MVT::getVectorVT(IntVT, NumElts);
+
+ // minimum legal size is v4f32
+ unsigned SrcElts = (SrcVT == MVT::f32) ? std::max(NumElts, 4u) : NumElts;
+ unsigned VTElts = (VT == MVT::f32) ? std::max(NumElts, 4u) : NumElts;
+
+ VecSrcVT = MVT::getVectorVT(SrcVT, SrcElts);
+ VecVT = MVT::getVectorVT(VT, VTElts);
+ } else {
+ VecSrcVT = MVT::getVectorVT(SrcVT, Width / SrcSize);
+ VecIntVT = MVT::getVectorVT(IntVT, Width / IntSize);
+ VecVT = MVT::getVectorVT(VT, Width / VTSize);
+ }
// sint_to_fp (fp_to_sint X) --> extelt (sint_to_fp (fp_to_sint (s2v X))), 0
//
// We are not defining the high elements (for example, zero them) because
diff --git a/llvm/test/CodeGen/X86/isint.ll b/llvm/test/CodeGen/X86/isint.ll
index 8a56f49a6c755..d0b340ce37875 100644
--- a/llvm/test/CodeGen/X86/isint.ll
+++ b/llvm/test/CodeGen/X86/isint.ll
@@ -1,7 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=CHECK64 %s
; RUN: llc < %s -mtriple=i686-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=CHECK32 %s
-
+; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+avx512f | FileCheck -check-prefix=AVX512-NODQ %s
+; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+avx512f,+avx512dq | FileCheck -check-prefix=AVX512-NODQ %s
+; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck -check-prefix=AVX512VL %s
; PR19059
define i32 @isint_return(double %d) nounwind {
@@ -24,6 +26,22 @@ define i32 @isint_return(double %d) nounwind {
; CHECK32-NEXT: movd %xmm1, %eax
; CHECK32-NEXT: andl $1, %eax
; CHECK32-NEXT: retl
+;
+; AVX512-NODQ-LABEL: isint_return:
+; AVX512-NODQ: # %bb.0:
+; AVX512-NODQ-NEXT: vcvttpd2dq %xmm0, %xmm1
+; AVX512-NODQ-NEXT: vcvtdq2pd %xmm1, %xmm1
+; AVX512-NODQ-NEXT: vcmpeqsd %xmm1, %xmm0, %k0
+; AVX512-NODQ-NEXT: kmovw %k0, %eax
+; AVX512-NODQ-NEXT: retq
+;
+; AVX512VL-LABEL: isint_return:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm1
+; AVX512VL-NEXT: vcvtdq2pd %xmm1, %xmm1
+; AVX512VL-NEXT: vcmpeqsd %xmm1, %xmm0, %k0
+; AVX512VL-NEXT: kmovw %k0, %eax
+; AVX512VL-NEXT: retq
%i = fptosi double %d to i32
%e = sitofp i32 %i to double
%c = fcmp oeq double %d, %e
@@ -50,6 +68,221 @@ define i32 @isint_float_return(float %f) nounwind {
; CHECK32-NEXT: movd %xmm1, %eax
; CHECK32-NEXT: andl $1, %eax
; CHECK32-NEXT: retl
+;
+; AVX512-NODQ-LABEL: isint_float_return:
+; AVX512-NODQ: # %bb.0:
+; AVX512-NODQ-NEXT: vcvttps2dq %xmm0, %xmm1
+; AVX512-NODQ-NEXT: vcvtdq2ps %xmm1, %xmm1
+; AVX512-NODQ-NEXT: vcmpeqss %xmm1, %xmm0, %k0
+; AVX512-NODQ-NEXT: kmovw %k0, %eax
+; AVX512-NODQ-NEXT: retq
+;
+; AVX512VL-LABEL: isint_float_return:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm1
+; AVX512VL-NEXT: vcvtdq2ps %xmm1, %xmm1
+; AVX512VL-NEXT: vcmpeqss %xmm1, %xmm0, %k0
+; AVX512VL-NEXT: kmovw %k0, %eax
+; AVX512VL-NEXT: retq
+ %i = fptosi float %f to i32
+ %g = sitofp i32 %i to float
+ %c = fcmp oeq float %f, %g
+ %z = zext i1 %c to i32
+ ret i32 %z
+}
+
+define i64 @isint64_float_return(float %f) nounwind {
+; CHECK64-LABEL: isint64_float_return:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: cvttss2si %xmm0, %rax
+; CHECK64-NEXT: cvtsi2ss %rax, %xmm1
+; CHECK64-NEXT: cmpeqss %xmm0, %xmm1
+; CHECK64-NEXT: movd %xmm1, %eax
+; CHECK64-NEXT: andl $1, %eax
+; CHECK64-NEXT: retq
+;
+; CHECK32-LABEL: isint64_float_return:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: pushl %ebp
+; CHECK32-NEXT: movl %esp, %ebp
+; CHECK32-NEXT: andl $-8, %esp
+; CHECK32-NEXT: subl $32, %esp
+; CHECK32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
+; CHECK32-NEXT: flds {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fnstcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT: orl $3072, %eax # imm = 0xC00
+; CHECK32-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fistpll {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK32-NEXT: movlps %xmm1, {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fildll {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fstps {{[0-9]+}}(%esp)
+; CHECK32-NEXT: cmpeqss {{[0-9]+}}(%esp), %xmm0
+; CHECK32-NEXT: movd %xmm0, %eax
+; CHECK32-NEXT: andl $1, %eax
+; CHECK32-NEXT: xorl %edx, %edx
+; CHECK32-NEXT: movl %ebp, %esp
+; CHECK32-NEXT: popl %ebp
+; CHECK32-NEXT: retl
+;
+; AVX512VL-LABEL: isint64_float_return:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vcvttps2qq %xmm0, %xmm1
+; AVX512VL-NEXT: vcvtqq2ps %xmm1, %xmm1
+; AVX512VL-NEXT: vcmpeqss %xmm1, %xmm0, %k0
+; AVX512VL-NEXT: kmovw %k0, %eax
+; AVX512VL-NEXT: retq
+ %i = fptosi float %f to i64
+ %g = sitofp i64 %i to float
+ %c = fcmp oeq float %f, %g
+ %z = zext i1 %c to i64
+ ret i64 %z
+}
+
+define i64 @isint64_return(double %d) nounwind {
+; CHECK64-LABEL: isint64_return:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: cvttsd2si %xmm0, %rax
+; CHECK64-NEXT: cvtsi2sd %rax, %xmm1
+; CHECK64-NEXT: cmpeqsd %xmm0, %xmm1
+; CHECK64-NEXT: movq %xmm1, %rax
+; CHECK64-NEXT: andl $1, %eax
+; CHECK64-NEXT: retq
+;
+; CHECK32-LABEL: isint64_return:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: pushl %ebp
+; CHECK32-NEXT: movl %esp, %ebp
+; CHECK32-NEXT: andl $-8, %esp
+; CHECK32-NEXT: subl $32, %esp
+; CHECK32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK32-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fldl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fnstcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT: orl $3072, %eax # imm = 0xC00
+; CHECK32-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fistpll {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK32-NEXT: movlps %xmm1, {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fildll {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fstpl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: cmpeqsd {{[0-9]+}}(%esp), %xmm0
+; CHECK32-NEXT: movd %xmm0, %eax
+; CHECK32-NEXT: andl $1, %eax
+; CHECK32-NEXT: xorl %edx, %edx
+; CHECK32-NEXT: movl %ebp, %esp
+; CHECK32-NEXT: popl %ebp
+; CHECK32-NEXT: retl
+;
+; AVX512VL-LABEL: isint64_return:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vcvttpd2qq %xmm0, %xmm1
+; AVX512VL-NEXT: vcvtqq2pd %xmm1, %xmm1
+; AVX512VL-NEXT: vcmpeqsd %xmm1, %xmm0, %k0
+; AVX512VL-NEXT: kmovw %k0, %eax
+; AVX512VL-NEXT: retq
+ %i = fptosi double %d to i64
+ %g = sitofp i64 %i to double
+ %c = fcmp oeq double %d, %g
+ %z = zext i1 %c to i64
+ ret i64 %z
+}
+
+define i32 @isuint_return(double %d) nounwind {
+; CHECK64-LABEL: isuint_return:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: cvttsd2si %xmm0, %rax
+; CHECK64-NEXT: movl %eax, %eax
+; CHECK64-NEXT: cvtsi2sd %rax, %xmm1
+; CHECK64-NEXT: cmpeqsd %xmm0, %xmm1
+; CHECK64-NEXT: movq %xmm1, %rax
+; CHECK64-NEXT: andl $1, %eax
+; CHECK64-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK64-NEXT: retq
+;
+; CHECK32-LABEL: isuint_return:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK32-NEXT: cvttsd2si %xmm0, %eax
+; CHECK32-NEXT: movl %eax, %ecx
+; CHECK32-NEXT: sarl $31, %ecx
+; CHECK32-NEXT: movapd %xmm0, %xmm1
+; CHECK32-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; CHECK32-NEXT: cvttsd2si %xmm1, %edx
+; CHECK32-NEXT: andl %ecx, %edx
+; CHECK32-NEXT: orl %eax, %edx
+; CHECK32-NEXT: movd %edx, %xmm1
+; CHECK32-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; CHECK32-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; CHECK32-NEXT: cmpeqsd %xmm0, %xmm1
+; CHECK32-NEXT: movd %xmm1, %eax
+; CHECK32-NEXT: andl $1, %eax
+; CHECK32-NEXT: retl
+;
+; AVX512-NODQ-LABEL: isuint_return:
+; AVX512-NODQ: # %bb.0:
+; AVX512-NODQ-NEXT: vcvttsd2usi %xmm0, %eax
+; AVX512-NODQ-NEXT: vcvtusi2sd %eax, %xmm15, %xmm1
+; AVX512-NODQ-NEXT: vcmpeqsd %xmm1, %xmm0, %k0
+; AVX512-NODQ-NEXT: kmovw %k0, %eax
+; AVX512-NODQ-NEXT: retq
+;
+; AVX512VL-LABEL: isuint_return:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vcvttsd2usi %xmm0, %eax
+; AVX512VL-NEXT: vcvtusi2sd %eax, %xmm15, %xmm1
+; AVX512VL-NEXT: vcmpeqsd %xmm1, %xmm0, %k0
+; AVX512VL-NEXT: kmovw %k0, %eax
+; AVX512VL-NEXT: retq
+ %i = fptoui double %d to i32
+ %e = uitofp i32 %i to double
+ %c = fcmp oeq double %d, %e
+ %z = zext i1 %c to i32
+ ret i32 %z
+}
+
+define i32 @isuint_float_return(float %f) nounwind {
+; CHECK64-LABEL: isuint_float_return:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: cvttps2dq %xmm0, %xmm1
+; CHECK64-NEXT: cvtdq2ps %xmm1, %xmm1
+; CHECK64-NEXT: cmpeqss %xmm0, %xmm1
+; CHECK64-NEXT: movd %xmm1, %eax
+; CHECK64-NEXT: andl $1, %eax
+; CHECK64-NEXT: retq
+;
+; CHECK32-LABEL: isuint_float_return:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK32-NEXT: cvttps2dq %xmm0, %xmm1
+; CHECK32-NEXT: cvtdq2ps %xmm1, %xmm1
+; CHECK32-NEXT: cmpeqss %xmm0, %xmm1
+; CHECK32-NEXT: movd %xmm1, %eax
+; CHECK32-NEXT: andl $1, %eax
+; CHECK32-NEXT: retl
+;
+; AVX512-NODQ-LABEL: isuint_float_return:
+; AVX512-NODQ: # %bb.0:
+; AVX512-NODQ-NEXT: vcvttps2dq %xmm0, %xmm1
+; AVX512-NODQ-NEXT: vcvtdq2ps %xmm1, %xmm1
+; AVX512-NODQ-NEXT: vcmpeqss %xmm1, %xmm0, %k0
+; AVX512-NODQ-NEXT: kmovw %k0, %eax
+; AVX512-NODQ-NEXT: retq
+;
+; AVX512VL-LABEL: isuint_float_return:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm1
+; AVX512VL-NEXT: vcvtdq2ps %xmm1, %xmm1
+; AVX512VL-NEXT: vcmpeqss %xmm1, %xmm0, %k0
+; AVX512VL-NEXT: kmovw %k0, %eax
+; AVX512VL-NEXT: retq
%i = fptosi float %f to i32
%g = sitofp i32 %i to float
%c = fcmp oeq float %f, %g
@@ -57,6 +290,190 @@ define i32 @isint_float_return(float %f) nounwind {
ret i32 %z
}
+define i64 @isuint64_return(double %d) nounwind {
+; CHECK64-LABEL: isuint64_return:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: cvttsd2si %xmm0, %rax
+; CHECK64-NEXT: movq %rax, %rcx
+; CHECK64-NEXT: sarq $63, %rcx
+; CHECK64-NEXT: movapd %xmm0, %xmm1
+; CHECK64-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK64-NEXT: cvttsd2si %xmm1, %rdx
+; CHECK64-NEXT: andq %rcx, %rdx
+; CHECK64-NEXT: orq %rax, %rdx
+; CHECK64-NEXT: movq %rdx, %xmm1
+; CHECK64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
+; CHECK64-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK64-NEXT: movapd %xmm1, %xmm2
+; CHECK64-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
+; CHECK64-NEXT: addsd %xmm1, %xmm2
+; CHECK64-NEXT: cmpeqsd %xmm0, %xmm2
+; CHECK64-NEXT: movq %xmm2, %rax
+; CHECK64-NEXT: andl $1, %eax
+; CHECK64-NEXT: retq
+;
+; CHECK32-LABEL: isuint64_return:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: pushl %ebp
+; CHECK32-NEXT: movl %esp, %ebp
+; CHECK32-NEXT: andl $-8, %esp
+; CHECK32-NEXT: subl $16, %esp
+; CHECK32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK32-NEXT: movsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
+; CHECK32-NEXT: ucomisd %xmm0, %xmm1
+; CHECK32-NEXT: jbe .LBB6_2
+; CHECK32-NEXT: # %bb.1:
+; CHECK32-NEXT: xorpd %xmm1, %xmm1
+; CHECK32-NEXT: .LBB6_2:
+; CHECK32-NEXT: movapd %xmm0, %xmm2
+; CHECK32-NEXT: subsd %xmm1, %xmm2
+; CHECK32-NEXT: movsd %xmm2, {{[0-9]+}}(%esp)
+; CHECK32-NEXT: setbe %al
+; CHECK32-NEXT: fldl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fnstcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT: orl $3072, %ecx # imm = 0xC00
+; CHECK32-NEXT: movw %cx, {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fistpll {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT: movzbl %al, %eax
+; CHECK32-NEXT: shll $31, %eax
+; CHECK32-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT: movd %eax, %xmm1
+; CHECK32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[1],mem[1]
+; CHECK32-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
+; CHECK32-NEXT: movapd %xmm2, %xmm1
+; CHECK32-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
+; CHECK32-NEXT: addsd %xmm2, %xmm1
+; CHECK32-NEXT: cmpeqsd %xmm0, %xmm1
+; CHECK32-NEXT: movd %xmm1, %eax
+; CHECK32-NEXT: andl $1, %eax
+; CHECK32-NEXT: xorl %edx, %edx
+; CHECK32-NEXT: movl %ebp, %esp
+; CHECK32-NEXT: popl %ebp
+; CHECK32-NEXT: retl
+;
+; AVX512-NODQ-LABEL: isuint64_return:
+; AVX512-NODQ: # %bb.0:
+; AVX512-NODQ-NEXT: vcvttsd2usi %xmm0, %rax
+; AVX512-NODQ-NEXT: vcvtusi2sd %rax, %xmm15, %xmm1
+; AVX512-NODQ-NEXT: vcmpeqsd %xmm1, %xmm0, %k0
+; AVX512-NODQ-NEXT: kmovw %k0, %eax
+; AVX512-NODQ-NEXT: retq
+;
+; AVX512VL-LABEL: isuint64_return:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
+; AVX512VL-NEXT: vcvtusi2sd %rax, %xmm15, %xmm1
+; AVX512VL-NEXT: vcmpeqsd %xmm1, %xmm0, %k0
+; AVX512VL-NEXT: kmovw %k0, %eax
+; AVX512VL-NEXT: retq
+ %i = fptoui double %d to i64
+ %e = uitofp i64 %i to double
+ %c = fcmp oeq double %d, %e
+ %z = zext i1 %c to i64
+ ret i64 %z
+}
+
+define i64 @isuint64_float_return(float %f) nounwind {
+; CHECK64-LABEL: isuint64_float_return:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: cvttss2si %xmm0, %rcx
+; CHECK64-NEXT: movq %rcx, %rdx
+; CHECK64-NEXT: sarq $63, %rdx
+; CHECK64-NEXT: movaps %xmm0, %xmm1
+; CHECK64-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK64-NEXT: cvttss2si %xmm1, %rax
+; CHECK64-NEXT: andq %rdx, %rax
+; CHECK64-NEXT: orq %rcx, %rax
+; CHECK64-NEXT: js .LBB7_1
+; CHECK64-NEXT: # %bb.2:
+; CHECK64-NEXT: xorps %xmm1, %xmm1
+; CHECK64-NEXT: cvtsi2ss %rax, %xmm1
+; CHECK64-NEXT: jmp .LBB7_3
+; CHECK64-NEXT: .LBB7_1:
+; CHECK64-NEXT: movq %rax, %rcx
+; CHECK64-NEXT: shrq %rcx
+; CHECK64-NEXT: andl $1, %eax
+; CHECK64-NEXT: orq %rcx, %rax
+; CHECK64-NEXT: xorps %xmm1, %xmm1
+; CHECK64-NEXT: cvtsi2ss %rax, %xmm1
+; CHECK64-NEXT: addss %xmm1, %xmm1
+; CHECK64-NEXT: .LBB7_3:
+; CHECK64-NEXT: cmpeqss %xmm1, %xmm0
+; CHECK64-NEXT: movd %xmm0, %eax
+; CHECK64-NEXT: andl $1, %eax
+; CHECK64-NEXT: retq
+;
+; CHECK32-LABEL: isuint64_float_return:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: pushl %ebp
+; CHECK32-NEXT: movl %esp, %ebp
+; CHECK32-NEXT: andl $-8, %esp
+; CHECK32-NEXT: subl $32, %esp
+; CHECK32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK32-NEXT: movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
+; CHECK32-NEXT: ucomiss %xmm0, %xmm1
+; CHECK32-NEXT: jbe .LBB7_2
+; CHECK32-NEXT: # %bb.1:
+; CHECK32-NEXT: xorps %xmm1, %xmm1
+; CHECK32-NEXT: .LBB7_2:
+; CHECK32-NEXT: movaps %xmm0, %xmm2
+; CHECK32-NEXT: subss %xmm1, %xmm2
+; CHECK32-NEXT: movss %xmm2, {{[0-9]+}}(%esp)
+; CHECK32-NEXT: setbe %al
+; CHECK32-NEXT: flds {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fnstcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT: orl $3072, %ecx # imm = 0xC00
+; CHECK32-NEXT: movw %cx, {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fistpll {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fldcw {{[0-9]+}}(%esp)
+; CHECK32-NEXT: movzbl %al, %eax
+; CHECK32-NEXT: shll $31, %eax
+; CHECK32-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT: movd %eax, %xmm1
+; CHECK32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK32-NEXT: movq %xmm2, {{[0-9]+}}(%esp)
+; CHECK32-NEXT: shrl $31, %eax
+; CHECK32-NEXT: fildll {{[0-9]+}}(%esp)
+; CHECK32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
+; CHECK32-NEXT: fstps {{[0-9]+}}(%esp)
+; CHECK32-NEXT: cmpeqss {{[0-9]+}}(%esp), %xmm0
+; CHECK32-NEXT: movd %xmm0, %eax
+; CHECK32-NEXT: andl $1, %eax
+; CHECK32-NEXT: xorl %edx, %edx
+; CHECK32-NEXT: movl %ebp, %esp
+; CHECK32-NEXT: popl %ebp
+; CHECK32-NEXT: retl
+;
+; AVX512-NODQ-LABEL: isuint64_float_return:
+; AVX512-NODQ: # %bb.0:
+; AVX512-NODQ-NEXT: vcvttss2usi %xmm0, %rax
+; AVX512-NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm1
+; AVX512-NODQ-NEXT: vcmpeqss %xmm1, %xmm0, %k0
+; AVX512-NODQ-NEXT: kmovw %k0, %eax
+; AVX512-NODQ-NEXT: retq
+;
+; AVX512VL-LABEL: isuint64_float_return:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vcvttss2u...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This seems more complicated than it should be - maybe just start with the AVX512DQ i64 handling in this patch?
; RUN: llc < %s -mtriple=i686-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=X86 %s | ||
|
||
; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=CHECK64 %s | ||
; RUN: llc < %s -mtriple=i686-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=CHECK32 %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
CHECK32 -> X86
; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=X64 %s | ||
; RUN: llc < %s -mtriple=i686-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=X86 %s | ||
|
||
; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=CHECK64 %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
CHECK64 -> SSE2
fixes #160111