Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 48 additions & 11 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19909,7 +19909,9 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL,
// Only scalar casts fed by FP_TO_SINT or FP_TO_UINT are handled here.
SDValue CastToInt = CastToFP.getOperand(0);
MVT VT = CastToFP.getSimpleValueType();
if (CastToInt.getOpcode() != ISD::FP_TO_SINT || VT.isVector())
if ((CastToInt.getOpcode() != ISD::FP_TO_SINT &&
CastToInt.getOpcode() != ISD::FP_TO_UINT) ||
VT.isVector())
return SDValue();

MVT IntVT = CastToInt.getSimpleValueType();
Expand All @@ -19921,22 +19923,57 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL,
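// See if we have vector cast instructions for this type of cast.
// For i32 we need cvttps2dq/cvttpd2dq and cvtdq2ps/cvtdq2pd (SSE2). For i64 we
// need the AVX512DQ quadword conversions (e.g. vcvttps2qq/vcvttpd2qq and
// vcvtqq2ps/vcvtqq2pd).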
if (!Subtarget.hasSSE2() || (VT != MVT::f32 && VT != MVT::f64) ||
IntVT != MVT::i32)
!(IntVT == MVT::i32 || (IntVT == MVT::i64 && Subtarget.hasDQI())))
return SDValue();

unsigned SrcSize = SrcVT.getSizeInBits();
unsigned IntSize = IntVT.getSizeInBits();
unsigned VTSize = VT.getSizeInBits();
MVT VecSrcVT = MVT::getVectorVT(SrcVT, 128 / SrcSize);
MVT VecIntVT = MVT::getVectorVT(IntVT, 128 / IntSize);
MVT VecVT = MVT::getVectorVT(VT, 128 / VTSize);

// We need target-specific opcodes if this is v2f64 -> v4i32 -> v2f64.
unsigned ToIntOpcode =
SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
unsigned ToFPOpcode =
IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
unsigned ToIntOpcode, ToFPOpcode;
unsigned Width = 128;
bool IsUnsigned = CastToInt.getOpcode() == ISD::FP_TO_UINT;

if (Subtarget.hasVLX() && IntVT == MVT::i64) {
// AVX512DQ+VLX
if (IsUnsigned) {
ToIntOpcode =
SrcSize != IntSize ? X86ISD::CVTTP2UI : (unsigned)ISD::FP_TO_UINT;
ToFPOpcode =
IntSize != VTSize ? X86ISD::CVTUI2P : (unsigned)ISD::UINT_TO_FP;
} else {
ToIntOpcode =
SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
ToFPOpcode =
IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
}
} else if (IntVT == MVT::i64) {
// Need to extend width for AVX512DQ without AVX512VL
Width = 512;
ToIntOpcode = CastToInt.getOpcode();
ToFPOpcode = IsUnsigned ? ISD::UINT_TO_FP : ISD::SINT_TO_FP;
} else {
// SSE2
ToIntOpcode =
SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
ToFPOpcode =
IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
}

MVT VecSrcVT, VecIntVT, VecVT;
unsigned NumElts = Width / IntSize;
VecIntVT = MVT::getVectorVT(IntVT, NumElts);
unsigned SrcElts, VTElts;
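// The 512-bit i64 conversions are element-count preserving (e.g. vcvttps2qq
// converts v8f32 -> v8i64, not v16f32 -> v8i64), so the source and result
// vectors must use the same number of elements as the integer vector rather
// than being widened to 512 bits themselves.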
if (IntVT == MVT::i64 && Width == 512) {
SrcElts = NumElts;
VTElts = NumElts;
} else {
SrcElts = Width / SrcSize;
VTElts = Width / VTSize;
}

VecSrcVT = MVT::getVectorVT(SrcVT, SrcElts);
VecVT = MVT::getVectorVT(VT, VTElts);
// sint_to_fp (fp_to_sint X) --> extelt (sint_to_fp (fp_to_sint (s2v X))), 0
//
// We are not defining the high elements (for example, zero them) because
Expand Down
50 changes: 40 additions & 10 deletions llvm/test/CodeGen/X86/fp-int-fp-cvt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,26 @@ define double @scvtf64_i64(double %a0) {
; SSE-NEXT: cvtsi2sd %rax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: scvtf64_i64:
; AVX: # %bb.0:
; AVX-NEXT: vcvttsd2si %xmm0, %rax
; AVX-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; AVX-NEXT: retq
; AVX2-LABEL: scvtf64_i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vcvttsd2si %xmm0, %rax
; AVX2-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; AVX2-NEXT: retq
;
; AVX512-VL-LABEL: scvtf64_i64:
; AVX512-VL: # %bb.0:
; AVX512-VL-NEXT: vcvttpd2qq %xmm0, %xmm0
; AVX512-VL-NEXT: vcvtqq2pd %xmm0, %xmm0
; AVX512-VL-NEXT: retq
;
; AVX512-NOVL-LABEL: scvtf64_i64:
; AVX512-NOVL: # %bb.0:
; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512-NOVL-NEXT: vcvttpd2qq %zmm0, %zmm0
; AVX512-NOVL-NEXT: vcvtqq2pd %zmm0, %zmm0
; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NOVL-NEXT: vzeroupper
; AVX512-NOVL-NEXT: retq
%ii = fptosi double %a0 to i64
%ff = sitofp i64 %ii to double
ret double %ff
Expand Down Expand Up @@ -69,11 +84,26 @@ define float @scvtf32_i64(float %a0) {
; SSE-NEXT: cvtsi2ss %rax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: scvtf32_i64:
; AVX: # %bb.0:
; AVX-NEXT: vcvttss2si %xmm0, %rax
; AVX-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; AVX-NEXT: retq
; AVX2-LABEL: scvtf32_i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vcvttss2si %xmm0, %rax
; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; AVX2-NEXT: retq
;
; AVX512-VL-LABEL: scvtf32_i64:
; AVX512-VL: # %bb.0:
; AVX512-VL-NEXT: vcvttps2qq %xmm0, %xmm0
; AVX512-VL-NEXT: vcvtqq2ps %xmm0, %xmm0
; AVX512-VL-NEXT: retq
;
; AVX512-NOVL-LABEL: scvtf32_i64:
; AVX512-NOVL: # %bb.0:
; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512-NOVL-NEXT: vcvttps2qq %ymm0, %zmm0
; AVX512-NOVL-NEXT: vcvtqq2ps %zmm0, %ymm0
; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512-NOVL-NEXT: vzeroupper
; AVX512-NOVL-NEXT: retq
%ii = fptosi float %a0 to i64
%ff = sitofp i64 %ii to float
ret float %ff
Expand Down
Loading