Skip to content

Commit

Permalink
[x86] use vector instructions to lower more FP->int->FP casts
Browse files Browse the repository at this point in the history
This is an enhancement to D77895 to avoid another
round-trip from XMM->GPR->XMM. This time we handle
the case where the value starts and ends as an f64 and
is cast through a signed i32 intermediate value
(f64 -> i32 -> f64).

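It's a bit more involved than I initially assumed
because we need to use target-specific opcodes to
represent the non-standard cast ops: the 128-bit
f64 <-> i32 conversions are v2f64 <-> v4i32, so the
FP and integer vector types differ in element count
and the generic FP_TO_SINT/SINT_TO_FP nodes are
replaced by X86ISD::CVTTP2SI and X86ISD::CVTSI2P.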

Differential Revision: https://reviews.llvm.org/D78362
  • Loading branch information
rotateright committed Apr 19, 2020
1 parent 8c68de2 commit cceb630
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 29 deletions.
26 changes: 17 additions & 9 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -19178,17 +19178,25 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, SelectionDAG &DAG,
MVT IntVT = CastToInt.getSimpleValueType();
SDValue X = CastToInt.getOperand(0);
// TODO: Allow size-changing from source to dest (double -> i32 -> float)
if (X.getSimpleValueType() != VT ||
VT.getSizeInBits() != IntVT.getSizeInBits())
if (X.getSimpleValueType() != VT)
return SDValue();

// See if we have a 128-bit vector cast op for this type of cast.
unsigned NumEltsInXMM = 128 / VT.getScalarSizeInBits();
MVT VecFPVT = MVT::getVectorVT(VT, NumEltsInXMM);
MVT VecIntVT = MVT::getVectorVT(IntVT, NumEltsInXMM);
if (!useVectorCast(CastToFP.getOpcode(), VecIntVT, VecFPVT, Subtarget))
// See if we have 128-bit vector cast instructions for this type of cast.
// We need cvttps2dq + cvtdq2ps or cvttpd2dq + cvtdq2pd.
if (!Subtarget.hasSSE2() || (VT != MVT::f32 && VT != MVT::f64) ||
IntVT != MVT::i32)
return SDValue();

unsigned NumFPEltsInXMM = 128 / VT.getScalarSizeInBits();
unsigned NumIntEltsInXMM = 128 / IntVT.getScalarSizeInBits();
MVT VecFPVT = MVT::getVectorVT(VT, NumFPEltsInXMM);
MVT VecIntVT = MVT::getVectorVT(IntVT, NumIntEltsInXMM);

// We need target-specific opcodes if this is v2f64 -> v4i32 -> v2f64.
bool NeedX86Opcodes = VT.getSizeInBits() != IntVT.getSizeInBits();
unsigned ToIntOpcode = NeedX86Opcodes ? X86ISD::CVTTP2SI : ISD::FP_TO_SINT;
unsigned ToFPOpcode = NeedX86Opcodes ? X86ISD::CVTSI2P : ISD::SINT_TO_FP;

// sint_to_fp (fp_to_sint X) --> extelt (sint_to_fp (fp_to_sint (s2v X))), 0
//
// We are not defining the high elements (for example, zero them) because
Expand All @@ -19198,8 +19206,8 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, SelectionDAG &DAG,
SDLoc DL(CastToFP);
SDValue ZeroIdx = DAG.getIntPtrConstant(0, DL);
SDValue VecX = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecFPVT, X);
SDValue VCastToInt = DAG.getNode(ISD::FP_TO_SINT, DL, VecIntVT, VecX);
SDValue VCastToFP = DAG.getNode(ISD::SINT_TO_FP, DL, VecFPVT, VCastToInt);
SDValue VCastToInt = DAG.getNode(ToIntOpcode, DL, VecIntVT, VecX);
SDValue VCastToFP = DAG.getNode(ToFPOpcode, DL, VecFPVT, VCastToInt);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VCastToFP, ZeroIdx);
}

Expand Down
14 changes: 6 additions & 8 deletions llvm/test/CodeGen/X86/ftrunc.ll
Expand Up @@ -263,15 +263,14 @@ define float @trunc_signed_f32_nsz(float %x) #0 {
define double @trunc_signed32_f64_no_fast_math(double %x) {
; SSE-LABEL: trunc_signed32_f64_no_fast_math:
; SSE: # %bb.0:
; SSE-NEXT: cvttsd2si %xmm0, %eax
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2sd %eax, %xmm0
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_signed32_f64_no_fast_math:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvttsd2si %xmm0, %eax
; AVX1-NEXT: vcvtsi2sd %eax, %xmm1, %xmm0
; AVX1-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX1-NEXT: retq
%i = fptosi double %x to i32
%r = sitofp i32 %i to double
Expand All @@ -281,9 +280,8 @@ define double @trunc_signed32_f64_no_fast_math(double %x) {
define double @trunc_signed32_f64_nsz(double %x) #0 {
; SSE2-LABEL: trunc_signed32_f64_nsz:
; SSE2: # %bb.0:
; SSE2-NEXT: cvttsd2si %xmm0, %eax
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2sd %eax, %xmm0
; SSE2-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: trunc_signed32_f64_nsz:
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/X86/isint.ll
Expand Up @@ -7,8 +7,8 @@
define i32 @isint_return(double %d) nounwind {
; CHECK64-LABEL: isint_return:
; CHECK64: # %bb.0:
; CHECK64-NEXT: cvttsd2si %xmm0, %eax
; CHECK64-NEXT: cvtsi2sd %eax, %xmm1
; CHECK64-NEXT: cvttpd2dq %xmm0, %xmm1
; CHECK64-NEXT: cvtdq2pd %xmm1, %xmm1
; CHECK64-NEXT: cmpeqsd %xmm0, %xmm1
; CHECK64-NEXT: movq %xmm1, %rax
; CHECK64-NEXT: andl $1, %eax
Expand All @@ -18,8 +18,8 @@ define i32 @isint_return(double %d) nounwind {
; CHECK32-LABEL: isint_return:
; CHECK32: # %bb.0:
; CHECK32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK32-NEXT: cvttsd2si %xmm0, %eax
; CHECK32-NEXT: cvtsi2sd %eax, %xmm1
; CHECK32-NEXT: cvttpd2dq %xmm0, %xmm1
; CHECK32-NEXT: cvtdq2pd %xmm1, %xmm1
; CHECK32-NEXT: cmpeqsd %xmm0, %xmm1
; CHECK32-NEXT: movd %xmm1, %eax
; CHECK32-NEXT: andl $1, %eax
Expand Down Expand Up @@ -62,8 +62,8 @@ declare void @foo()
define void @isint_branch(double %d) nounwind {
; CHECK64-LABEL: isint_branch:
; CHECK64: # %bb.0:
; CHECK64-NEXT: cvttsd2si %xmm0, %eax
; CHECK64-NEXT: cvtsi2sd %eax, %xmm1
; CHECK64-NEXT: cvttpd2dq %xmm0, %xmm1
; CHECK64-NEXT: cvtdq2pd %xmm1, %xmm1
; CHECK64-NEXT: ucomisd %xmm1, %xmm0
; CHECK64-NEXT: jne .LBB2_2
; CHECK64-NEXT: jp .LBB2_2
Expand All @@ -77,8 +77,8 @@ define void @isint_branch(double %d) nounwind {
; CHECK32-LABEL: isint_branch:
; CHECK32: # %bb.0:
; CHECK32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK32-NEXT: cvttsd2si %xmm0, %eax
; CHECK32-NEXT: cvtsi2sd %eax, %xmm1
; CHECK32-NEXT: cvttpd2dq %xmm0, %xmm1
; CHECK32-NEXT: cvtdq2pd %xmm1, %xmm1
; CHECK32-NEXT: ucomisd %xmm1, %xmm0
; CHECK32-NEXT: jne .LBB2_2
; CHECK32-NEXT: jp .LBB2_2
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/setoeq.ll
Expand Up @@ -5,8 +5,8 @@ define zeroext i8 @t(double %x) nounwind readnone {
; CHECK-LABEL: t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: cvttsd2si %xmm0, %eax
; CHECK-NEXT: cvtsi2sd %eax, %xmm1
; CHECK-NEXT: cvttpd2dq %xmm0, %xmm1
; CHECK-NEXT: cvtdq2pd %xmm1, %xmm1
; CHECK-NEXT: cmpeqsd %xmm0, %xmm1
; CHECK-NEXT: movd %xmm1, %eax
; CHECK-NEXT: andl $1, %eax
Expand All @@ -24,8 +24,8 @@ define zeroext i8 @u(double %x) nounwind readnone {
; CHECK-LABEL: u:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: cvttsd2si %xmm0, %eax
; CHECK-NEXT: cvtsi2sd %eax, %xmm1
; CHECK-NEXT: cvttpd2dq %xmm0, %xmm1
; CHECK-NEXT: cvtdq2pd %xmm1, %xmm1
; CHECK-NEXT: cmpneqsd %xmm0, %xmm1
; CHECK-NEXT: movd %xmm1, %eax
; CHECK-NEXT: andl $1, %eax
Expand Down

0 comments on commit cceb630

Please sign in to comment.