Skip to content

Commit

Permalink
[X86] Improve lowering for v2i32->v2f64 uint_to_fp
Browse files Browse the repository at this point in the history
This uses an alternative implementation of this conversion derived
from our v2i32->v2f32 handling. We can zero extend the v2i32 to
v2i64 and OR it with the bit representation of 2.0^52, which gives
us 2.0^52 plus the 32-bit integer since a double's mantissa is 52 bits.
Then we just need to subtract 2.0^52 as a double and let the floating
point unit normalize the remaining bits into a valid double.

This is fewer instructions than our previous code, but does require
a port 5 shuffle for the zero extend or unpack.

Differential Revision: https://reviews.llvm.org/D71945
  • Loading branch information
topperc committed Jan 3, 2020
1 parent cf48101 commit 2875cc6
Show file tree
Hide file tree
Showing 4 changed files with 140 additions and 269 deletions.
50 changes: 14 additions & 36 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -18981,43 +18981,21 @@ static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(X86ISD::CVTUI2P, DL, MVT::v2f64, N0);
}

// Legalize to v4i32 type.
N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0,
DAG.getUNDEF(MVT::v2i32));
// Zero extend to 2i64, OR with the floating point representation of 2^52.
// This gives us the floating point equivalent of 2^52 + the i32 integer
// since double has 52-bits of mantissa. Then subtract 2^52 in floating
// point leaving just our i32 integers in double format.
SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i64, N0);
SDValue VBias =
DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), DL, MVT::v2f64);
SDValue Or = DAG.getNode(ISD::OR, DL, MVT::v2i64, ZExtIn,
DAG.getBitcast(MVT::v2i64, VBias));
Or = DAG.getBitcast(MVT::v2f64, Or);

// Same implementation as VectorLegalizer::ExpandUINT_TO_FLOAT,
// but using v2i32 to v2f64 with X86ISD::CVTSI2P.
SDValue HalfWord = DAG.getConstant(16, DL, MVT::v4i32);
SDValue HalfWordMask = DAG.getConstant(0x0000FFFF, DL, MVT::v4i32);

// Two to the power of half-word-size.
SDValue TWOHW = DAG.getConstantFP((double)(1 << 16), DL, MVT::v2f64);

// Clear upper part of LO, lower HI.
SDValue HI = DAG.getNode(ISD::SRL, DL, MVT::v4i32, N0, HalfWord);
SDValue LO = DAG.getNode(ISD::AND, DL, MVT::v4i32, N0, HalfWordMask);

if (IsStrict) {
SDValue fHI = DAG.getNode(X86ISD::STRICT_CVTSI2P, DL,
{MVT::v2f64, MVT::Other}, {Op.getOperand(0), HI});
fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {MVT::v2f64, MVT::Other},
{fHI.getValue(1), fHI, TWOHW});
SDValue fLO = DAG.getNode(X86ISD::STRICT_CVTSI2P, DL,
{MVT::v2f64, MVT::Other}, {Op.getOperand(0), LO});
SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
fHI.getValue(1), fLO.getValue(1));

// Add the two halves
return DAG.getNode(ISD::STRICT_FADD, DL, {MVT::v2f64, MVT::Other},
{Chain, fHI, fLO});
}

SDValue fHI = DAG.getNode(X86ISD::CVTSI2P, DL, MVT::v2f64, HI);
fHI = DAG.getNode(ISD::FMUL, DL, MVT::v2f64, fHI, TWOHW);
SDValue fLO = DAG.getNode(X86ISD::CVTSI2P, DL, MVT::v2f64, LO);

// Add the two halves.
return DAG.getNode(ISD::FADD, DL, MVT::v2f64, fHI, fLO);
if (IsStrict)
return DAG.getNode(ISD::STRICT_FSUB, DL, {MVT::v2f64, MVT::Other},
{Op.getOperand(0), Or, VBias});
return DAG.getNode(ISD::FSUB, DL, MVT::v2f64, Or, VBias);
}

static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
Expand Down
57 changes: 15 additions & 42 deletions llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll
Expand Up @@ -836,49 +836,22 @@ define <2 x double> @sitofp_v2i32_v2f64(<2 x i32> %x) #0 {
}

define <2 x double> @uitofp_v2i32_v2f64(<2 x i32> %x) #0 {
; SSE-32-LABEL: uitofp_v2i32_v2f64:
; SSE-32: # %bb.0:
; SSE-32-NEXT: movdqa {{.*#+}} xmm1 = [65535,0,65535,0,65535,0,65535,0]
; SSE-32-NEXT: pand %xmm0, %xmm1
; SSE-32-NEXT: cvtdq2pd %xmm1, %xmm1
; SSE-32-NEXT: psrld $16, %xmm0
; SSE-32-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE-32-NEXT: mulpd {{\.LCPI.*}}, %xmm0
; SSE-32-NEXT: addpd %xmm1, %xmm0
; SSE-32-NEXT: retl
;
; SSE-64-LABEL: uitofp_v2i32_v2f64:
; SSE-64: # %bb.0:
; SSE-64-NEXT: movdqa {{.*#+}} xmm1 = [65535,0,65535,0,65535,0,65535,0]
; SSE-64-NEXT: pand %xmm0, %xmm1
; SSE-64-NEXT: cvtdq2pd %xmm1, %xmm1
; SSE-64-NEXT: psrld $16, %xmm0
; SSE-64-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE-64-NEXT: mulpd {{.*}}(%rip), %xmm0
; SSE-64-NEXT: addpd %xmm1, %xmm0
; SSE-64-NEXT: retq
;
; AVX1-32-LABEL: uitofp_v2i32_v2f64:
; AVX1-32: # %bb.0:
; AVX1-32-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-32-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX1-32-NEXT: vcvtdq2pd %xmm1, %xmm1
; AVX1-32-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX1-32-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX1-32-NEXT: vmulpd {{\.LCPI.*}}, %xmm0, %xmm0
; AVX1-32-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX1-32-NEXT: retl
; SSE-LABEL: uitofp_v2i32_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: xorpd %xmm1, %xmm1
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; SSE-NEXT: orpd %xmm1, %xmm0
; SSE-NEXT: subpd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX1-64-LABEL: uitofp_v2i32_v2f64:
; AVX1-64: # %bb.0:
; AVX1-64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-64-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX1-64-NEXT: vcvtdq2pd %xmm1, %xmm1
; AVX1-64-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX1-64-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX1-64-NEXT: vmulpd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-64-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX1-64-NEXT: retq
; AVX1-LABEL: uitofp_v2i32_v2f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: uitofp_v2i32_v2f64:
; AVX512F: # %bb.0:
Expand Down

0 comments on commit 2875cc6

Please sign in to comment.