Skip to content

Commit

Permalink
Merging r338658:
Browse files Browse the repository at this point in the history
------------------------------------------------------------------------
r338658 | nemanjai | 2018-08-02 02:03:22 +0200 (Thu, 02 Aug 2018) | 13 lines

[PowerPC] Do not round values prior to converting to integer

Adding the FP_ROUND nodes when combining FP_TO_[SU]INT of elements
feeding a BUILD_VECTOR into an FP_TO_[SU]INT of the built vector
loses precision. This patch removes the code that adds these nodes
to true f64 operands. It also adds patterns required to ensure
the code is still vectorized rather than converting individual
elements and inserting into a vector.

Fixes https://bugs.llvm.org/show_bug.cgi?id=38342

Differential Revision: https://reviews.llvm.org/D50121

------------------------------------------------------------------------

llvm-svn: 338678
  • Loading branch information
zmodem committed Aug 2, 2018
1 parent 41c19c9 commit 63740db
Show file tree
Hide file tree
Showing 3 changed files with 258 additions and 207 deletions.
22 changes: 19 additions & 3 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Expand Up @@ -11761,6 +11761,14 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
ShiftCst);
}

// Is this an extending load from an f32 to an f64?
static bool isFPExtLoad(SDValue Op) {
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
return LD->getExtensionType() == ISD::EXTLOAD &&
Op.getValueType() == MVT::f64;
return false;
}

/// Reduces the number of fp-to-int conversion when building a vector.
///
/// If this vector is built out of floating to integer conversions,
Expand Down Expand Up @@ -11795,11 +11803,18 @@ combineElementTruncationToVectorTruncation(SDNode *N,
SmallVector<SDValue, 4> Ops;
EVT TargetVT = N->getValueType(0);
for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
if (N->getOperand(i).getOpcode() != PPCISD::MFVSR)
SDValue NextOp = N->getOperand(i);
if (NextOp.getOpcode() != PPCISD::MFVSR)
return SDValue();
unsigned NextConversion = N->getOperand(i).getOperand(0).getOpcode();
unsigned NextConversion = NextOp.getOperand(0).getOpcode();
if (NextConversion != FirstConversion)
return SDValue();
// If we are converting to 32-bit integers, we need to add an FP_ROUND.
// This is not valid if the input was originally double precision. It is
// also not profitable to do unless this is an extending load in which
// case doing this combine will allow us to combine consecutive loads.
if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
return SDValue();
if (N->getOperand(i) != FirstInput)
IsSplat = false;
}
Expand All @@ -11813,8 +11828,9 @@ combineElementTruncationToVectorTruncation(SDNode *N,
// Now that we know we have the right type of node, get its operands
for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
SDValue In = N->getOperand(i).getOperand(0);
// For 32-bit values, we need to add an FP_ROUND node.
if (Is32Bit) {
// For 32-bit values, we need to add an FP_ROUND node (if we made it
// here, we know that all inputs are extending loads so this is safe).
if (In.isUndef())
Ops.push_back(DAG.getUNDEF(SrcVT));
else {
Expand Down
86 changes: 86 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstrVSX.td
Expand Up @@ -3494,6 +3494,17 @@ def DblToFlt {
dag B1 = (f32 (fpround (f64 (extractelt v2f64:$B, 1))));
}

def ExtDbl {
dag A0S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$A, 0))))));
dag A1S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$A, 1))))));
dag B0S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$B, 0))))));
dag B1S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$B, 1))))));
dag A0U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$A, 0))))));
dag A1U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$A, 1))))));
dag B0U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$B, 0))))));
dag B1U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$B, 1))))));
}

def ByteToWord {
dag LE_A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 0)), i8));
dag LE_A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 4)), i8));
Expand Down Expand Up @@ -3571,9 +3582,15 @@ def FltToULong {
}
def DblToInt {
dag A = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$A))));
dag B = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$B))));
dag C = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$C))));
dag D = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$D))));
}
def DblToUInt {
dag A = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$A))));
dag B = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$B))));
dag C = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$C))));
dag D = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$D))));
}
def DblToLong {
dag A = (i64 (PPCmfvsr (f64 (PPCfctidz f64:$A))));
Expand Down Expand Up @@ -3612,6 +3629,47 @@ def MrgFP {
dag BAlToFlt = (XVCVDPSP (XXPERMDI $B, $A, 3));
}

// Word-element merge dags - conversions from f64 to i32 merged into vectors.
def MrgWords {
// For big endian, we merge low and hi doublewords (A, B).
dag A0B0 = (v2f64 (XXPERMDI v2f64:$A, v2f64:$B, 0));
dag A1B1 = (v2f64 (XXPERMDI v2f64:$A, v2f64:$B, 3));
dag CVA1B1S = (v4i32 (XVCVDPSXWS A1B1));
dag CVA0B0S = (v4i32 (XVCVDPSXWS A0B0));
dag CVA1B1U = (v4i32 (XVCVDPUXWS A1B1));
dag CVA0B0U = (v4i32 (XVCVDPUXWS A0B0));

// For little endian, we merge low and hi doublewords (B, A).
dag B1A1 = (v2f64 (XXPERMDI v2f64:$B, v2f64:$A, 0));
dag B0A0 = (v2f64 (XXPERMDI v2f64:$B, v2f64:$A, 3));
dag CVB1A1S = (v4i32 (XVCVDPSXWS B1A1));
dag CVB0A0S = (v4i32 (XVCVDPSXWS B0A0));
dag CVB1A1U = (v4i32 (XVCVDPUXWS B1A1));
dag CVB0A0U = (v4i32 (XVCVDPUXWS B0A0));

// For big endian, we merge hi doublewords of (A, C) and (B, D), convert
// then merge.
dag AC = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$A, VSRC),
(COPY_TO_REGCLASS f64:$C, VSRC), 0));
dag BD = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$B, VSRC),
(COPY_TO_REGCLASS f64:$D, VSRC), 0));
dag CVACS = (v4i32 (XVCVDPSXWS AC));
dag CVBDS = (v4i32 (XVCVDPSXWS BD));
dag CVACU = (v4i32 (XVCVDPUXWS AC));
dag CVBDU = (v4i32 (XVCVDPUXWS BD));

// For little endian, we merge hi doublewords of (D, B) and (C, A), convert
// then merge.
dag DB = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$D, VSRC),
(COPY_TO_REGCLASS f64:$B, VSRC), 0));
dag CA = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$C, VSRC),
(COPY_TO_REGCLASS f64:$A, VSRC), 0));
dag CVDBS = (v4i32 (XVCVDPSXWS DB));
dag CVCAS = (v4i32 (XVCVDPSXWS CA));
dag CVDBU = (v4i32 (XVCVDPUXWS DB));
dag CVCAU = (v4i32 (XVCVDPUXWS CA));
}

// Patterns for BUILD_VECTOR nodes.
let AddedComplexity = 400 in {

Expand Down Expand Up @@ -3679,6 +3737,20 @@ let AddedComplexity = 400 in {
def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
DblToFlt.B0, DblToFlt.B1)),
(v4f32 (VMRGEW MrgFP.ABhToFlt, MrgFP.ABlToFlt))>;

// Convert 4 doubles to a vector of ints.
def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B,
DblToInt.C, DblToInt.D)),
(v4i32 (VMRGEW MrgWords.CVACS, MrgWords.CVBDS))>;
def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B,
DblToUInt.C, DblToUInt.D)),
(v4i32 (VMRGEW MrgWords.CVACU, MrgWords.CVBDU))>;
def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S,
ExtDbl.B0S, ExtDbl.B1S)),
(v4i32 (VMRGEW MrgWords.CVA0B0S, MrgWords.CVA1B1S))>;
def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U,
ExtDbl.B0U, ExtDbl.B1U)),
(v4i32 (VMRGEW MrgWords.CVA0B0U, MrgWords.CVA1B1U))>;
}

let Predicates = [IsLittleEndian, HasVSX] in {
Expand All @@ -3693,6 +3765,20 @@ let AddedComplexity = 400 in {
def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
DblToFlt.B0, DblToFlt.B1)),
(v4f32 (VMRGEW MrgFP.BAhToFlt, MrgFP.BAlToFlt))>;

// Convert 4 doubles to a vector of ints.
def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B,
DblToInt.C, DblToInt.D)),
(v4i32 (VMRGEW MrgWords.CVDBS, MrgWords.CVCAS))>;
def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B,
DblToUInt.C, DblToUInt.D)),
(v4i32 (VMRGEW MrgWords.CVDBU, MrgWords.CVCAU))>;
def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S,
ExtDbl.B0S, ExtDbl.B1S)),
(v4i32 (VMRGEW MrgWords.CVB1A1S, MrgWords.CVB0A0S))>;
def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U,
ExtDbl.B0U, ExtDbl.B1U)),
(v4i32 (VMRGEW MrgWords.CVB1A1U, MrgWords.CVB0A0U))>;
}

let Predicates = [HasDirectMove] in {
Expand Down

0 comments on commit 63740db

Please sign in to comment.