Skip to content

Commit

Permalink
[PowerPC] Fix a performance bug for PPC::XXPERMDI.
Browse files Browse the repository at this point in the history
There are some VectorShuffle Nodes in SDAG which can be selected to XXPERMDI
Instruction, this patch recognizes them and does the selection to improve
the PPC performance.

Differential Revision: https://reviews.llvm.org/D33404

llvm-svn: 304298
  • Loading branch information
Tony Jiang committed May 31, 2017
1 parent b1f0a34 commit 60c247d
Show file tree
Hide file tree
Showing 5 changed files with 417 additions and 13 deletions.
106 changes: 94 additions & 12 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Expand Up @@ -1112,6 +1112,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::VPERM: return "PPCISD::VPERM";
case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
case PPCISD::XXINSERT: return "PPCISD::XXINSERT";
case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
case PPCISD::VECSHL: return "PPCISD::VECSHL";
case PPCISD::CMPB: return "PPCISD::CMPB";
case PPCISD::Hi: return "PPCISD::Hi";
Expand Down Expand Up @@ -1593,25 +1594,33 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
return true;
}

// Check that the mask is shuffling words
static bool isWordShuffleMask(ShuffleVectorSDNode *N) {
for (unsigned i = 0; i < 4; ++i) {
unsigned B0 = N->getMaskElt(i*4);
unsigned B1 = N->getMaskElt(i*4+1);
unsigned B2 = N->getMaskElt(i*4+2);
unsigned B3 = N->getMaskElt(i*4+3);
if (B0 % 4)
return false;
if (B1 != B0+1 || B2 != B1+1 || B3 != B2+1)
// Check that the mask is shuffling N byte elements.
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width) {
assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
"Unexpected element width.");

unsigned NumOfElem = 16 / Width;
unsigned MaskVal[16]; // Width is never greater than 16
for (unsigned i = 0; i < NumOfElem; ++i) {
MaskVal[0] = N->getMaskElt(i * Width);
if (MaskVal[0] % Width) {
return false;
}

for (unsigned int j = 1; j < Width; ++j) {
MaskVal[j] = N->getMaskElt(i * Width + j);
if (MaskVal[j] != MaskVal[j-1] + 1) {
return false;
}
}
}

return true;
}

bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
unsigned &InsertAtByte, bool &Swap, bool IsLE) {
if (!isWordShuffleMask(N))
if (!isNByteElemShuffleMask(N, 4))
return false;

// Now we look at mask elements 0,4,8,12
Expand Down Expand Up @@ -1688,7 +1697,7 @@ bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
bool &Swap, bool IsLE) {
assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
// Ensure each byte index of the word is consecutive.
if (!isWordShuffleMask(N))
if (!isNByteElemShuffleMask(N, 4))
return false;

// Now we look at mask elements 0,4,8,12, which are the beginning of words.
Expand Down Expand Up @@ -1746,6 +1755,66 @@ bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
}
}

/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
/// if the inputs to the instruction should be swapped and set \p DM to the
/// value for the immediate.
/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
/// AND element 0 of the result comes from the first input (LE) or second input
/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
/// mask.
bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
bool &Swap, bool IsLE) {
assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

// Ensure each byte index of the double word is consecutive.
if (!isNByteElemShuffleMask(N, 8))
return false;

unsigned M0 = N->getMaskElt(0) / 8;
unsigned M1 = N->getMaskElt(8) / 8;
assert(((M0 | M1) < 4) && "A mask element out of bounds?");

// If both vector operands for the shuffle are the same vector, the mask will
// contain only elements from the first one and the second one will be undef.
if (N->getOperand(1).isUndef()) {
if ((M0 | M1) < 2) {
DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
Swap = false;
return true;
} else
return false;
}

if (IsLE) {
if (M0 > 1 && M1 < 2) {
Swap = false;
} else if (M0 < 2 && M1 > 1) {
M0 = (M0 + 2) % 4;
M1 = (M1 + 2) % 4;
Swap = true;
} else
return false;

// Note: if control flow comes here that means Swap is already set above
DM = (((~M1) & 1) << 1) + ((~M0) & 1);
return true;
} else { // BE
if (M0 < 2 && M1 > 1) {
Swap = false;
} else if (M0 > 1 && M1 < 2) {
M0 = (M0 + 2) % 4;
M1 = (M1 + 2) % 4;
Swap = true;
} else
return false;

// Note: if control flow comes here that means Swap is already set above
DM = (M0 << 1) + (M1 & 1);
return true;
}
}


/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
Expand Down Expand Up @@ -7760,6 +7829,19 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
}

if (Subtarget.hasVSX() &&
PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
if (Swap)
std::swap(V1, V2);
SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
SDValue Conv2 =
DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);

SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
DAG.getConstant(ShiftElts, dl, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
}

if (Subtarget.hasVSX()) {
if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/PowerPC/PPCISelLowering.h
Expand Up @@ -90,6 +90,10 @@ namespace llvm {
///
VECSHL,

/// XXPERMDI - The PPC XXPERMDI instruction
///
XXPERMDI,

/// The CMPB instruction (takes two operands of i32 or i64).
CMPB,

Expand Down Expand Up @@ -454,6 +458,10 @@ namespace llvm {
/// for a XXSLDWI instruction.
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
bool &Swap, bool IsLE);
/// isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable
/// for a XXPERMDI instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
bool &Swap, bool IsLE);

/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the
/// shift amount, otherwise return -1.
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstrInfo.td
Expand Up @@ -53,6 +53,10 @@ def SDT_PPCVecInsert : SDTypeProfile<1, 3, [ SDTCisVec<0>,
SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>
]>;

def SDT_PPCxxpermdi: SDTypeProfile<1, 3, [ SDTCisVec<0>,
SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>
]>;

def SDT_PPCvcmp : SDTypeProfile<1, 3, [
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>
]>;
Expand Down Expand Up @@ -170,6 +174,7 @@ def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>;
def PPCxxinsert : SDNode<"PPCISD::XXINSERT", SDT_PPCVecInsert, []>;
def PPCxxpermdi : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>;
def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>;

def PPCqvfperm : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>;
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/PowerPC/PPCInstrVSX.td
Expand Up @@ -843,7 +843,9 @@ let Uses = [RM] in {

def XXPERMDI : XX3Form_2<60, 10,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$DM),
"xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm, []>;
"xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm,
[(set v2i64:$XT, (PPCxxpermdi v2i64:$XA, v2i64:$XB,
imm32SExt16:$DM))]>;
let isCodeGenOnly = 1 in
def XXPERMDIs : XX3Form_2s<60, 10, (outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$DM),
"xxpermdi $XT, $XA, $XA, $DM", IIC_VecPerm, []>;
Expand Down

0 comments on commit 60c247d

Please sign in to comment.