Skip to content

Commit

Permalink
[PowerPC] address post-commit comments for D106555; NFC
Browse files Browse the repository at this point in the history
Address namanjai's post-commit comments.
  • Loading branch information
chenzheng1030 committed Nov 5, 2021
1 parent a7a2959 commit 9695027
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 77 deletions.
117 changes: 59 additions & 58 deletions llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
Expand Up @@ -5826,66 +5826,67 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
}
}
case PPCISD::LD_SPLAT: {
// For v16i8 and v8i16, if target has no direct move, we can still handle
// this without using stack.
if (Subtarget->hasAltivec() && !Subtarget->hasDirectMove()) {
SDValue ZeroReg =
CurDAG->getRegister(Subtarget->isPPC64() ? PPC::ZERO8 : PPC::ZERO,
Subtarget->isPPC64() ? MVT::i64 : MVT::i32);
unsigned LIOpcode = Subtarget->isPPC64() ? PPC::LI8 : PPC::LI;
EVT Type = N->getValueType(0);
if (Type == MVT::v16i8 || Type == MVT::v8i16) {
// v16i8 LD_SPLAT addr
// ======>
// Mask = LVSR/LVSL 0, addr
// LoadLow = LVX 0, addr
// Perm = VPERM LoadLow, LoadLow, Mask
// Splat = VSPLTB 15/0, Perm
//
// v8i16 LD_SPLAT addr
// ======>
// Mask = LVSR/LVSL 0, addr
// LoadLow = LVX 0, addr
// LoadHigh = LVX (LI, 1), addr
// Perm = VPERM LoadLow, LoadHigh, Mask
// Splat = VSPLTH 7/0, Perm
unsigned SplatOp = (Type == MVT::v16i8) ? PPC::VSPLTB : PPC::VSPLTH;
unsigned SplatElemIndex =
Subtarget->isLittleEndian() ? ((Type == MVT::v16i8) ? 15 : 7) : 0;

SDNode *Mask = CurDAG->getMachineNode(
Subtarget->isLittleEndian() ? PPC::LVSR : PPC::LVSL, dl, Type,
ZeroReg, N->getOperand(1));

SDNode *LoadLow = CurDAG->getMachineNode(
PPC::LVX, dl, MVT::v16i8, MVT::Other,
{ZeroReg, N->getOperand(1), N->getOperand(0)});

SDNode *LoadHigh = LoadLow;
if (Type == MVT::v8i16) {
LoadHigh = CurDAG->getMachineNode(
PPC::LVX, dl, MVT::v16i8, MVT::Other,
{SDValue(CurDAG->getMachineNode(
LIOpcode, dl, MVT::i32,
CurDAG->getTargetConstant(1, dl, MVT::i8)),
0),
N->getOperand(1), SDValue(LoadLow, 1)});
}
// Handle splat loads of type v16i8 and v8i16 when there is no direct move;
// these cases do not need to go through the stack. If the target has direct
// move, the patterns in the .td file should already give the best selection.
if (!Subtarget->hasAltivec() || Subtarget->hasDirectMove())
break;

CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(LoadHigh, 1));
transferMemOperands(N, LoadHigh);
EVT Type = N->getValueType(0);
if (Type != MVT::v16i8 && Type != MVT::v8i16)
break;

SDNode *Perm =
CurDAG->getMachineNode(PPC::VPERM, dl, Type, SDValue(LoadLow, 0),
SDValue(LoadHigh, 0), SDValue(Mask, 0));
CurDAG->SelectNodeTo(
N, SplatOp, Type,
CurDAG->getTargetConstant(SplatElemIndex, dl, MVT::i8),
SDValue(Perm, 0));
return;
}
}
break;
SDValue ZeroReg =
CurDAG->getRegister(Subtarget->isPPC64() ? PPC::ZERO8 : PPC::ZERO,
Subtarget->isPPC64() ? MVT::i64 : MVT::i32);
unsigned LIOpcode = Subtarget->isPPC64() ? PPC::LI8 : PPC::LI;
// v16i8 LD_SPLAT addr
// ======>
// Mask = LVSR/LVSL 0, addr
// LoadLow = LVX 0, addr
// Perm = VPERM LoadLow, LoadLow, Mask
// Splat = VSPLTB 15/0, Perm
//
// v8i16 LD_SPLAT addr
// ======>
// Mask = LVSR/LVSL 0, addr
// LoadLow = LVX 0, addr
// LoadHigh = LVX (LI, 1), addr
// Perm = VPERM LoadLow, LoadHigh, Mask
// Splat = VSPLTH 7/0, Perm
unsigned SplatOp = (Type == MVT::v16i8) ? PPC::VSPLTB : PPC::VSPLTH;
unsigned SplatElemIndex =
Subtarget->isLittleEndian() ? ((Type == MVT::v16i8) ? 15 : 7) : 0;

SDNode *Mask = CurDAG->getMachineNode(
Subtarget->isLittleEndian() ? PPC::LVSR : PPC::LVSL, dl, Type, ZeroReg,
N->getOperand(1));

SDNode *LoadLow =
CurDAG->getMachineNode(PPC::LVX, dl, MVT::v16i8, MVT::Other,
{ZeroReg, N->getOperand(1), N->getOperand(0)});

SDNode *LoadHigh = LoadLow;
if (Type == MVT::v8i16) {
LoadHigh = CurDAG->getMachineNode(
PPC::LVX, dl, MVT::v16i8, MVT::Other,
{SDValue(CurDAG->getMachineNode(
LIOpcode, dl, MVT::i32,
CurDAG->getTargetConstant(1, dl, MVT::i8)),
0),
N->getOperand(1), SDValue(LoadLow, 1)});
}

CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(LoadHigh, 1));
transferMemOperands(N, LoadHigh);

SDNode *Perm =
CurDAG->getMachineNode(PPC::VPERM, dl, Type, SDValue(LoadLow, 0),
SDValue(LoadHigh, 0), SDValue(Mask, 0));
CurDAG->SelectNodeTo(N, SplatOp, Type,
CurDAG->getTargetConstant(SplatElemIndex, dl, MVT::i8),
SDValue(Perm, 0));
return;
}
}

Expand Down
38 changes: 19 additions & 19 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Expand Up @@ -9079,8 +9079,8 @@ static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
return true;

if (Ty == MVT::v2i64) {
// check the extend type if the input is i32 while the output vector type is
// v2i64.
// Check the extension type when the input type is i32 and the output vector
// type is v2i64.
if (cast<LoadSDNode>(Op.getOperand(0))->getMemoryVT() == MVT::i32) {
if (ISD::isZEXTLoad(InputNode))
Opcode = PPCISD::ZEXT_LD_SPLAT;
Expand Down Expand Up @@ -9164,8 +9164,17 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
const SDValue *InputLoad = &Op.getOperand(0);
LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);

unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits() *
((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
// If the input load is an extending load, it will be an i32 -> i64
// extending load and isValidSplatLoad() will update NewOpcode.
unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
unsigned ElementSize =
MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);

assert(((ElementSize == 2 * MemorySize)
? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
NewOpcode == PPCISD::SEXT_LD_SPLAT)
: (NewOpcode == PPCISD::LD_SPLAT)) &&
"Unmatched element size and opcode!\n");

// Checking for a single use of this load, we have to check for vector
// width (128 bits) / ElementSize uses (since each operand of the
Expand All @@ -9175,7 +9184,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
if (BVInOp.isUndef())
NumUsesOfInputLD--;

// Execlude somes case where LD_SPLAT is worse than scalar_to_vector:
// Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
// Below cases should also happen for "lfiwzx/lfiwax + LE target + index
// 1" and "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
// 15", but function isValidSplatLoad() now will only return true when
Expand All @@ -9193,22 +9202,13 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
Subtarget.hasLFIWAX()))
return SDValue();

// case 2 - lxvrhx
// 2.1: load result is i16;
// 2.2: build a v8i16 vector with above loaded value;
// case 2 - lxvr[hb]x
// 2.1: load result is at most i16;
// 2.2: build a vector with above loaded value;
// 2.3: the vector has only one value at index 0, others are all undef;
// 2.4: on LE target, so that lxvrhx does not need any permute.
if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
Subtarget.isISA3_1() && Op->getValueType(0) == MVT::v16i8)
return SDValue();

// case 3 - lxvrbx
// 3.1: load result is i8;
// 3.2: build a v16i8 vector with above loaded value;
// 3.3: the vector has only one value at index 0, others are all undef;
// 3.4: on LE target, so that lxvrbx does not need any permute.
// 2.4: on LE target, so that lxvr[hb]x does not need any permute.
if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
Subtarget.isISA3_1() && Op->getValueType(0) == MVT::v8i16)
Subtarget.isISA3_1() && ElementSize <= 16)
return SDValue();

assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
Expand Down

0 comments on commit 9695027

Please sign in to comment.