Skip to content

Commit ee47d25

Browse files
author
Tony Varghese
committed
Emit lxvkq and vsrq instructions for build vector patterns
1 parent ad49111 commit ee47d25

File tree

4 files changed

+452
-9
lines changed

4 files changed

+452
-9
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9679,6 +9679,13 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
96799679
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
96809680
assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
96819681

9682+
// Recognize build vector patterns to emit VSX vector instructions
9683+
// instead of loading value from memory.
9684+
if (Subtarget.isISA3_1() && Subtarget.hasVSX()) {
9685+
if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG))
9686+
return VecPat;
9687+
}
9688+
96829689
if (Subtarget.hasP10Vector()) {
96839690
APInt BitMask(32, 0);
96849691
// If the value of the vector is all zeros or all ones,
@@ -15657,6 +15664,133 @@ combineElementTruncationToVectorTruncation(SDNode *N,
1565715664
return SDValue();
1565815665
}
1565915666

15667+
// LXVKQ instruction load VSX vector with a special quadword value
15668+
// based on an immediate value. This helper method returns the details of the
15669+
// match as a tuple of {LXVKQ unsigned IMM Value, right_shift_amount}
15670+
// to help generate the LXVKQ instruction and the subsequent shift instruction
15671+
// required to match the original build vector pattern.
15672+
15673+
// LXVKQPattern: {LXVKQ unsigned IMM Value, right_shift_amount}
15674+
using LXVKQPattern = std::tuple<uint32_t, uint8_t>;
15675+
15676+
static std::optional<LXVKQPattern> getPatternInfo(const APInt &FullVal) {
15677+
15678+
static const auto BaseLXVKQPatterns = []() {
15679+
// LXVKQ instruction loads the Quadword value:
15680+
// 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000
15681+
return std::array<std::pair<APInt, uint32_t>, 1>{
15682+
{{APInt(128, 0x8000000000000000ULL) << 64, 16}}};
15683+
}();
15684+
15685+
// Check for direct LXVKQ match (no shift needed)
15686+
for (const auto &[BasePattern, Uim] : BaseLXVKQPatterns) {
15687+
if (FullVal == BasePattern)
15688+
return std::make_tuple(Uim, uint8_t{0});
15689+
}
15690+
15691+
// Check if FullValue can be generated by (right) shifting a base pattern
15692+
for (const auto &[BasePattern, Uim] : BaseLXVKQPatterns) {
15693+
if (BasePattern.lshr(127) == FullVal)
15694+
return std::make_tuple(Uim, uint8_t{127});
15695+
}
15696+
15697+
return std::nullopt;
15698+
}
15699+
15700+
/// Combine vector loads to a single load by recognising patterns in the Build
15701+
/// Vector. LXVKQ instruction load VSX vector with a special quadword value
15702+
/// based on an immediate value.
15703+
SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op,
15704+
SelectionDAG &DAG) const {
15705+
15706+
assert((Op.getNode() && Op.getOpcode() == ISD::BUILD_VECTOR) &&
15707+
"Expected a BuildVectorSDNode in combineBVLoadsSpecialValue");
15708+
15709+
// This transformation is only supported if we are loading either a byte,
15710+
// halfword, word, or doubleword.
15711+
EVT VT = Op.getValueType();
15712+
if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
15713+
VT == MVT::v2i64))
15714+
return SDValue();
15715+
15716+
LLVM_DEBUG(llvm::dbgs() << "\ncombineBVLoadsSpecialValue: Build vector ("
15717+
<< VT.getEVTString() << "): ";
15718+
Op->dump());
15719+
15720+
unsigned NumElems = VT.getVectorNumElements();
15721+
unsigned ElemBits = VT.getScalarSizeInBits();
15722+
15723+
bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
15724+
15725+
// Check for Non-constant operand in the build vector.
15726+
for (const SDValue &Operand : Op.getNode()->op_values()) {
15727+
if (!isa<ConstantSDNode>(Operand))
15728+
return SDValue();
15729+
}
15730+
15731+
// Assemble build vector operands as a 128-bit register value
15732+
// We need to reconstruct what the 128-bit register pattern would be
15733+
// that produces this vector when interpreted with the current endianness
15734+
APInt FullVal = APInt::getZero(128);
15735+
15736+
for (unsigned Index = 0; Index < NumElems; ++Index) {
15737+
auto *C = cast<ConstantSDNode>(Op.getOperand(Index));
15738+
15739+
// Get element value as raw bits (zero-extended)
15740+
uint64_t ElemValue = C->getZExtValue();
15741+
15742+
// Mask to element size to ensure we only get the relevant bits
15743+
if (ElemBits < 64)
15744+
ElemValue &= ((1ULL << ElemBits) - 1);
15745+
15746+
// Calculate bit position for this element in the 128-bit register
15747+
unsigned BitPos =
15748+
(IsLittleEndian) ? (Index * ElemBits) : (128 - (Index + 1) * ElemBits);
15749+
15750+
// Create APInt for the element value and shift it to correct position
15751+
APInt ElemAPInt(128, ElemValue);
15752+
ElemAPInt <<= BitPos;
15753+
15754+
// Place the element value at the correct bit position
15755+
FullVal |= ElemAPInt;
15756+
}
15757+
15758+
if (auto UIMOpt = getPatternInfo(FullVal)) {
15759+
const auto &[Uim, ShiftAmount] = *UIMOpt;
15760+
SDLoc Dl(Op);
15761+
15762+
// Generate LXVKQ instruction if the shift amount is zero.
15763+
if (ShiftAmount == 0) {
15764+
SDValue UimVal = DAG.getTargetConstant(Uim, Dl, MVT::i32);
15765+
SDValue LxvkqInstr =
15766+
SDValue(DAG.getMachineNode(PPC::LXVKQ, Dl, VT, UimVal), 0);
15767+
LLVM_DEBUG(llvm::dbgs()
15768+
<< "combineBVLoadsSpecialValue: Instruction Emitted ";
15769+
LxvkqInstr.dump());
15770+
return LxvkqInstr;
15771+
}
15772+
15773+
// The right shifted pattern can be constructed using a combination of
15774+
// XXSPLITIB and VSRQ instruction. VSRQ uses the shift amount from the lower
15775+
// 7 bits of byte 15. This can be specified using XXSPLITIB with immediate
15776+
// value 255.
15777+
SDValue ShiftAmountVec =
15778+
SDValue(DAG.getMachineNode(PPC::XXSPLTIB, Dl, MVT::v4i32,
15779+
DAG.getTargetConstant(255, Dl, MVT::i32)),
15780+
0);
15781+
// Generate appropriate right shift instruction
15782+
SDValue ShiftVec = SDValue(
15783+
DAG.getMachineNode(PPC::VSRQ, Dl, VT, ShiftAmountVec, ShiftAmountVec),
15784+
0);
15785+
LLVM_DEBUG(llvm::dbgs()
15786+
<< "\n combineBVLoadsSpecialValue: Instruction Emitted ";
15787+
ShiftVec.dump());
15788+
return ShiftVec;
15789+
}
15790+
// No patterns matched for build vectors.
15791+
return SDValue();
15792+
}
15793+
1566015794
/// Reduce the number of loads when building a vector.
1566115795
///
1566215796
/// Building a vector out of multiple loads can be converted to a load

llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1471,6 +1471,9 @@ namespace llvm {
14711471
combineElementTruncationToVectorTruncation(SDNode *N,
14721472
DAGCombinerInfo &DCI) const;
14731473

1474+
/// Try to replace a constant BUILD_VECTOR with LXVKQ, or with an
/// XXSPLTIB + VSRQ sequence, instead of loading the value from memory.
/// Returns an empty SDValue when the vector type is not v16i8/v8i16/v4i32/
/// v2i64, when an operand is not a constant, or when no pattern matches.
SDValue combineBVLoadsSpecialValue(SDValue Operand,
1475+
SelectionDAG &DAG) const;
1476+
14741477
/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be
14751478
/// handled by the VINSERTH instruction introduced in ISA 3.0. This is
14761479
/// essentially any shuffle of v8i16 vectors that just inserts one element

0 commit comments

Comments
 (0)