Skip to content

Commit cc537c7

Browse files
author
Tony Varghese
committed
Emit lxvkq and vsrq instructions for build vector patterns
1 parent 761c31e commit cc537c7

File tree

4 files changed

+452
-9
lines changed

4 files changed

+452
-9
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9679,6 +9679,13 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
96799679
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
96809680
assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
96819681

9682+
// Recognize build vector patterns to emit VSX vector instructions
9683+
// instead of loading value from memory.
9684+
if (Subtarget.isISA3_1() && Subtarget.hasVSX()) {
9685+
if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG))
9686+
return VecPat;
9687+
}
9688+
96829689
if (Subtarget.hasP10Vector()) {
96839690
APInt BitMask(32, 0);
96849691
// If the value of the vector is all zeros or all ones,
@@ -15661,6 +15668,133 @@ combineElementTruncationToVectorTruncation(SDNode *N,
1566115668
return SDValue();
1566215669
}
1566315670

15671+
// LXVKQ instruction load VSX vector with a special quadword value
15672+
// based on an immediate value. This helper method returns the details of the
15673+
// match as a tuple of {LXVKQ unsigned IMM Value, right_shift_amount}
15674+
// to help generate the LXVKQ instruction and the subsequent shift instruction
15675+
// required to match the original build vector pattern.
15676+
15677+
// LXVKQPattern: {LXVKQ unsigned IMM Value, right_shift_amount}
15678+
using LXVKQPattern = std::tuple<uint32_t, uint8_t>;
15679+
15680+
static std::optional<LXVKQPattern> getPatternInfo(const APInt &FullVal) {
15681+
15682+
static const auto BaseLXVKQPatterns = []() {
15683+
// LXVKQ instruction loads the Quadword value:
15684+
// 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000
15685+
return std::array<std::pair<APInt, uint32_t>, 1>{
15686+
{{APInt(128, 0x8000000000000000ULL) << 64, 16}}};
15687+
}();
15688+
15689+
// Check for direct LXVKQ match (no shift needed)
15690+
for (const auto &[BasePattern, Uim] : BaseLXVKQPatterns) {
15691+
if (FullVal == BasePattern)
15692+
return std::make_tuple(Uim, uint8_t{0});
15693+
}
15694+
15695+
// Check if FullValue can be generated by (right) shifting a base pattern
15696+
for (const auto &[BasePattern, Uim] : BaseLXVKQPatterns) {
15697+
if (BasePattern.lshr(127) == FullVal)
15698+
return std::make_tuple(Uim, uint8_t{127});
15699+
}
15700+
15701+
return std::nullopt;
15702+
}
15703+
15704+
/// Combine vector loads to a single load by recognising patterns in the Build
15705+
/// Vector. LXVKQ instruction load VSX vector with a special quadword value
15706+
/// based on an immediate value.
15707+
SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op,
15708+
SelectionDAG &DAG) const {
15709+
15710+
assert((Op.getNode() && Op.getOpcode() == ISD::BUILD_VECTOR) &&
15711+
"Expected a BuildVectorSDNode in combineBVLoadsSpecialValue");
15712+
15713+
// This transformation is only supported if we are loading either a byte,
15714+
// halfword, word, or doubleword.
15715+
EVT VT = Op.getValueType();
15716+
if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
15717+
VT == MVT::v2i64))
15718+
return SDValue();
15719+
15720+
LLVM_DEBUG(llvm::dbgs() << "\ncombineBVLoadsSpecialValue: Build vector ("
15721+
<< VT.getEVTString() << "): ";
15722+
Op->dump());
15723+
15724+
unsigned NumElems = VT.getVectorNumElements();
15725+
unsigned ElemBits = VT.getScalarSizeInBits();
15726+
15727+
bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
15728+
15729+
// Check for Non-constant operand in the build vector.
15730+
for (const SDValue &Operand : Op.getNode()->op_values()) {
15731+
if (!isa<ConstantSDNode>(Operand))
15732+
return SDValue();
15733+
}
15734+
15735+
// Assemble build vector operands as a 128-bit register value
15736+
// We need to reconstruct what the 128-bit register pattern would be
15737+
// that produces this vector when interpreted with the current endianness
15738+
APInt FullVal = APInt::getZero(128);
15739+
15740+
for (unsigned Index = 0; Index < NumElems; ++Index) {
15741+
auto *C = cast<ConstantSDNode>(Op.getOperand(Index));
15742+
15743+
// Get element value as raw bits (zero-extended)
15744+
uint64_t ElemValue = C->getZExtValue();
15745+
15746+
// Mask to element size to ensure we only get the relevant bits
15747+
if (ElemBits < 64)
15748+
ElemValue &= ((1ULL << ElemBits) - 1);
15749+
15750+
// Calculate bit position for this element in the 128-bit register
15751+
unsigned BitPos =
15752+
(IsLittleEndian) ? (Index * ElemBits) : (128 - (Index + 1) * ElemBits);
15753+
15754+
// Create APInt for the element value and shift it to correct position
15755+
APInt ElemAPInt(128, ElemValue);
15756+
ElemAPInt <<= BitPos;
15757+
15758+
// Place the element value at the correct bit position
15759+
FullVal |= ElemAPInt;
15760+
}
15761+
15762+
if (auto UIMOpt = getPatternInfo(FullVal)) {
15763+
const auto &[Uim, ShiftAmount] = *UIMOpt;
15764+
SDLoc Dl(Op);
15765+
15766+
// Generate LXVKQ instruction if the shift amount is zero.
15767+
if (ShiftAmount == 0) {
15768+
SDValue UimVal = DAG.getTargetConstant(Uim, Dl, MVT::i32);
15769+
SDValue LxvkqInstr =
15770+
SDValue(DAG.getMachineNode(PPC::LXVKQ, Dl, VT, UimVal), 0);
15771+
LLVM_DEBUG(llvm::dbgs()
15772+
<< "combineBVLoadsSpecialValue: Instruction Emitted ";
15773+
LxvkqInstr.dump());
15774+
return LxvkqInstr;
15775+
}
15776+
15777+
// The right shifted pattern can be constructed using a combination of
15778+
// XXSPLITIB and VSRQ instruction. VSRQ uses the shift amount from the lower
15779+
// 7 bits of byte 15. This can be specified using XXSPLITIB with immediate
15780+
// value 255.
15781+
SDValue ShiftAmountVec =
15782+
SDValue(DAG.getMachineNode(PPC::XXSPLTIB, Dl, MVT::v4i32,
15783+
DAG.getTargetConstant(255, Dl, MVT::i32)),
15784+
0);
15785+
// Generate appropriate right shift instruction
15786+
SDValue ShiftVec = SDValue(
15787+
DAG.getMachineNode(PPC::VSRQ, Dl, VT, ShiftAmountVec, ShiftAmountVec),
15788+
0);
15789+
LLVM_DEBUG(llvm::dbgs()
15790+
<< "\n combineBVLoadsSpecialValue: Instruction Emitted ";
15791+
ShiftVec.dump());
15792+
return ShiftVec;
15793+
}
15794+
// No patterns matched for build vectors.
15795+
return SDValue();
15796+
}
15797+
1566415798
/// Reduce the number of loads when building a vector.
1566515799
///
1566615800
/// Building a vector out of multiple loads can be converted to a load

llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1471,6 +1471,9 @@ namespace llvm {
14711471
combineElementTruncationToVectorTruncation(SDNode *N,
14721472
DAGCombinerInfo &DCI) const;
14731473

1474+
SDValue combineBVLoadsSpecialValue(SDValue Operand,
1475+
SelectionDAG &DAG) const;
1476+
14741477
/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be
14751478
/// handled by the VINSERTH instruction introduced in ISA 3.0. This is
14761479
/// essentially any shuffle of v8i16 vectors that just inserts one element

0 commit comments

Comments
 (0)