@@ -9679,13 +9679,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9679
9679
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9680
9680
assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9681
9681
9682
- // Recognize build vector patterns to emit VSX vector instructions
9683
- // instead of loading value from memory.
9684
- if (Subtarget.isISA3_1() && Subtarget.hasVSX()) {
9685
- if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG))
9686
- return VecPat;
9687
- }
9688
-
9689
9682
if (Subtarget.hasP10Vector()) {
9690
9683
APInt BitMask(32, 0);
9691
9684
// If the value of the vector is all zeros or all ones,
@@ -9705,6 +9698,10 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9705
9698
}
9706
9699
return SDV;
9707
9700
}
9701
+ // Recognize build vector patterns to emit VSX vector instructions
9702
+ // instead of loading value from memory.
9703
+ if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG))
9704
+ return VecPat;
9708
9705
}
9709
9706
// Check if this is a splat of a constant value.
9710
9707
APInt APSplatBits, APSplatUndef;
@@ -15679,31 +15676,35 @@ using LXVKQPattern = std::tuple<uint32_t, uint8_t>;
15679
15676
15680
15677
static std::optional<LXVKQPattern> getPatternInfo(const APInt &FullVal) {
15681
15678
15682
- static const auto BaseLXVKQPatterns = []() {
15683
- // LXVKQ instruction loads the Quadword value:
15684
- // 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000
15685
- return std::array<std::pair<APInt, uint32_t>, 1>{
15686
- {{APInt(128, 0x8000000000000000ULL) << 64, 16}}};
15687
- }();
15679
+ // LXVKQ instruction loads the Quadword value:
15680
+ // 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000
15681
+ static const APInt BasePattern = APInt(128, 0x8000000000000000ULL) << 64;
15682
+ static const uint32_t Uim = 16;
15688
15683
15689
15684
// Check for direct LXVKQ match (no shift needed)
15690
- for (const auto &[BasePattern, Uim] : BaseLXVKQPatterns) {
15691
- if (FullVal == BasePattern)
15692
- return std::make_tuple(Uim, uint8_t{0});
15693
- }
15685
+ if (FullVal == BasePattern)
15686
+ return std::make_tuple(Uim, uint8_t{0});
15694
15687
15695
- // Check if FullValue can be generated by (right) shifting a base pattern
15696
- for (const auto &[BasePattern, Uim] : BaseLXVKQPatterns) {
15697
- if (BasePattern.lshr(127) == FullVal)
15698
- return std::make_tuple(Uim, uint8_t{127});
15699
- }
15688
+ // Check if FullVal is 1 (the result of the base pattern >> 127)
15689
+ if (FullVal == APInt(128, 1))
15690
+ return std::make_tuple(Uim, uint8_t{127});
15700
15691
15701
15692
return std::nullopt;
15702
15693
}
15703
15694
15704
- /// Combine vector loads to a single load by recognising patterns in the Build
15705
- /// Vector. LXVKQ instruction load VSX vector with a special quadword value
15706
- /// based on an immediate value.
15695
+ /// Combine vector loads to a single load (using lxvkq) or splat with shift of a
15696
+ /// constant (xxspltib + vsrq) by recognising patterns in the Build Vector.
15697
+ /// LXVKQ loads a VSX vector with a special quadword value based on an
15698
+ /// immediate value. If UIM=0b10000, then LXVKQ loads VSR[32×TX+T] with value
15699
+ /// 0x8000_0000_0000_0000_0000_0000_0000_0000.
15700
+ /// This can be used to inline the build vector constants that have the
15701
+ /// following patterns:
15702
+ ///
15703
+ /// 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern)
15704
+ /// 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern)
15705
+ /// MSB pattern can be directly loaded using LXVKQ while LSB is loaded using a
15706
+ /// combination of splatting and right shift instructions.
15707
+
15707
15708
SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op,
15708
15709
SelectionDAG &DAG) const {
15709
15710
@@ -15775,8 +15776,8 @@ SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op,
15775
15776
}
15776
15777
15777
15778
// The right shifted pattern can be constructed using a combination of
15778
- // XXSPLITIB and VSRQ instruction. VSRQ uses the shift amount from the lower
15779
- // 7 bits of byte 15. This can be specified using XXSPLITIB with immediate
15779
+ // XXSPLTIB and VSRQ instructions. VSRQ uses the shift amount from the lower
15780
+ // 7 bits of byte 15. This can be specified using XXSPLTIB with immediate
15780
15781
// value 255.
15781
15782
SDValue ShiftAmountVec =
15782
15783
SDValue(DAG.getMachineNode(PPC::XXSPLTIB, Dl, MVT::v4i32,
0 commit comments