diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 2907303874de5..a4935b210c6a2 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -9698,6 +9698,10 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, } return SDV; } + // Recognize build vector patterns to emit VSX vector instructions + // instead of loading value from memory. + if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG)) + return VecPat; } // Check if this is a splat of a constant value. APInt APSplatBits, APSplatUndef; @@ -15661,6 +15665,137 @@ combineElementTruncationToVectorTruncation(SDNode *N, return SDValue(); } +// LXVKQ instruction load VSX vector with a special quadword value +// based on an immediate value. This helper method returns the details of the +// match as a tuple of {LXVKQ unsigned IMM Value, right_shift_amount} +// to help generate the LXVKQ instruction and the subsequent shift instruction +// required to match the original build vector pattern. + +// LXVKQPattern: {LXVKQ unsigned IMM Value, right_shift_amount} +using LXVKQPattern = std::tuple; + +static std::optional getPatternInfo(const APInt &FullVal) { + + // LXVKQ instruction loads the Quadword value: + // 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000 + static const APInt BasePattern = APInt(128, 0x8000000000000000ULL) << 64; + static const uint32_t Uim = 16; + + // Check for direct LXVKQ match (no shift needed) + if (FullVal == BasePattern) + return std::make_tuple(Uim, uint8_t{0}); + + // Check if FullValue is 1 (the result of the base pattern >> 127) + if (FullVal == APInt(128, 1)) + return std::make_tuple(Uim, uint8_t{127}); + + return std::nullopt; +} + +/// Combine vector loads to a single load (using lxvkq) or splat with shift of a +/// constant (xxspltib + vsrq) by recognising patterns in the Build Vector. +/// LXVKQ instruction load VSX vector with a special quadword value based on an +/// immediate value. if UIM=0b10000 then LXVKQ loads VSR[32×TX+T] with value +/// 0x8000_0000_0000_0000_0000_0000_0000_0000. +/// This can be used to inline the build vector constants that have the +/// following patterns: +/// +/// 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern) +/// 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern) +/// MSB pattern can directly loaded using LXVKQ while LSB is loaded using a +/// combination of splatting and right shift instructions. + +SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op, + SelectionDAG &DAG) const { + + assert((Op.getNode() && Op.getOpcode() == ISD::BUILD_VECTOR) && + "Expected a BuildVectorSDNode in combineBVLoadsSpecialValue"); + + // This transformation is only supported if we are loading either a byte, + // halfword, word, or doubleword. + EVT VT = Op.getValueType(); + if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 || + VT == MVT::v2i64)) + return SDValue(); + + LLVM_DEBUG(llvm::dbgs() << "\ncombineBVLoadsSpecialValue: Build vector (" + << VT.getEVTString() << "): "; + Op->dump()); + + unsigned NumElems = VT.getVectorNumElements(); + unsigned ElemBits = VT.getScalarSizeInBits(); + + bool IsLittleEndian = DAG.getDataLayout().isLittleEndian(); + + // Check for Non-constant operand in the build vector. + for (const SDValue &Operand : Op.getNode()->op_values()) { + if (!isa(Operand)) + return SDValue(); + } + + // Assemble build vector operands as a 128-bit register value + // We need to reconstruct what the 128-bit register pattern would be + // that produces this vector when interpreted with the current endianness + APInt FullVal = APInt::getZero(128); + + for (unsigned Index = 0; Index < NumElems; ++Index) { + auto *C = cast(Op.getOperand(Index)); + + // Get element value as raw bits (zero-extended) + uint64_t ElemValue = C->getZExtValue(); + + // Mask to element size to ensure we only get the relevant bits + if (ElemBits < 64) + ElemValue &= ((1ULL << ElemBits) - 1); + + // Calculate bit position for this element in the 128-bit register + unsigned BitPos = + (IsLittleEndian) ? (Index * ElemBits) : (128 - (Index + 1) * ElemBits); + + // Create APInt for the element value and shift it to correct position + APInt ElemAPInt(128, ElemValue); + ElemAPInt <<= BitPos; + + // Place the element value at the correct bit position + FullVal |= ElemAPInt; + } + + if (auto UIMOpt = getPatternInfo(FullVal)) { + const auto &[Uim, ShiftAmount] = *UIMOpt; + SDLoc Dl(Op); + + // Generate LXVKQ instruction if the shift amount is zero. + if (ShiftAmount == 0) { + SDValue UimVal = DAG.getTargetConstant(Uim, Dl, MVT::i32); + SDValue LxvkqInstr = + SDValue(DAG.getMachineNode(PPC::LXVKQ, Dl, VT, UimVal), 0); + LLVM_DEBUG(llvm::dbgs() + << "combineBVLoadsSpecialValue: Instruction Emitted "; + LxvkqInstr.dump()); + return LxvkqInstr; + } + + // The right shifted pattern can be constructed using a combination of + // XXSPLTIB and VSRQ instruction. VSRQ uses the shift amount from the lower + // 7 bits of byte 15. This can be specified using XXSPLTIB with immediate + // value 255. + SDValue ShiftAmountVec = + SDValue(DAG.getMachineNode(PPC::XXSPLTIB, Dl, MVT::v4i32, + DAG.getTargetConstant(255, Dl, MVT::i32)), + 0); + // Generate appropriate right shift instruction + SDValue ShiftVec = SDValue( + DAG.getMachineNode(PPC::VSRQ, Dl, VT, ShiftAmountVec, ShiftAmountVec), + 0); + LLVM_DEBUG(llvm::dbgs() + << "\n combineBVLoadsSpecialValue: Instruction Emitted "; + ShiftVec.dump()); + return ShiftVec; + } + // No patterns matched for build vectors. + return SDValue(); +} + /// Reduce the number of loads when building a vector. /// /// Building a vector out of multiple loads can be converted to a load diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 669430550f4e6..97382cd8f613c 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1471,6 +1471,9 @@ namespace llvm { combineElementTruncationToVectorTruncation(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineBVLoadsSpecialValue(SDValue Operand, + SelectionDAG &DAG) const; + /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be /// handled by the VINSERTH instruction introduced in ISA 3.0. This is /// essentially any shuffle of v8i16 vectors that just inserts one element diff --git a/llvm/test/CodeGen/PowerPC/lxvkq-vec-constant.ll b/llvm/test/CodeGen/PowerPC/lxvkq-vec-constant.ll new file mode 100644 index 0000000000000..0ee4524a6c68a --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/lxvkq-vec-constant.ll @@ -0,0 +1,307 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 + +; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \ +; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=POWERPC64-LE-10 + +; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64-unknown-unknown \ +; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=POWERPC64-BE-10 + +; Test LXVKQ instruction generation for special vector constants matching 128 bit patterns: +; 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern) +; 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern) + +; ============================================================================= +; v2i64 tests - MSB set pattern (0x8000_0000_0000_0000_0000_0000_0000_0000) +; ============================================================================= + +; Big-Endian: 0x8000_0000_0000_0000_0000_0000_0000_0000 represents <-9223372036854775808, 0> +define dso_local noundef <2 x i64> @test_v2i64_msb_set_bigendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v2i64_msb_set_bigendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: plxv v2, .LCPI0_0@PCREL(0), 1 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v2i64_msb_set_bigendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: lxvkq v2, 16 +; POWERPC64-BE-10-NEXT: blr +entry: + ret <2 x i64> +} + +; Little-Endian: 0x8000_0000_0000_0000_0000_0000_0000_0000 represents <0, -9223372036854775808> +define dso_local noundef <2 x i64> @test_v2i64_msb_set_littleendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v2i64_msb_set_littleendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: lxvkq v2, 16 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v2i64_msb_set_littleendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; POWERPC64-BE-10-NEXT: addi r3, r3, .LCPI1_0@toc@l +; POWERPC64-BE-10-NEXT: lxv v2, 0(r3) +; POWERPC64-BE-10-NEXT: blr +entry: + ret <2 x i64> +} + +; ============================================================================= +; v4i32 tests - MSB set pattern (0x8000_0000_0000_0000_0000_0000_0000_0000) +; ============================================================================= + +; Big-Endian: 0x8000_0000_0000_0000_0000_0000_0000_0000 represents <-2147483648, 0, 0, 0> +define dso_local noundef <4 x i32> @test_v4i32_msb_set_bigendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v4i32_msb_set_bigendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: plxv v2, .LCPI2_0@PCREL(0), 1 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v4i32_msb_set_bigendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: lxvkq v2, 16 +; POWERPC64-BE-10-NEXT: blr +entry: + ret <4 x i32> +} + +; Little-Endian: 0x8000_0000_0000_0000_0000_0000_0000_0000 represents <0, 0, 0, -2147483648> +define dso_local noundef <4 x i32> @test_v4i32_msb_set_littleendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v4i32_msb_set_littleendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: lxvkq v2, 16 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v4i32_msb_set_littleendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; POWERPC64-BE-10-NEXT: addi r3, r3, .LCPI3_0@toc@l +; POWERPC64-BE-10-NEXT: lxv v2, 0(r3) +; POWERPC64-BE-10-NEXT: blr +entry: + ret <4 x i32> +} + +; ============================================================================= +; v8i16 tests - MSB set pattern (0x8000_0000_0000_0000_0000_0000_0000_0000) +; ============================================================================= + +; Big-Endian: 0x8000_0000_0000_0000_0000_0000_0000_0000 represents <-32768, 0, 0, 0, 0, 0, 0, 0> +define dso_local noundef <8 x i16> @test_v8i16_msb_set_bigendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v8i16_msb_set_bigendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: plxv v2, .LCPI4_0@PCREL(0), 1 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v8i16_msb_set_bigendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: lxvkq v2, 16 +; POWERPC64-BE-10-NEXT: blr +entry: + ret <8 x i16> +} + +; Little-Endian: 0x8000_0000_0000_0000_0000_0000_0000_0000 represents <0, 0, 0, 0, 0, 0, 0, -32768> +define dso_local noundef <8 x i16> @test_v8i16_msb_set_littleendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v8i16_msb_set_littleendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: lxvkq v2, 16 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v8i16_msb_set_littleendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: addis r3, r2, .LCPI5_0@toc@ha +; POWERPC64-BE-10-NEXT: addi r3, r3, .LCPI5_0@toc@l +; POWERPC64-BE-10-NEXT: lxv v2, 0(r3) +; POWERPC64-BE-10-NEXT: blr +entry: + ret <8 x i16> +} + +; ============================================================================= +; v16i8 tests - MSB set pattern (0x8000_0000_0000_0000_0000_0000_0000_0000) +; ============================================================================= + +; Big-Endian: 0x8000_0000_0000_0000_0000_0000_0000_0000 represents <-128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> +define dso_local noundef <16 x i8> @test_v16i8_msb_set_bigendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v16i8_msb_set_bigendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: plxv v2, .LCPI6_0@PCREL(0), 1 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v16i8_msb_set_bigendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: lxvkq v2, 16 +; POWERPC64-BE-10-NEXT: blr +entry: + ret <16 x i8> +} + +; Little-Endian: 0x8000_0000_0000_0000_0000_0000_0000_0000 represents <0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128> +define dso_local noundef <16 x i8> @test_v16i8_msb_set_littleendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v16i8_msb_set_littleendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: lxvkq v2, 16 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v16i8_msb_set_littleendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: addis r3, r2, .LCPI7_0@toc@ha +; POWERPC64-BE-10-NEXT: addi r3, r3, .LCPI7_0@toc@l +; POWERPC64-BE-10-NEXT: lxv v2, 0(r3) +; POWERPC64-BE-10-NEXT: blr +entry: + ret <16 x i8> +} + +; ============================================================================= +; v2i64 tests - LSB set pattern (0x0000_0000_0000_0000_0000_0000_0000_0001) +; ============================================================================= + +; Big-Endian: 0x0000_0000_0000_0000_0000_0000_0000_0001 represents <0, 1> +define dso_local noundef <2 x i64> @test_v2i64_lsb_set_bigendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v2i64_lsb_set_bigendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: plxv v2, .LCPI8_0@PCREL(0), 1 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v2i64_lsb_set_bigendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: xxspltib v2, 255 +; POWERPC64-BE-10-NEXT: vsrq v2, v2, v2 +; POWERPC64-BE-10-NEXT: blr +entry: + ret <2 x i64> +} + +; Little-Endian: 0x0000_0000_0000_0000_0000_0000_0000_0001 represents <1, 0> +define dso_local noundef <2 x i64> @test_v2i64_lsb_set_littleendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v2i64_lsb_set_littleendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: xxspltib v2, 255 +; POWERPC64-LE-10-NEXT: vsrq v2, v2, v2 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v2i64_lsb_set_littleendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: addis r3, r2, .LCPI9_0@toc@ha +; POWERPC64-BE-10-NEXT: addi r3, r3, .LCPI9_0@toc@l +; POWERPC64-BE-10-NEXT: lxv v2, 0(r3) +; POWERPC64-BE-10-NEXT: blr +entry: + ret <2 x i64> +} + +; ============================================================================= +; v4i32 tests - LSB set pattern (0x0000_0000_0000_0000_0000_0000_0000_0001) +; ============================================================================= + +; Big-Endian: 0x0000_0000_0000_0000_0000_0000_0000_0001 represents <0, 0, 0, 1> +define dso_local noundef <4 x i32> @test_v4i32_lsb_set_bigendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v4i32_lsb_set_bigendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: plxv v2, .LCPI10_0@PCREL(0), 1 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v4i32_lsb_set_bigendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: xxspltib v2, 255 +; POWERPC64-BE-10-NEXT: vsrq v2, v2, v2 +; POWERPC64-BE-10-NEXT: blr +entry: + ret <4 x i32> +} + +; Little-Endian: 0x0000_0000_0000_0000_0000_0000_0000_0001 represents <1, 0, 0, 0> +define dso_local noundef <4 x i32> @test_v4i32_lsb_set_littleendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v4i32_lsb_set_littleendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: xxspltib v2, 255 +; POWERPC64-LE-10-NEXT: vsrq v2, v2, v2 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v4i32_lsb_set_littleendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: addis r3, r2, .LCPI11_0@toc@ha +; POWERPC64-BE-10-NEXT: addi r3, r3, .LCPI11_0@toc@l +; POWERPC64-BE-10-NEXT: lxv v2, 0(r3) +; POWERPC64-BE-10-NEXT: blr +entry: + ret <4 x i32> +} + +; ============================================================================= +; v8i16 tests - LSB set pattern (0x0000_0000_0000_0000_0000_0000_0000_0001) +; ============================================================================= + +; Big-Endian: 0x0000_0000_0000_0000_0000_0000_0000_0001 represents <0, 0, 0, 0, 0, 0, 0, 1> +define dso_local noundef <8 x i16> @test_v8i16_lsb_set_bigendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v8i16_lsb_set_bigendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: plxv v2, .LCPI12_0@PCREL(0), 1 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v8i16_lsb_set_bigendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: xxspltib v2, 255 +; POWERPC64-BE-10-NEXT: vsrq v2, v2, v2 +; POWERPC64-BE-10-NEXT: blr +entry: + ret <8 x i16> +} + +; Little-Endian: 0x0000_0000_0000_0000_0000_0000_0000_0001 represents <1, 0, 0, 0, 0, 0, 0, 0> +define dso_local noundef <8 x i16> @test_v8i16_lsb_set_littleendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v8i16_lsb_set_littleendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: xxspltib v2, 255 +; POWERPC64-LE-10-NEXT: vsrq v2, v2, v2 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v8i16_lsb_set_littleendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: addis r3, r2, .LCPI13_0@toc@ha +; POWERPC64-BE-10-NEXT: addi r3, r3, .LCPI13_0@toc@l +; POWERPC64-BE-10-NEXT: lxv v2, 0(r3) +; POWERPC64-BE-10-NEXT: blr +entry: + ret <8 x i16> +} + +; ============================================================================= +; v16i8 tests - LSB set pattern (0x0000_0000_0000_0000_0000_0000_0000_0001) +; ============================================================================= + +; Big-Endian: 0x0000_0000_0000_0000_0000_0000_0000_0001 represents <0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1> +define dso_local noundef <16 x i8> @test_v16i8_lsb_set_bigendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v16i8_lsb_set_bigendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: plxv v2, .LCPI14_0@PCREL(0), 1 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v16i8_lsb_set_bigendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: xxspltib v2, 255 +; POWERPC64-BE-10-NEXT: vsrq v2, v2, v2 +; POWERPC64-BE-10-NEXT: blr +entry: + ret <16 x i8> +} + +; Little-Endian: 0x0000_0000_0000_0000_0000_0000_0000_0001 represents <1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> +define dso_local noundef <16 x i8> @test_v16i8_lsb_set_littleendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v16i8_lsb_set_littleendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: xxspltib v2, 255 +; POWERPC64-LE-10-NEXT: vsrq v2, v2, v2 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v16i8_lsb_set_littleendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: addis r3, r2, .LCPI15_0@toc@ha +; POWERPC64-BE-10-NEXT: addi r3, r3, .LCPI15_0@toc@l +; POWERPC64-BE-10-NEXT: lxv v2, 0(r3) +; POWERPC64-BE-10-NEXT: blr +entry: + ret <16 x i8> +} \ No newline at end of file diff --git a/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll b/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll index 0892210fc7443..d506d2062be5c 100644 --- a/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll +++ b/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll @@ -1566,12 +1566,16 @@ define dso_local i64 @v16i8tov16i64_sign(<16 x i8> %a) local_unnamed_addr #0 { ; PWR10BE-LABEL: v16i8tov16i64_sign: ; PWR10BE: # %bb.0: # %entry ; PWR10BE-NEXT: addis r3, r2, .LCPI23_0@toc@ha +; PWR10BE-NEXT: xxspltib v1, 255 ; PWR10BE-NEXT: addi r3, r3, .LCPI23_0@toc@l +; PWR10BE-NEXT: vsrq v1, v1, v1 ; PWR10BE-NEXT: lxv v3, 0(r3) ; PWR10BE-NEXT: addis r3, r2, .LCPI23_1@toc@ha ; PWR10BE-NEXT: addi r3, r3, .LCPI23_1@toc@l +; PWR10BE-NEXT: vperm v1, v2, v2, v1 ; PWR10BE-NEXT: lxv v4, 0(r3) ; PWR10BE-NEXT: addis r3, r2, .LCPI23_2@toc@ha +; PWR10BE-NEXT: vextsb2d v1, v1 ; PWR10BE-NEXT: vperm v3, v2, v2, v3 ; PWR10BE-NEXT: addi r3, r3, .LCPI23_2@toc@l ; PWR10BE-NEXT: vextsb2d v3, v3 @@ -1585,23 +1589,18 @@ define dso_local i64 @v16i8tov16i64_sign(<16 x i8> %a) local_unnamed_addr #0 { ; PWR10BE-NEXT: vperm v5, v2, v2, v5 ; PWR10BE-NEXT: addi r3, r3, .LCPI23_4@toc@l ; PWR10BE-NEXT: vextsb2d v5, v5 -; PWR10BE-NEXT: lxv v1, 0(r3) +; PWR10BE-NEXT: lxv v6, 0(r3) ; PWR10BE-NEXT: addis r3, r2, .LCPI23_5@toc@ha ; PWR10BE-NEXT: vperm v0, v2, v2, v0 ; PWR10BE-NEXT: addi r3, r3, .LCPI23_5@toc@l ; PWR10BE-NEXT: vextsb2d v0, v0 -; PWR10BE-NEXT: lxv v6, 0(r3) +; PWR10BE-NEXT: lxv v7, 0(r3) ; PWR10BE-NEXT: addis r3, r2, .LCPI23_6@toc@ha -; PWR10BE-NEXT: vperm v1, v2, v2, v1 +; PWR10BE-NEXT: vperm v6, v2, v2, v6 ; PWR10BE-NEXT: vaddudm v5, v0, v5 ; PWR10BE-NEXT: vaddudm v3, v4, v3 ; PWR10BE-NEXT: vaddudm v3, v3, v5 ; PWR10BE-NEXT: addi r3, r3, .LCPI23_6@toc@l -; PWR10BE-NEXT: vextsb2d v1, v1 -; PWR10BE-NEXT: lxv v7, 0(r3) -; PWR10BE-NEXT: addis r3, r2, .LCPI23_7@toc@ha -; PWR10BE-NEXT: vperm v6, v2, v2, v6 -; PWR10BE-NEXT: addi r3, r3, .LCPI23_7@toc@l ; PWR10BE-NEXT: vextsb2d v6, v6 ; PWR10BE-NEXT: lxv v8, 0(r3) ; PWR10BE-NEXT: vperm v7, v2, v2, v7 @@ -1609,7 +1608,7 @@ define dso_local i64 @v16i8tov16i64_sign(<16 x i8> %a) local_unnamed_addr #0 { ; PWR10BE-NEXT: vperm v2, v2, v2, v8 ; PWR10BE-NEXT: vextsb2d v2, v2 ; PWR10BE-NEXT: vaddudm v2, v2, v7 -; PWR10BE-NEXT: vaddudm v4, v6, v1 +; PWR10BE-NEXT: vaddudm v4, v1, v6 ; PWR10BE-NEXT: vaddudm v2, v4, v2 ; PWR10BE-NEXT: vaddudm v2, v2, v3 ; PWR10BE-NEXT: xxswapd v3, v2